diff --git a/docs/changelog/114193.yaml b/docs/changelog/114193.yaml new file mode 100644 index 0000000000000..f18f9359007b8 --- /dev/null +++ b/docs/changelog/114193.yaml @@ -0,0 +1,5 @@ +pr: 114193 +summary: Add postal_code support to the City and Enterprise databases +area: Ingest Node +type: enhancement +issues: [] diff --git a/docs/changelog/114268.yaml b/docs/changelog/114268.yaml new file mode 100644 index 0000000000000..5e4457005d7d3 --- /dev/null +++ b/docs/changelog/114268.yaml @@ -0,0 +1,5 @@ +pr: 114268 +summary: Support more maxmind fields in the geoip processor +area: Ingest Node +type: enhancement +issues: [] diff --git a/docs/changelog/114521.yaml b/docs/changelog/114521.yaml new file mode 100644 index 0000000000000..c3a9c7cdd0848 --- /dev/null +++ b/docs/changelog/114521.yaml @@ -0,0 +1,5 @@ +pr: 114521 +summary: Add support for registered country fields for maxmind geoip databases +area: Ingest Node +type: enhancement +issues: [] diff --git a/docs/changelog/114548.yaml b/docs/changelog/114548.yaml new file mode 100644 index 0000000000000..b9692bcb2d10c --- /dev/null +++ b/docs/changelog/114548.yaml @@ -0,0 +1,5 @@ +pr: 114548 +summary: Support IPinfo database configurations +area: Ingest Node +type: enhancement +issues: [] diff --git a/docs/changelog/114934.yaml b/docs/changelog/114934.yaml new file mode 100644 index 0000000000000..68628993b1c80 --- /dev/null +++ b/docs/changelog/114934.yaml @@ -0,0 +1,6 @@ +pr: 114934 +summary: "[ES|QL] To_DatePeriod and To_TimeDuration return better error messages on\ + \ `union_type` fields" +area: ES|QL +type: bug +issues: [] diff --git a/docs/changelog/116583.yaml b/docs/changelog/116583.yaml new file mode 100644 index 0000000000000..3dc8337fe5b86 --- /dev/null +++ b/docs/changelog/116583.yaml @@ -0,0 +1,7 @@ +pr: 116583 +summary: Fix NPE in `EnrichLookupService` on mixed clusters with <8.14 versions +area: ES|QL +type: bug +issues: + - 116529 + - 116544 diff --git a/docs/reference/how-to/knn-search.asciidoc b/docs/reference/how-to/knn-search.asciidoc index e884c01dd3509..60c32cabdb5c1 100644 --- a/docs/reference/how-to/knn-search.asciidoc +++ b/docs/reference/how-to/knn-search.asciidoc @@ -95,13 +95,20 @@ and https://elasticsearch-benchmarks.elastic.co/#tracks/dense_vector[here] some of datasets and configurations that we use for our nightly benchmarks. [discrete] +[[dense-vector-preloading]] include::search-speed.asciidoc[tag=warm-fs-cache] The following file extensions are used for the approximate kNN search: +Each extension is broken down by the quantization types. -* `vec` and `veq` for vector values -* `vex` for HNSW graph -* `vem`, `vemf`, and `vemq` for metadata +* `vex` for the HNSW graph +* `vec` for all non-quantized vector values. This includes all element types: `float`, `byte`, and `bit`. +* `veq` for quantized vectors indexed with <>: `int4` or `int8` +* `veb` for binary vectors indexed with <>: `bbq` +* `vem`, `vemf`, `vemq`, and `vemb` for metadata, usually small and not a concern for preloading + +Generally, if you are using a quantized index, you should only preload the relevant quantized values and the HNSW graph. +Preloading the raw vectors is not necessary and might be counterproductive. 
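+ +For example, if an index uses `int8` quantization, a minimal settings sketch that preloads only the quantized vectors and the HNSW graph might look like the following (the index name is illustrative; the extensions follow the list above): + +[source,console] +---- +PUT /my-vector-index +{ +  "settings": { +    "index.store.preload": ["veq", "vex"] +  } +} +---- +// TEST[skip:illustrative example only]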
[discrete] === Reduce the number of index segments diff --git a/docs/reference/index-modules/store.asciidoc b/docs/reference/index-modules/store.asciidoc index 9b30ba9dbde35..aba0850c76437 100644 --- a/docs/reference/index-modules/store.asciidoc +++ b/docs/reference/index-modules/store.asciidoc @@ -143,8 +143,8 @@ terms dictionaries, postings lists and points, which are the most important parts of the index for search and aggregations. For vector search, you use <>, -you might want to set the setting to vector search files: `["vec", "vex", "vem"]` -("vec" is used for vector values, "vex" – for HNSW graph, "vem" – for metadata). +you might want to set the setting to vector search files. See <> for a detailed +list of the files. Note that this setting can be dangerous on indices that are larger than the size of the main memory of the host, as it would cause the filesystem cache to be diff --git a/docs/reference/ingest/apis/delete-geoip-database.asciidoc b/docs/reference/ingest/apis/delete-ip-location-database.asciidoc similarity index 52% rename from docs/reference/ingest/apis/delete-geoip-database.asciidoc rename to docs/reference/ingest/apis/delete-ip-location-database.asciidoc index 957e59f0f0de4..c3a10a914d2f4 100644 --- a/docs/reference/ingest/apis/delete-geoip-database.asciidoc +++ b/docs/reference/ingest/apis/delete-ip-location-database.asciidoc @@ -1,30 +1,30 @@ -[[delete-geoip-database-api]] -=== Delete geoip database configuration API ++++ -Delete geoip database configuration ++++ -Deletes a geoip database configuration. +[[delete-ip-location-database-api]] +=== Delete IP geolocation database configuration API ++++ +Delete IP geolocation database configuration ++++ +Deletes an IP geolocation database configuration. [source,console] ---- -DELETE /_ingest/geoip/database/my-database-id +DELETE /_ingest/ip_location/database/my-database-id ---- // TEST[skip:we don't want to leak the enterprise-geoip-downloader task, which touching these APIs would cause. Therefore, skip this test.] -[[delete-geoip-database-api-request]] +[[delete-ip-location-database-api-request]] ==== {api-request-title} -`DELETE /_ingest/geoip/database/` +`DELETE /_ingest/ip_location/database/` -[[delete-geoip-database-api-prereqs]] +[[delete-ip-location-database-api-prereqs]] ==== {api-prereq-title} * If the {es} {security-features} are enabled, you must have the `manage` <> to use this API. -[[delete-geoip-database-api-path-params]] +[[delete-ip-location-database-api-path-params]] ==== {api-path-parms-title} ``:: @@ -35,21 +35,21 @@ DELETE /_ingest/geoip/database/my-database-id -- -[[delete-geoip-database-api-query-params]] +[[delete-ip-location-database-api-query-params]] ==== {api-query-parms-title} include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=timeoutparms] -[[delete-geoip-database-api-example]] +[[delete-ip-location-database-api-example]] ==== {api-examples-title} -[[delete-geoip-database-api-specific-ex]] -===== Delete a specific geoip database configuration +[[delete-ip-location-database-api-specific-ex]] +===== Delete a specific IP geolocation database configuration [source,console] ---- -DELETE /_ingest/geoip/database/example-database-id +DELETE /_ingest/ip_location/database/example-database-id ---- // TEST[skip:we don't want to leak the enterprise-geoip-downloader task, which touching these APIs would cause. Therefore, skip this test.] 
diff --git a/docs/reference/ingest/apis/get-geoip-database.asciidoc b/docs/reference/ingest/apis/get-ip-location-database.asciidoc similarity index 65% rename from docs/reference/ingest/apis/get-geoip-database.asciidoc rename to docs/reference/ingest/apis/get-ip-location-database.asciidoc index f055e3e759db8..26e9ddc1eee50 100644 --- a/docs/reference/ingest/apis/get-geoip-database.asciidoc +++ b/docs/reference/ingest/apis/get-ip-location-database.asciidoc @@ -1,33 +1,33 @@ -[[get-geoip-database-api]] -=== Get geoip database configuration API +[[get-ip-location-database-api]] +=== Get IP geolocation database configuration API ++++ -Get geoip database configuration +Get IP geolocation database configuration ++++ -Returns information about one or more geoip database configurations. +Returns information about one or more IP geolocation database configurations. [source,console] ---- -GET /_ingest/geoip/database/my-database-id +GET /_ingest/ip_location/database/my-database-id ---- // TEST[skip:we don't want to leak the enterprise-geoip-downloader task, which touching these APIs would cause. Therefore, skip this test.] -[[get-geoip-database-api-request]] +[[get-ip-location-database-api-request]] ==== {api-request-title} -`GET /_ingest/geoip/database/` +`GET /_ingest/ip_location/database/` -`GET /_ingest/geoip/database` +`GET /_ingest/ip_location/database` -[[get-geoip-database-api-prereqs]] +[[get-ip-location-database-api-prereqs]] ==== {api-prereq-title} * If the {es} {security-features} are enabled, you must have the `manage` <> to use this API. -[[get-geoip-database-api-path-params]] +[[get-ip-location-database-api-path-params]] ==== {api-path-parms-title} ``:: @@ -38,22 +38,22 @@ supported. To get all database configurations, omit this parameter or use `*`. -[[get-geoip-database-api-query-params]] +[[get-ip-location-database-api-query-params]] ==== {api-query-parms-title} include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] -[[get-geoip-database-api-example]] +[[get-ip-location-database-api-example]] ==== {api-examples-title} -[[get-geoip-database-api-specific-ex]] -===== Get information for a specific geoip database configuration +[[get-ip-location-database-api-specific-ex]] +===== Get information for a specific IP geolocation database configuration [source,console] ---- -GET /_ingest/geoip/database/my-database-id +GET /_ingest/ip_location/database/my-database-id ---- // TEST[skip:we don't want to leak the enterprise-geoip-downloader task, which touching these APIs would cause. Therefore, skip this test.] diff --git a/docs/reference/ingest/apis/index.asciidoc b/docs/reference/ingest/apis/index.asciidoc index e068f99ea0ad3..35adc47821978 100644 --- a/docs/reference/ingest/apis/index.asciidoc +++ b/docs/reference/ingest/apis/index.asciidoc @@ -25,16 +25,14 @@ Use the following APIs to get statistics about ingest processing: the <>. [discrete] -[[ingest-geoip-database-apis]] -=== Ingest GeoIP Database APIs - -preview::["The commercial IP geolocation database download management APIs are in technical preview and may be changed or removed in a future release. 
Elastic will work to fix any issues, but this feature is not subject to the support SLA of official GA features."] +[[ingest-ip-location-database-apis]] +=== Ingest IP Location Database APIs Use the following APIs to configure and manage commercial IP geolocation database downloads: -* <> to create or update a database configuration -* <> to retrieve a database configuration -* <> to delete a database configuration +* <> to create or update a database configuration +* <> to retrieve a database configuration +* <> to delete a database configuration include::put-pipeline.asciidoc[] include::get-pipeline.asciidoc[] @@ -42,6 +40,6 @@ include::delete-pipeline.asciidoc[] include::simulate-pipeline.asciidoc[] include::simulate-ingest.asciidoc[] include::geoip-stats.asciidoc[] -include::put-geoip-database.asciidoc[] -include::get-geoip-database.asciidoc[] -include::delete-geoip-database.asciidoc[] +include::put-ip-location-database.asciidoc[] +include::get-ip-location-database.asciidoc[] +include::delete-ip-location-database.asciidoc[] diff --git a/docs/reference/ingest/apis/put-geoip-database.asciidoc b/docs/reference/ingest/apis/put-geoip-database.asciidoc deleted file mode 100644 index 311c303002387..0000000000000 --- a/docs/reference/ingest/apis/put-geoip-database.asciidoc +++ /dev/null @@ -1,72 +0,0 @@ -[[put-geoip-database-api]] -=== Create or update geoip database configuration API -++++ -Create or update geoip database configuration -++++ - -Creates or updates an IP geolocation database configuration. - -IMPORTANT: The Maxmind `account_id` shown below requires a license key. Because the license key is sensitive information, -it is stored as a <> in {es} named `ingest.geoip.downloader.maxmind.license_key`. Only -one Maxmind license key is currently allowed per {es} cluster. A valid license key must be in the secure settings in order -to download from Maxmind. The license key setting does not take effect until all nodes are restarted. - -[source,console] ----- -PUT _ingest/geoip/database/my-database-id -{ - "name": "GeoIP2-Domain", - "maxmind": { - "account_id": "1025402" - } -} ----- -// TEST[skip:we don't want to leak the enterprise-geoip-downloader task, which touching these APIs would cause. Therefore, skip this test.] - -[[put-geoip-database-api-request]] -==== {api-request-title} - -`PUT /_ingest/geoip/database/` - -[[put-geoip-database-api-prereqs]] -==== {api-prereq-title} - -* If the {es} {security-features} are enabled, you must have the -`manage` <> to use this API. - - -[[put-geoip-database-api-path-params]] -==== {api-path-parms-title} - -``:: -+ -__ -(Required, string) ID of the database configuration to create or update. - -[[put-geoip-database-api-query-params]] -==== {api-query-parms-title} - -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=timeoutparms] - -[[put-geoip-database-api-request-body]] -==== {api-request-body-title} - -// tag::geoip-database-object[] -`name`:: -(Required, string) -The provider-assigned name of the IP geolocation database to download. - -``:: -(Required, a provider object and its associated configuration) -The configuration necessary to identify which IP geolocation provider to use to download -the database, as well as any provider-specific configuration necessary for such downloading. -+ -At present, the only supported provider is `maxmind`, and the maxmind provider -requires that an `account_id` (string) is configured. 
-// end::geoip-database-object[] - -[[geoip-database-configuration-licensing]] -==== Licensing - -Downloading databases from third party providers is a commercial feature that requires an -appropriate license. For more information, refer to https://www.elastic.co/subscriptions. diff --git a/docs/reference/ingest/apis/put-ip-location-database.asciidoc b/docs/reference/ingest/apis/put-ip-location-database.asciidoc new file mode 100644 index 0000000000000..e42d84752694c --- /dev/null +++ b/docs/reference/ingest/apis/put-ip-location-database.asciidoc @@ -0,0 +1,92 @@ +[[put-ip-location-database-api]] +=== Create or update IP geolocation database configuration API +++++ +Create or update IP geolocation database configuration +++++ + +Creates or updates an IP geolocation database configuration. + +IMPORTANT: The Maxmind `account_id` shown below requires a license key. Because the license key is sensitive information, +it is stored as a <> in {es} named `ingest.geoip.downloader.maxmind.license_key`. Only +one Maxmind license key is currently allowed per {es} cluster. A valid license key must be in the secure settings in order +to download from Maxmind. The license key setting does not take effect until all nodes are restarted or a +<> request is executed. + +[source,console] +---- +PUT _ingest/ip_location/database/my-database-1 +{ + "name": "GeoIP2-Domain", + "maxmind": { + "account_id": "1234567" + } +} +---- +// TEST[skip:we don't want to leak the enterprise-geoip-downloader task, which touching these APIs would cause. Therefore, skip this test.] + +IMPORTANT: The IPinfo configuration shown below requires a token. Because the token is sensitive information, +it is stored as a <> in {es} named `ingest.ip_location.downloader.ipinfo.token`. Only +one IPinfo token is currently allowed per {es} cluster. A valid token must be in the secure settings in order +to download from IPinfo. The token setting does not take effect until all nodes are restarted or a +<> request is executed. + +[source,console] +---- +PUT _ingest/ip_location/database/my-database-2 +{ + "name": "standard_location", + "ipinfo": { + } +} +---- +// TEST[skip:we don't want to leak the enterprise-geoip-downloader task, which touching these APIs would cause. Therefore, skip this test.] + + +[[put-ip-location-database-api-request]] +==== {api-request-title} + +`PUT /_ingest/ip_location/database/` + +[[put-ip-location-database-api-prereqs]] +==== {api-prereq-title} + +* If the {es} {security-features} are enabled, you must have the +`manage` <> to use this API. + + +[[put-ip-location-database-api-path-params]] +==== {api-path-parms-title} + +``:: ++ +__ +(Required, string) ID of the database configuration to create or update. + +[[put-ip-location-database-api-query-params]] +==== {api-query-parms-title} + +include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=timeoutparms] + +[[put-ip-location-database-api-request-body]] +==== {api-request-body-title} + +// tag::ip-location-database-object[] +`name`:: +(Required, string) +The provider-assigned name of the IP geolocation database to download. + +``:: +(Required, a provider object and its associated configuration) +The configuration necessary to identify which IP geolocation provider to use to download +the database, as well as any provider-specific configuration necessary for such downloading. ++ +At present, the only supported providers are `maxmind` and `ipinfo`. The maxmind provider +requires that an `account_id` (string) is configured. 
The ipinfo provider does not require +additional configuration in the request body. +// end::ip-location-database-object[] + +[[ip-location-database-configuration-licensing]] +==== Licensing + +Downloading databases from third party providers is a commercial feature that requires an +appropriate license. For more information, refer to https://www.elastic.co/subscriptions. diff --git a/docs/reference/ingest/processors.asciidoc b/docs/reference/ingest/processors.asciidoc index 8f7cef06d12a0..f4fcc0fc84d0d 100644 --- a/docs/reference/ingest/processors.asciidoc +++ b/docs/reference/ingest/processors.asciidoc @@ -77,7 +77,10 @@ Computes a hash of the document’s content. Converts geo-grid definitions of grid tiles or cells to regular bounding boxes or polygons which describe their shape. <>:: -Adds information about the geographical location of an IPv4 or IPv6 address. +Adds information about the geographical location of an IPv4 or IPv6 address from a Maxmind database. + +<>:: +Adds information about the geographical location of an IPv4 or IPv6 address from an IP geolocation database. <>:: Calculates the network direction given a source IP address, destination IP address, and a list of internal networks. @@ -245,6 +248,7 @@ include::processors/grok.asciidoc[] include::processors/gsub.asciidoc[] include::processors/html_strip.asciidoc[] include::processors/inference.asciidoc[] +include::processors/ip-location.asciidoc[] include::processors/join.asciidoc[] include::processors/json.asciidoc[] include::processors/kv.asciidoc[] diff --git a/docs/reference/ingest/processors/geoip.asciidoc b/docs/reference/ingest/processors/geoip.asciidoc index 3a9ba58dedbf0..78ebe3f5b5ee3 100644 --- a/docs/reference/ingest/processors/geoip.asciidoc +++ b/docs/reference/ingest/processors/geoip.asciidoc @@ -13,7 +13,7 @@ ASN IP geolocation databases from http://dev.maxmind.com/geoip/geoip2/geolite2/[ CC BY-SA 4.0 license. It automatically downloads these databases if your nodes can connect to `storage.googleapis.com` domain and either: * `ingest.geoip.downloader.eager.download` is set to true -* your cluster has at least one pipeline with a `geoip` processor +* your cluster has at least one pipeline with a `geoip` or `ip_location` processor {es} automatically downloads updates for these databases from the Elastic GeoIP endpoint: https://geoip.elastic.co/v1/database?elastic_geoip_service_tos=agree[https://geoip.elastic.co/v1/database]. To get download statistics for these updates, use the <>. If your cluster can't connect to the Elastic GeoIP endpoint or you want to manage your own updates, see <>. If you would like to have {es} download database files directly from Maxmind using your own provided -license key, see <>. +license key, see <>. If {es} can't connect to the endpoint for 30 days all updated databases will become -invalid. {es} will stop enriching documents with geoip data and will add `tags: ["_geoip_expired_database"]` +invalid. {es} will stop enriching documents with IP geolocation data and will add a `tags: ["_geoip_expired_database"]` field instead. [[using-ingest-geoip]] ==== Using the `geoip` Processor in a Pipeline [[ingest-geoip-options]] .`geoip` options [options="header"] |====== | Name | Required | Default | Description | `field` | yes | - | The field to get the IP address from for the geographical lookup. -| `target_field` | no | geoip | The field that will hold the geographical information looked up from the MaxMind database. 
-| `database_file` | no | GeoLite2-City.mmdb | The database filename referring to one of the automatically downloaded GeoLite2 databases (GeoLite2-City.mmdb, GeoLite2-Country.mmdb, or GeoLite2-ASN.mmdb), or the name of a supported database file in the `ingest-geoip` config directory, or the name of a <> (with the `.mmdb` suffix appended). -| `properties` | no | [`continent_name`, `country_iso_code`, `country_name`, `region_iso_code`, `region_name`, `city_name`, `location`] * | Controls what properties are added to the `target_field` based on the geoip lookup. +| `target_field` | no | geoip | The field that will hold the geographical information looked up from the database. +| `database_file` | no | GeoLite2-City.mmdb | The database filename referring to one of the automatically downloaded GeoLite2 databases (GeoLite2-City.mmdb, GeoLite2-Country.mmdb, or GeoLite2-ASN.mmdb), or the name of a supported database file in the `ingest-geoip` config directory, or the name of a <> (with the `.mmdb` suffix appended). +| `properties` | no | [`continent_name`, `country_iso_code`, `country_name`, `region_iso_code`, `region_name`, `city_name`, `location`] * | Controls what properties are added to the `target_field` based on the IP geolocation lookup. | `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document -| `first_only` | no | `true` | If `true` only first found geoip data will be returned, even if `field` contains array +| `first_only` | no | `true` | If `true` only the first found IP geolocation data will be returned, even if `field` contains an array | `download_database_on_pipeline_creation` | no | `true` | If `true` (and if `ingest.geoip.downloader.eager.download` is `false`), the missing database is downloaded when the pipeline is created. Else, the download is triggered by when the pipeline is used as the `default_pipeline` or `final_pipeline` in an index. |====== *Depends on what is available in `database_file`: * If a GeoLite2 City or GeoIP2 City database is used, then the following fields may be added under the `target_field`: `ip`, -`country_iso_code`, `country_name`, `continent_code`, `continent_name`, `region_iso_code`, `region_name`, `city_name`, `timezone`, -and `location`. The fields actually added depend on what has been found and which properties were configured in `properties`. +`country_iso_code`, `country_name`, `country_in_european_union`, `registered_country_iso_code`, `registered_country_name`, `registered_country_in_european_union`, +`continent_code`, `continent_name`, `region_iso_code`, `region_name`, `city_name`, `postal_code`, `timezone`, +`location`, and `accuracy_radius`. The fields actually added depend on what has been found and which properties were configured in `properties`. * If a GeoLite2 Country or GeoIP2 Country database is used, then the following fields may be added under the `target_field`: `ip`, -`country_iso_code`, `country_name`, `continent_code`, and `continent_name`. The fields actually added depend on what has been found +`country_iso_code`, `country_name`, `country_in_european_union`, `registered_country_iso_code`, `registered_country_name`, `registered_country_in_european_union`, +`continent_code`, and `continent_name`. The fields actually added depend on what has been found and which properties were configured in `properties`. * If the GeoLite2 ASN database is used, then the following fields may be added under the `target_field`: `ip`, `asn`, `organization_name` and `network`. 
The fields actually added depend on what has been found and which properties were configured @@ -70,12 +72,12 @@ The fields actually added depend on what has been found and which properties wer `organization_name`, `network`, `isp`, `isp_organization_name`, `mobile_country_code`, and `mobile_network_code`. The fields actually added depend on what has been found and which properties were configured in `properties`. * If the GeoIP2 Enterprise database is used, then the following fields may be added under the `target_field`: `ip`, -`country_iso_code`, `country_name`, `continent_code`, `continent_name`, `region_iso_code`, `region_name`, `city_name`, `timezone`, -`location`, `asn`, `organization_name`, `network`, `hosting_provider`, `tor_exit_node`, `anonymous_vpn`, `anonymous`, `public_proxy`, +`country_iso_code`, `country_name`, `country_in_european_union`, `registered_country_iso_code`, `registered_country_name`, `registered_country_in_european_union`, +`continent_code`, `continent_name`, `region_iso_code`, `region_name`, `city_name`, `postal_code`, `timezone`, +`location`, `accuracy_radius`, `country_confidence`, `city_confidence`, `postal_confidence`, `asn`, `organization_name`, `network`, +`hosting_provider`, `tor_exit_node`, `anonymous_vpn`, `anonymous`, `public_proxy`, `residential_proxy`, `domain`, `isp`, `isp_organization_name`, `mobile_country_code`, `mobile_network_code`, `user_type`, and -`connection_type`. The fields actually added depend on what has been found and which properties were configured in `properties`. - -preview::["Do not use the GeoIP2 Anonymous IP, GeoIP2 Connection Type, GeoIP2 Domain, GeoIP2 ISP, and GeoIP2 Enterprise databases in production environments. This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."] +`connection_type`. The fields actually added depend on what has been found and which properties were configured in `properties`. Here is an example that uses the default city database and adds the geographical information to the `geoip` field based on the `ip` field: @@ -83,7 +85,7 @@ Here is an example that uses the default city database and adds the geographical -------------------------------------------------- PUT _ingest/pipeline/geoip { - "description" : "Add geoip info", + "description" : "Add ip geolocation info", "processors" : [ { "geoip" : { @@ -134,7 +136,7 @@ this database is downloaded automatically. So this: -------------------------------------------------- PUT _ingest/pipeline/geoip { - "description" : "Add geoip info", + "description" : "Add ip geolocation info", "processors" : [ { "geoip" : { @@ -186,7 +188,7 @@ cannot be found: -------------------------------------------------- PUT _ingest/pipeline/geoip { - "description" : "Add geoip info", + "description" : "Add ip geolocation info", "processors" : [ { "geoip" : { @@ -252,7 +254,7 @@ PUT my_ip_locations -------------------------------------------------- PUT _ingest/pipeline/geoip { - "description" : "Add geoip info", + "description" : "Add ip geolocation info", "processors" : [ { "geoip" : { @@ -425,7 +427,7 @@ The `geoip` processor supports the following setting: The maximum number of results that should be cached. Defaults to `1000`. -Note that these settings are node settings and apply to all `geoip` processors, i.e. there is one cache for all defined `geoip` processors. 
+Note that these settings are node settings and apply to all `geoip` and `ip_location` processors, i.e. there is a single cache for all such processors. [[geoip-cluster-settings]] ===== Cluster settings @@ -454,7 +456,7 @@ each node's <> at `$ES_TMPDIR/geoip-databases/ diff --git a/docs/reference/ingest/processors/ip-location.asciidoc b/docs/reference/ingest/processors/ip-location.asciidoc new file mode 100644 --- /dev/null +++ b/docs/reference/ingest/processors/ip-location.asciidoc +IP Location +++++ + +The `ip_location` processor adds information about the geographical location of an +IPv4 or IPv6 address. + +[[ip-location-automatic-updates]] +By default, the processor uses the GeoLite2 City, GeoLite2 Country, and GeoLite2 +ASN IP geolocation databases from http://dev.maxmind.com/geoip/geoip2/geolite2/[MaxMind], shared under the +CC BY-SA 4.0 license. It automatically downloads these databases if your nodes can connect to `storage.googleapis.com` domain and either: + +* `ingest.geoip.downloader.eager.download` is set to true +* your cluster has at least one pipeline with a `geoip` or `ip_location` processor + +{es} automatically downloads updates for these databases from the Elastic GeoIP +endpoint: +https://geoip.elastic.co/v1/database?elastic_geoip_service_tos=agree[https://geoip.elastic.co/v1/database]. +To get download statistics for these updates, use the <>. + +If your cluster can't connect to the Elastic GeoIP endpoint or you want to +manage your own updates, see <>. + +If you would like to have {es} download database files directly from Maxmind using your own provided +license key, see <>. + +If {es} can't connect to the endpoint for 30 days all updated databases will become +invalid. {es} will stop enriching documents with IP geolocation data and will add a `tags: ["_ip_location_expired_database"]` +field instead. + +[[using-ingest-ip-location]] +==== Using the `ip_location` Processor in a Pipeline + +[[ingest-ip-location-options]] +.`ip_location` options +[options="header"] +|====== +| Name | Required | Default | Description +| `field` | yes | - | The field to get the IP address from for the geographical lookup. +| `target_field` | no | ip_location | The field that will hold the geographical information looked up from the database. +| `database_file` | no | GeoLite2-City.mmdb | The database filename referring to one of the automatically downloaded GeoLite2 databases (GeoLite2-City.mmdb, GeoLite2-Country.mmdb, or GeoLite2-ASN.mmdb), or the name of a supported database file in the `ingest-geoip` config directory, or the name of a <> (with the `.mmdb` suffix appended). +| `properties` | no | [`continent_name`, `country_iso_code`, `country_name`, `region_iso_code`, `region_name`, `city_name`, `location`] * | Controls what properties are added to the `target_field` based on the IP geolocation lookup. +| `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document +| `first_only` | no | `true` | If `true` only the first found IP geolocation data will be returned, even if `field` contains an array +| `download_database_on_pipeline_creation` | no | `true` | If `true` (and if `ingest.geoip.downloader.eager.download` is `false`), the missing database is downloaded when the pipeline is created. Else, the download is triggered when the pipeline is used as the `default_pipeline` or `final_pipeline` in an index. 
+|====== + +*Depends on what is available in `database_file`: + +* If a GeoLite2 City or GeoIP2 City database is used, then the following fields may be added under the `target_field`: `ip`, +`country_iso_code`, `country_name`, `country_in_european_union`, `registered_country_iso_code`, `registered_country_name`, `registered_country_in_european_union`, +`continent_code`, `continent_name`, `region_iso_code`, `region_name`, `city_name`, `postal_code`, `timezone`, +`location`, and `accuracy_radius`. The fields actually added depend on what has been found and which properties were configured in `properties`. +* If a GeoLite2 Country or GeoIP2 Country database is used, then the following fields may be added under the `target_field`: `ip`, +`country_iso_code`, `country_name`, `country_in_european_union`, `registered_country_iso_code`, `registered_country_name`, `registered_country_in_european_union`, +`continent_code`, and `continent_name`. The fields actually added depend on what has been found +and which properties were configured in `properties`. +* If the GeoLite2 ASN database is used, then the following fields may be added under the `target_field`: `ip`, +`asn`, `organization_name` and `network`. The fields actually added depend on what has been found and which properties were configured +in `properties`. +* If the GeoIP2 Anonymous IP database is used, then the following fields may be added under the `target_field`: `ip`, +`hosting_provider`, `tor_exit_node`, `anonymous_vpn`, `anonymous`, `public_proxy`, and `residential_proxy`. The fields actually added +depend on what has been found and which properties were configured in `properties`. +* If the GeoIP2 Connection Type database is used, then the following fields may be added under the `target_field`: `ip`, and +`connection_type`. The fields actually added depend on what has been found and which properties were configured in `properties`. +* If the GeoIP2 Domain database is used, then the following fields may be added under the `target_field`: `ip`, and `domain`. +The fields actually added depend on what has been found and which properties were configured in `properties`. +* If the GeoIP2 ISP database is used, then the following fields may be added under the `target_field`: `ip`, `asn`, +`organization_name`, `network`, `isp`, `isp_organization_name`, `mobile_country_code`, and `mobile_network_code`. The fields actually added +depend on what has been found and which properties were configured in `properties`. +* If the GeoIP2 Enterprise database is used, then the following fields may be added under the `target_field`: `ip`, +`country_iso_code`, `country_name`, `country_in_european_union`, `registered_country_iso_code`, `registered_country_name`, `registered_country_in_european_union`, +`continent_code`, `continent_name`, `region_iso_code`, `region_name`, `city_name`, `postal_code`, `timezone`, +`location`, `accuracy_radius`, `country_confidence`, `city_confidence`, `postal_confidence`, `asn`, `organization_name`, `network`, +`hosting_provider`, `tor_exit_node`, `anonymous_vpn`, `anonymous`, `public_proxy`, +`residential_proxy`, `domain`, `isp`, `isp_organization_name`, `mobile_country_code`, `mobile_network_code`, `user_type`, and +`connection_type`. The fields actually added depend on what has been found and which properties were configured in `properties`. 
+ +Here is an example that uses the default city database and adds the geographical information to the `ip_location` field based on the `ip` field: + +[source,console] +-------------------------------------------------- +PUT _ingest/pipeline/ip_location +{ + "description" : "Add ip geolocation info", + "processors" : [ + { + "ip_location" : { + "field" : "ip" + } + } + ] +} +PUT my-index-000001/_doc/my_id?pipeline=ip_location +{ + "ip": "89.160.20.128" +} +GET my-index-000001/_doc/my_id +-------------------------------------------------- + +Which returns: + +[source,console-result] +-------------------------------------------------- +{ + "found": true, + "_index": "my-index-000001", + "_id": "my_id", + "_version": 1, + "_seq_no": 55, + "_primary_term": 1, + "_source": { + "ip": "89.160.20.128", + "ip_location": { + "continent_name": "Europe", + "country_name": "Sweden", + "country_iso_code": "SE", + "city_name" : "Linköping", + "region_iso_code" : "SE-E", + "region_name" : "Östergötland County", + "location": { "lat": 58.4167, "lon": 15.6167 } + } + } +} +-------------------------------------------------- +// TESTRESPONSE[s/"_seq_no": \d+/"_seq_no" : $body._seq_no/ s/"_primary_term":1/"_primary_term" : $body._primary_term/] + +Here is an example that uses the default country database and adds the +geographical information to the `geo` field based on the `ip` field. Note that +this database is downloaded automatically. So this: + +[source,console] +-------------------------------------------------- +PUT _ingest/pipeline/ip_location +{ + "description" : "Add ip geolocation info", + "processors" : [ + { + "ip_location" : { + "field" : "ip", + "target_field" : "geo", + "database_file" : "GeoLite2-Country.mmdb" + } + } + ] +} +PUT my-index-000001/_doc/my_id?pipeline=ip_location +{ + "ip": "89.160.20.128" +} +GET my-index-000001/_doc/my_id +-------------------------------------------------- + +returns this: + +[source,console-result] +-------------------------------------------------- +{ + "found": true, + "_index": "my-index-000001", + "_id": "my_id", + "_version": 1, + "_seq_no": 65, + "_primary_term": 1, + "_source": { + "ip": "89.160.20.128", + "geo": { + "continent_name": "Europe", + "country_name": "Sweden", + "country_iso_code": "SE" + } + } +} +-------------------------------------------------- +// TESTRESPONSE[s/"_seq_no": \d+/"_seq_no" : $body._seq_no/ s/"_primary_term" : 1/"_primary_term" : $body._primary_term/] + + +Not all IP addresses have geolocation information in the database. When this +occurs, no `target_field` is inserted into the document. 
+ +Here is an example of what documents will be indexed as when information for "80.231.5.0" +cannot be found: + +[source,console] +-------------------------------------------------- +PUT _ingest/pipeline/ip_location +{ + "description" : "Add ip geolocation info", + "processors" : [ + { + "ip_location" : { + "field" : "ip" + } + } + ] +} + +PUT my-index-000001/_doc/my_id?pipeline=ip_location +{ + "ip": "80.231.5.0" +} + +GET my-index-000001/_doc/my_id +-------------------------------------------------- + +Which returns: + +[source,console-result] +-------------------------------------------------- +{ + "_index" : "my-index-000001", + "_id" : "my_id", + "_version" : 1, + "_seq_no" : 71, + "_primary_term": 1, + "found" : true, + "_source" : { + "ip" : "80.231.5.0" + } +} +-------------------------------------------------- +// TESTRESPONSE[s/"_seq_no" : \d+/"_seq_no" : $body._seq_no/ s/"_primary_term" : 1/"_primary_term" : $body._primary_term/] diff --git a/docs/reference/redirects.asciidoc b/docs/reference/redirects.asciidoc index e0568f500f268..506dff7891ad2 100644 --- a/docs/reference/redirects.asciidoc +++ b/docs/reference/redirects.asciidoc @@ -1926,4 +1926,19 @@ Refer to <>. [role="exclude",id="remote-clusters-privileges"] === Configure roles and users for remote clusters -Refer to <>. \ No newline at end of file +Refer to <>. + +[role="exclude",id="put-geoip-database-api"] +=== Create or update geoip database configuration API + +Refer to <>. + +[role="exclude",id="get-geoip-database-api"] +=== Get geoip database configuration + +Refer to <>. + +[role="exclude",id="delete-geoip-database-api"] +=== Delete geoip database configuration API + +Refer to <>. diff --git a/docs/reference/rest-api/security/bulk-create-roles.asciidoc b/docs/reference/rest-api/security/bulk-create-roles.asciidoc index a198f49383907..560e8b74cdd2c 100644 --- a/docs/reference/rest-api/security/bulk-create-roles.asciidoc +++ b/docs/reference/rest-api/security/bulk-create-roles.asciidoc @@ -1,7 +1,6 @@ [role="xpack"] [[security-api-bulk-put-role]] === Bulk create or update roles API -preview::[] ++++ Bulk create or update roles API ++++ diff --git a/docs/reference/rest-api/security/bulk-delete-roles.asciidoc b/docs/reference/rest-api/security/bulk-delete-roles.asciidoc index a782b5e37fcb9..b9978c89bef3a 100644 --- a/docs/reference/rest-api/security/bulk-delete-roles.asciidoc +++ b/docs/reference/rest-api/security/bulk-delete-roles.asciidoc @@ -1,7 +1,6 @@ [role="xpack"] [[security-api-bulk-delete-role]] === Bulk delete roles API -preview::[] ++++ Bulk delete roles API ++++ diff --git a/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/InternalTimeSeries.java b/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/InternalTimeSeries.java index c283f9fd93957..c4cdacd135cb4 100644 --- a/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/InternalTimeSeries.java +++ b/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/InternalTimeSeries.java @@ -13,7 +13,7 @@ import org.apache.lucene.util.PriorityQueue; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper; +import org.elasticsearch.index.mapper.RoutingPathFields; import org.elasticsearch.search.aggregations.AggregationReduceContext; import org.elasticsearch.search.aggregations.AggregatorReducer; import 
org.elasticsearch.search.aggregations.InternalAggregation; @@ -68,7 +68,7 @@ public void writeTo(StreamOutput out) throws IOException { @Override public Map getKey() { - return TimeSeriesIdFieldMapper.decodeTsidAsMap(key); + return RoutingPathFields.decodeAsMap(key); } @Override diff --git a/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregator.java b/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregator.java index a2fa617ed902b..c74637330dd7a 100644 --- a/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregator.java +++ b/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregator.java @@ -13,6 +13,7 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.core.Releasables; import org.elasticsearch.index.fielddata.SortedBinaryDocValues; +import org.elasticsearch.index.mapper.RoutingPathFields; import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper; import org.elasticsearch.search.aggregations.AggregationExecutionContext; import org.elasticsearch.search.aggregations.Aggregator; @@ -161,11 +162,11 @@ public void collect(int doc, long bucket) throws IOException { if (currentTsidOrd == aggCtx.getTsidHashOrd()) { tsid = currentTsid; } else { - TimeSeriesIdFieldMapper.TimeSeriesIdBuilder tsidBuilder = new TimeSeriesIdFieldMapper.TimeSeriesIdBuilder(null); + RoutingPathFields routingPathFields = new RoutingPathFields(null); for (TsidConsumer consumer : dimensionConsumers.values()) { - consumer.accept(doc, tsidBuilder); + consumer.accept(doc, routingPathFields); } - currentTsid = tsid = tsidBuilder.buildLegacyTsid().toBytesRef(); + currentTsid = tsid = TimeSeriesIdFieldMapper.buildLegacyTsid(routingPathFields).toBytesRef(); } long bucketOrdinal = bucketOrds.add(bucket, tsid); if (bucketOrdinal < 0) { // already seen @@ -189,6 +190,6 @@ InternalTimeSeries buildResult(InternalTimeSeries.InternalBucket[] topBuckets) { @FunctionalInterface interface TsidConsumer { - void accept(int docId, TimeSeriesIdFieldMapper.TimeSeriesIdBuilder tsidBuilder) throws IOException; + void accept(int docId, RoutingPathFields routingFields) throws IOException; } } diff --git a/modules/aggregations/src/test/java/org/elasticsearch/aggregations/bucket/timeseries/InternalTimeSeriesTests.java b/modules/aggregations/src/test/java/org/elasticsearch/aggregations/bucket/timeseries/InternalTimeSeriesTests.java index be841da07ada9..e61c02e0b9cd2 100644 --- a/modules/aggregations/src/test/java/org/elasticsearch/aggregations/bucket/timeseries/InternalTimeSeriesTests.java +++ b/modules/aggregations/src/test/java/org/elasticsearch/aggregations/bucket/timeseries/InternalTimeSeriesTests.java @@ -15,6 +15,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.MockBigArrays; import org.elasticsearch.common.util.MockPageCacheRecycler; +import org.elasticsearch.index.mapper.RoutingPathFields; import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper; import org.elasticsearch.indices.breaker.NoneCircuitBreakerService; import org.elasticsearch.search.aggregations.AggregationReduceContext; @@ -42,12 +43,12 @@ private List randomBuckets(boolean keyed, InternalAggregations a List> keys = randomKeys(bucketKeys(randomIntBetween(1, 4)), numberOfBuckets); for (int j = 0; j < numberOfBuckets; j++) { long docCount = randomLongBetween(0, Long.MAX_VALUE / (20L * numberOfBuckets)); - var builder = new 
TimeSeriesIdFieldMapper.TimeSeriesIdBuilder(null); + var routingPathFields = new RoutingPathFields(null); for (var entry : keys.get(j).entrySet()) { - builder.addString(entry.getKey(), (String) entry.getValue()); + routingPathFields.addString(entry.getKey(), (String) entry.getValue()); } try { - var key = builder.buildLegacyTsid().toBytesRef(); + var key = TimeSeriesIdFieldMapper.buildLegacyTsid(routingPathFields).toBytesRef(); bucketList.add(new InternalBucket(key, docCount, aggregations, keyed)); } catch (IOException e) { throw new UncheckedIOException(e); diff --git a/modules/aggregations/src/test/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregatorTests.java b/modules/aggregations/src/test/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregatorTests.java index 26611127a94df..d9a4023457126 100644 --- a/modules/aggregations/src/test/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregatorTests.java +++ b/modules/aggregations/src/test/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregatorTests.java @@ -30,8 +30,8 @@ import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperBuilderContext; import org.elasticsearch.index.mapper.NumberFieldMapper; +import org.elasticsearch.index.mapper.RoutingPathFields; import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper; -import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper.TimeSeriesIdBuilder; import org.elasticsearch.search.aggregations.InternalAggregations; import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder; import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval; @@ -93,10 +93,10 @@ public static void writeTS(RandomIndexWriter iw, long timestamp, Object[] dimens final List fields = new ArrayList<>(); fields.add(new SortedNumericDocValuesField(DataStreamTimestampFieldMapper.DEFAULT_PATH, timestamp)); fields.add(new LongPoint(DataStreamTimestampFieldMapper.DEFAULT_PATH, timestamp)); - final TimeSeriesIdBuilder builder = new TimeSeriesIdBuilder(null); + RoutingPathFields routingPathFields = new RoutingPathFields(null); for (int i = 0; i < dimensions.length; i += 2) { if (dimensions[i + 1] instanceof Number n) { - builder.addLong(dimensions[i].toString(), n.longValue()); + routingPathFields.addLong(dimensions[i].toString(), n.longValue()); if (dimensions[i + 1] instanceof Integer || dimensions[i + 1] instanceof Long) { fields.add(new NumericDocValuesField(dimensions[i].toString(), ((Number) dimensions[i + 1]).longValue())); } else if (dimensions[i + 1] instanceof Float) { @@ -105,7 +105,7 @@ public static void writeTS(RandomIndexWriter iw, long timestamp, Object[] dimens fields.add(new DoubleDocValuesField(dimensions[i].toString(), (double) dimensions[i + 1])); } } else { - builder.addString(dimensions[i].toString(), dimensions[i + 1].toString()); + routingPathFields.addString(dimensions[i].toString(), dimensions[i + 1].toString()); fields.add(new SortedSetDocValuesField(dimensions[i].toString(), new BytesRef(dimensions[i + 1].toString()))); } } @@ -118,7 +118,9 @@ public static void writeTS(RandomIndexWriter iw, long timestamp, Object[] dimens fields.add(new DoubleDocValuesField(metrics[i].toString(), (double) metrics[i + 1])); } } - fields.add(new SortedDocValuesField(TimeSeriesIdFieldMapper.NAME, builder.buildLegacyTsid().toBytesRef())); + fields.add( + new SortedDocValuesField(TimeSeriesIdFieldMapper.NAME, 
TimeSeriesIdFieldMapper.buildLegacyTsid(routingPathFields).toBytesRef()) + ); iw.addDocument(fields); } diff --git a/muted-tests.yml b/muted-tests.yml index f49b303a2bc50..3273b203b0982 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -266,6 +266,30 @@ tests: - class: org.elasticsearch.xpack.test.rest.XPackRestIT method: test {p0=ml/inference_crud/Test force delete given model referenced by pipeline} issue: https://github.com/elastic/elasticsearch/issues/116555 +- class: org.elasticsearch.smoketest.MlWithSecurityIT + method: test {yaml=ml/data_frame_analytics_crud/Test delete given stopped config} + issue: https://github.com/elastic/elasticsearch/issues/116608 +- class: org.elasticsearch.xpack.esql.ccq.MultiClusterSpecIT + method: test {categorize.Categorize} + issue: https://github.com/elastic/elasticsearch/issues/116434 +- class: org.elasticsearch.upgrades.SearchStatesIT + method: testBWCSearchStates + issue: https://github.com/elastic/elasticsearch/issues/116617 +- class: org.elasticsearch.upgrades.SearchStatesIT + method: testCanMatch + issue: https://github.com/elastic/elasticsearch/issues/116618 +- class: org.elasticsearch.packaging.test.ArchiveGenerateInitialCredentialsTests + method: test20NoAutoGenerationWhenAutoConfigurationDisabled + issue: https://github.com/elastic/elasticsearch/issues/116619 +- class: org.elasticsearch.packaging.test.BootstrapCheckTests + method: test20RunWithBootstrapChecks + issue: https://github.com/elastic/elasticsearch/issues/116620 +- class: org.elasticsearch.smoketest.MlWithSecurityIT + method: test {yaml=ml/inference_crud/Test force delete given model referenced by pipeline} + issue: https://github.com/elastic/elasticsearch/issues/116624 +- class: org.elasticsearch.packaging.test.DockerTests + method: test011SecurityEnabledStatus + issue: https://github.com/elastic/elasticsearch/issues/116628 # Examples: # diff --git a/server/src/main/java/org/elasticsearch/index/IndexMode.java b/server/src/main/java/org/elasticsearch/index/IndexMode.java index e6339344b6e5f..f5f923f3657f8 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexMode.java +++ b/server/src/main/java/org/elasticsearch/index/IndexMode.java @@ -23,7 +23,6 @@ import org.elasticsearch.index.codec.CodecService; import org.elasticsearch.index.mapper.DataStreamTimestampFieldMapper; import org.elasticsearch.index.mapper.DateFieldMapper; -import org.elasticsearch.index.mapper.DocumentDimensions; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.IdFieldMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; @@ -33,6 +32,8 @@ import org.elasticsearch.index.mapper.NestedLookup; import org.elasticsearch.index.mapper.ProvidedIdFieldMapper; import org.elasticsearch.index.mapper.RoutingFieldMapper; +import org.elasticsearch.index.mapper.RoutingFields; +import org.elasticsearch.index.mapper.RoutingPathFields; import org.elasticsearch.index.mapper.SourceFieldMapper; import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper; import org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper; @@ -111,8 +112,8 @@ public IdFieldMapper buildIdFieldMapper(BooleanSupplier fieldDataEnabled) { } @Override - public DocumentDimensions buildDocumentDimensions(IndexSettings settings) { - return DocumentDimensions.Noop.INSTANCE; + public RoutingFields buildRoutingFields(IndexSettings settings) { + return RoutingFields.Noop.INSTANCE; } @Override @@ -209,9 +210,9 @@ public IdFieldMapper buildIdFieldMapper(BooleanSupplier fieldDataEnabled) { } @Override 
- public DocumentDimensions buildDocumentDimensions(IndexSettings settings) { + public RoutingFields buildRoutingFields(IndexSettings settings) { IndexRouting.ExtractFromSource routing = (IndexRouting.ExtractFromSource) settings.getIndexRouting(); - return new TimeSeriesIdFieldMapper.TimeSeriesIdBuilder(routing.builder()); + return new RoutingPathFields(routing.builder()); } @Override @@ -287,8 +288,8 @@ public MetadataFieldMapper timeSeriesRoutingHashFieldMapper() { } @Override - public DocumentDimensions buildDocumentDimensions(IndexSettings settings) { - return DocumentDimensions.Noop.INSTANCE; + public RoutingFields buildRoutingFields(IndexSettings settings) { + return RoutingFields.Noop.INSTANCE; } @Override @@ -368,8 +369,8 @@ public IdFieldMapper buildIdFieldMapper(BooleanSupplier fieldDataEnabled) { } @Override - public DocumentDimensions buildDocumentDimensions(IndexSettings settings) { - return DocumentDimensions.Noop.INSTANCE; + public RoutingFields buildRoutingFields(IndexSettings settings) { + return RoutingFields.Noop.INSTANCE; } @Override @@ -524,7 +525,7 @@ public String getName() { /** * How {@code time_series_dimension} fields are handled by indices in this mode. */ - public abstract DocumentDimensions buildDocumentDimensions(IndexSettings settings); + public abstract RoutingFields buildRoutingFields(IndexSettings settings); /** * @return Whether timestamps should be validated for being withing the time range of an index. diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java index 5aaaf7dce83c9..f74d58093a7f5 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java @@ -499,7 +499,7 @@ private void indexValue(DocumentParserContext context, Boolean value) { } if (fieldType().isDimension()) { - context.getDimensions().addBoolean(fieldType().name(), value).validate(context.indexSettings()); + context.getRoutingFields().addBoolean(fieldType().name(), value); } if (indexed) { context.doc().add(new StringField(fieldType().name(), value ? Values.TRUE : Values.FALSE, Field.Store.NO)); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentDimensions.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentDimensions.java deleted file mode 100644 index 8f26d21324d9b..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentDimensions.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.index.mapper; - -import org.apache.lucene.util.BytesRef; -import org.elasticsearch.index.IndexSettings; - -import java.net.InetAddress; - -/** - * Collects dimensions from documents. 
- */ -public interface DocumentDimensions { - - /** - * Build an index's DocumentDimensions using its settings - */ - static DocumentDimensions fromIndexSettings(IndexSettings indexSettings) { - return indexSettings.getMode().buildDocumentDimensions(indexSettings); - } - - /** - * This overloaded method tries to take advantage of the fact that the UTF-8 - * value is already computed in some cases when we want to collect - * dimensions, so we can save re-computing the UTF-8 encoding. - */ - DocumentDimensions addString(String fieldName, BytesRef utf8Value); - - default DocumentDimensions addString(String fieldName, String value) { - return addString(fieldName, new BytesRef(value)); - } - - DocumentDimensions addIp(String fieldName, InetAddress value); - - DocumentDimensions addLong(String fieldName, long value); - - DocumentDimensions addUnsignedLong(String fieldName, long value); - - DocumentDimensions addBoolean(String fieldName, boolean value); - - DocumentDimensions validate(IndexSettings settings); - - /** - * Noop implementation that doesn't perform validations on dimension fields - */ - enum Noop implements DocumentDimensions { - - INSTANCE; - - @Override - public DocumentDimensions addString(String fieldName, BytesRef utf8Value) { - return this; - } - - @Override - public DocumentDimensions addString(String fieldName, String value) { - return this; - } - - @Override - public DocumentDimensions addIp(String fieldName, InetAddress value) { - return this; - } - - @Override - public DocumentDimensions addLong(String fieldName, long value) { - return this; - } - - @Override - public DocumentDimensions addUnsignedLong(String fieldName, long value) { - return this; - } - - @Override - public DocumentDimensions addBoolean(String fieldName, boolean value) { - return this; - } - - @Override - public DocumentDimensions validate(IndexSettings settings) { - return this; - } - } -} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java index c84df68a637e2..51e4e9f4c1b5e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java @@ -126,7 +126,7 @@ private enum Scope { private final DynamicMapperSize dynamicMappersSize; private final Map dynamicObjectMappers; private final Map> dynamicRuntimeFields; - private final DocumentDimensions dimensions; + private final RoutingFields routingFields; private final ObjectMapper parent; private final ObjectMapper.Dynamic dynamic; private String id; @@ -158,7 +158,7 @@ private DocumentParserContext( String id, Field version, SeqNoFieldMapper.SequenceIDFields seqID, - DocumentDimensions dimensions, + RoutingFields routingFields, ObjectMapper parent, ObjectMapper.Dynamic dynamic, Set fieldsAppliedFromTemplates, @@ -178,7 +178,7 @@ private DocumentParserContext( this.id = id; this.version = version; this.seqID = seqID; - this.dimensions = dimensions; + this.routingFields = routingFields; this.parent = parent; this.dynamic = dynamic; this.fieldsAppliedFromTemplates = fieldsAppliedFromTemplates; @@ -201,7 +201,7 @@ private DocumentParserContext(ObjectMapper parent, ObjectMapper.Dynamic dynamic, in.id, in.version, in.seqID, - in.dimensions, + in.routingFields, parent, dynamic, in.fieldsAppliedFromTemplates, @@ -231,7 +231,7 @@ protected DocumentParserContext( null, null, SeqNoFieldMapper.SequenceIDFields.emptySeqID(), - 
DocumentDimensions.fromIndexSettings(mappingParserContext.getIndexSettings()), + RoutingFields.fromIndexSettings(mappingParserContext.getIndexSettings()), parent, dynamic, new HashSet<>(), @@ -762,8 +762,8 @@ public XContentParser parser() { /** * The collection of routing fields for this document. */ - public DocumentDimensions getDimensions() { - return dimensions; + public RoutingFields getRoutingFields() { + return routingFields; } public abstract ContentPath path(); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java index 2efeeba893c6c..09f44f139d8bc 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java @@ -549,7 +549,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio private void indexValue(DocumentParserContext context, InetAddress address) { if (dimension) { - context.getDimensions().addIp(fieldType().name(), address).validate(context.indexSettings()); + context.getRoutingFields().addIp(fieldType().name(), address); } if (indexed) { Field field = new InetAddressPoint(fieldType().name(), address); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index ecc708bc94614..32aa422b18bcc 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -944,7 +944,7 @@ private void indexValue(DocumentParserContext context, String value) { final BytesRef binaryValue = new BytesRef(value); if (fieldType().isDimension()) { - context.getDimensions().addString(fieldType().name(), binaryValue).validate(context.indexSettings()); + context.getRoutingFields().addString(fieldType().name(), binaryValue); } // If the UTF8 encoding of the field value is bigger than the max length 32766, Lucene will fail the indexing request and, to diff --git a/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java index 55ed1e10428aa..8c21dfea31b9a 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java @@ -1991,7 +1991,7 @@ public Number value(XContentParser parser) throws IllegalArgumentException, IOEx */ public void indexValue(DocumentParserContext context, Number numericValue) { if (dimension && numericValue != null) { - context.getDimensions().addLong(fieldType().name(), numericValue.longValue()).validate(context.indexSettings()); + context.getRoutingFields().addLong(fieldType().name(), numericValue.longValue()); } fieldType().type.addFields(context.doc(), fieldType().name(), numericValue, indexed, hasDocValues, stored); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/RoutingFields.java b/server/src/main/java/org/elasticsearch/index/mapper/RoutingFields.java new file mode 100644 index 0000000000000..4d8d8fdcbd296 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/RoutingFields.java @@ -0,0 +1,85 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.index.IndexSettings; + +import java.net.InetAddress; + +/** + * Collects fields contributing to routing from documents. + */ +public interface RoutingFields { + + /** + * Collect routing fields from index settings + */ + static RoutingFields fromIndexSettings(IndexSettings indexSettings) { + return indexSettings.getMode().buildRoutingFields(indexSettings); + } + + /** + * This overloaded method tries to take advantage of the fact that the UTF-8 + * value is already computed in some cases when we want to collect + * routing fields, so we can save re-computing the UTF-8 encoding. + */ + RoutingFields addString(String fieldName, BytesRef utf8Value); + + default RoutingFields addString(String fieldName, String value) { + return addString(fieldName, new BytesRef(value)); + } + + RoutingFields addIp(String fieldName, InetAddress value); + + RoutingFields addLong(String fieldName, long value); + + RoutingFields addUnsignedLong(String fieldName, long value); + + RoutingFields addBoolean(String fieldName, boolean value); + + /** + * Noop implementation that doesn't perform validations on routing fields + */ + enum Noop implements RoutingFields { + + INSTANCE; + + @Override + public RoutingFields addString(String fieldName, BytesRef utf8Value) { + return this; + } + + @Override + public RoutingFields addString(String fieldName, String value) { + return this; + } + + @Override + public RoutingFields addIp(String fieldName, InetAddress value) { + return this; + } + + @Override + public RoutingFields addLong(String fieldName, long value) { + return this; + } + + @Override + public RoutingFields addUnsignedLong(String fieldName, long value) { + return this; + } + + @Override + public RoutingFields addBoolean(String fieldName, boolean value) { + return this; + } + } +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/RoutingPathFields.java b/server/src/main/java/org/elasticsearch/index/mapper/RoutingPathFields.java new file mode 100644 index 0000000000000..73baca1bf3fdb --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/RoutingPathFields.java @@ -0,0 +1,269 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.StringHelper; +import org.elasticsearch.cluster.routing.IndexRouting; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.hash.Murmur3Hasher; +import org.elasticsearch.common.hash.MurmurHash3; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.network.NetworkAddress; +import org.elasticsearch.common.util.ByteUtils; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.search.DocValueFormat; + +import java.io.IOException; +import java.net.InetAddress; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; + +/** + * Implementation of {@link RoutingFields} that collects fields matching the content of the index's routing path. + */ +public final class RoutingPathFields implements RoutingFields { + + private static final int SEED = 0; + + private static final int MAX_ROUTING_FIELDS = 512; + + private static final int MAX_HASH_LEN_BYTES = 2; + static { + assert MAX_HASH_LEN_BYTES == StreamOutput.putVInt(new byte[2], hashLen(MAX_ROUTING_FIELDS), 0); + } + + /** + * A map of the serialized values of routing fields that will be used + * for generating the _tsid field. The map will be used by {@link TimeSeriesIdFieldMapper} + * to build the _tsid field for the document. + */ + private final SortedMap<BytesRef, List<BytesReference>> routingValues = new TreeMap<>(); + + /** + * Builds the routing. Used for building {@code _id}. If null then skipped. + */ + @Nullable + private final IndexRouting.ExtractFromSource.Builder routingBuilder; + + public RoutingPathFields(@Nullable IndexRouting.ExtractFromSource.Builder routingBuilder) { + this.routingBuilder = routingBuilder; + } + + SortedMap<BytesRef, List<BytesReference>> routingValues() { + return Collections.unmodifiableSortedMap(routingValues); + } + + IndexRouting.ExtractFromSource.Builder routingBuilder() { + return routingBuilder; + } + + /** + * Here we build the hash of the routing values using a similarity function so that we have a result + * with the following pattern: + * + * hash128(concatenate(routing field names)) + + * foreach(routing field value, limit = MAX_ROUTING_FIELDS) { hash32(routing field value) } + + * hash128(concatenate(routing field values)) + * + * The idea is to be able to place 'similar' values close to each other. 
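+ * + * A rough worked example of the layout (illustrative only, assuming two routing fields): hashLen(2) = 16 + 16 + 4 * 2 = 40 + * payload bytes, preceded by a single vint length byte, i.e. 1 + 16 (hash128 of the names) + 2 * 4 (one hash32 per value) + * + 16 (hash128 of the values) = 41 bytes in total. 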
+ */ + public BytesReference buildHash() { + Murmur3Hasher hasher = new Murmur3Hasher(SEED); + + // NOTE: hash all routing field names + int numberOfFields = Math.min(MAX_ROUTING_FIELDS, routingValues.size()); + int len = hashLen(numberOfFields); + // either one or two bytes are occupied by the vint since we're bounded by #MAX_ROUTING_FIELDS + byte[] hash = new byte[MAX_HASH_LEN_BYTES + len]; + int index = StreamOutput.putVInt(hash, len, 0); + + hasher.reset(); + for (final BytesRef name : routingValues.keySet()) { + hasher.update(name.bytes); + } + index = writeHash128(hasher.digestHash(), hash, index); + + // NOTE: concatenate all routing field value hashes up to a certain number of fields + int startIndex = index; + for (final List<BytesReference> values : routingValues.values()) { + if ((index - startIndex) >= 4 * numberOfFields) { + break; + } + assert values.isEmpty() == false : "routing values are empty"; + final BytesRef routingValue = values.get(0).toBytesRef(); + ByteUtils.writeIntLE( + StringHelper.murmurhash3_x86_32(routingValue.bytes, routingValue.offset, routingValue.length, SEED), + hash, + index + ); + index += 4; + } + + // NOTE: hash all routing field values, including all values of multi-valued fields + hasher.reset(); + for (final List<BytesReference> values : routingValues.values()) { + for (BytesReference v : values) { + hasher.update(v.toBytesRef().bytes); + } + } + index = writeHash128(hasher.digestHash(), hash, index); + + return new BytesArray(hash, 0, index); + } + + private static int hashLen(int numberOfFields) { + return 16 + 16 + 4 * numberOfFields; + } + + private static int writeHash128(final MurmurHash3.Hash128 hash128, byte[] buffer, int index) { + ByteUtils.writeLongLE(hash128.h1, buffer, index); + index += 8; + ByteUtils.writeLongLE(hash128.h2, buffer, index); + index += 8; + return index; + } + + @Override + public RoutingFields addString(String fieldName, BytesRef utf8Value) { + try (BytesStreamOutput out = new BytesStreamOutput()) { + out.write((byte) 's'); + /* + * Write in utf8 instead of StreamOutput#writeString which is utf-16-ish + * so it's easier for folks to reason about the space taken up. Mostly + * it'll be smaller too. 
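+ * + * As a hypothetical illustration (not a compatibility guarantee): addString("host", new BytesRef("foo")) serializes + * the type tag 's' followed by a vint length and the raw UTF-8 bytes, i.e. ['s', 0x03, 'f', 'o', 'o']. 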
+ */ + out.writeBytesRef(utf8Value); + add(fieldName, out.bytes()); + + if (routingBuilder != null) { + routingBuilder.addMatching(fieldName, utf8Value); + } + } catch (IOException e) { + throw new IllegalArgumentException("Routing field cannot be serialized.", e); + } + return this; + } + + @Override + public RoutingFields addIp(String fieldName, InetAddress value) { + return addString(fieldName, NetworkAddress.format(value)); + } + + @Override + public RoutingFields addLong(String fieldName, long value) { + try (BytesStreamOutput out = new BytesStreamOutput()) { + out.write((byte) 'l'); + out.writeLong(value); + add(fieldName, out.bytes()); + } catch (IOException e) { + throw new IllegalArgumentException("Routing field cannot be serialized.", e); + } + return this; + } + + @Override + public RoutingFields addUnsignedLong(String fieldName, long value) { + try (BytesStreamOutput out = new BytesStreamOutput()) { + Object ul = DocValueFormat.UNSIGNED_LONG_SHIFTED.format(value); + if (ul instanceof Long l) { + out.write((byte) 'l'); + out.writeLong(l); + } else { + out.write((byte) 'u'); + out.writeLong(value); + } + add(fieldName, out.bytes()); + return this; + } catch (IOException e) { + throw new IllegalArgumentException("Routing field cannot be serialized.", e); + } + } + + @Override + public RoutingFields addBoolean(String fieldName, boolean value) { + try (BytesStreamOutput out = new BytesStreamOutput()) { + out.write((byte) 'b'); + out.write(value ? 't' : 'f'); + add(fieldName, out.bytes()); + } catch (IOException e) { + throw new IllegalArgumentException("Routing field cannot be serialized.", e); + } + return this; + } + + private void add(String fieldName, BytesReference encoded) throws IOException { + BytesRef name = new BytesRef(fieldName); + List<BytesReference> values = routingValues.get(name); + if (values == null) { + // optimize for the common case where routing fields are not multi-valued + routingValues.put(name, List.of(encoded)); + } else { + if (values.size() == 1) { + // converts the immutable list that's optimized for the common case of having only one value to a mutable list + BytesReference previousValue = values.get(0); + values = new ArrayList<>(4); + values.add(previousValue); + routingValues.put(name, values); + } + values.add(encoded); + } + } + 
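+ /** + * Decodes a legacy (non-hashed) _tsid back into a map of field names to values. The layout assumed here mirrors + * the writers above: a vint field count, then per field a length-prefixed UTF-8 name, a one-byte type tag ('s' string, + * 'l' long, 'u' shifted unsigned long, 'd' double, 'b' boolean) and the encoded value. Note that 'd' is accepted on + * read even though no writer in this class emits it. + */ 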
+ public static Map<String, Object> decodeAsMap(BytesRef bytesRef) { + try (StreamInput in = new BytesArray(bytesRef).streamInput()) { + int size = in.readVInt(); + Map<String, Object> result = new LinkedHashMap<>(size); + + for (int i = 0; i < size; i++) { + String name = null; + try { + name = in.readSlicedBytesReference().utf8ToString(); + } catch (AssertionError ae) { + throw new IllegalArgumentException("Error parsing routing field: " + ae.getMessage(), ae); + } + + int type = in.read(); + switch (type) { + case (byte) 's' -> { + // parse a string + try { + result.put(name, in.readSlicedBytesReference().utf8ToString()); + } catch (AssertionError ae) { + throw new IllegalArgumentException("Error parsing routing field: " + ae.getMessage(), ae); + } + } + case (byte) 'l' -> // parse a long + result.put(name, in.readLong()); + case (byte) 'u' -> { // parse an unsigned_long + Object ul = DocValueFormat.UNSIGNED_LONG_SHIFTED.format(in.readLong()); + result.put(name, ul); + } + case (byte) 'd' -> // parse a double + result.put(name, in.readDouble()); + case (byte) 'b' -> // parse a boolean + result.put(name, in.read() == 't'); + default -> throw new IllegalArgumentException("Cannot parse [" + name + "]: Unknown type [" + type + "]"); + } + } + return result; + } catch (IOException | IllegalArgumentException e) { + throw new IllegalArgumentException("Routing field cannot be deserialized: " + e.getMessage(), e); + } + } +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java index a6b2ad265decf..8af3c3e6ec270 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java @@ -12,21 +12,11 @@ import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.StringHelper; -import org.elasticsearch.cluster.routing.IndexRouting; import org.elasticsearch.common.Strings; -import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; -import org.elasticsearch.common.hash.Murmur3Hasher; -import org.elasticsearch.common.hash.MurmurHash3; import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.network.NetworkAddress; -import org.elasticsearch.common.util.ByteUtils; -import org.elasticsearch.core.Nullable; import org.elasticsearch.index.IndexMode; -import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.fielddata.FieldData; @@ -40,15 +30,10 @@ import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; import java.io.IOException; -import java.net.InetAddress; import java.time.ZoneId; -import java.util.ArrayList; import java.util.Collections; -import java.util.LinkedHashMap; import java.util.List; -import java.util.Map; import java.util.SortedMap; -import java.util.TreeMap; /** * Mapper for the {@code _tsid} field, which is generated when the index is @@ -136,15 +121,24 @@ private TimeSeriesIdFieldMapper() { public void postParse(DocumentParserContext context) throws IOException { assert fieldType().isIndexed() == false; - final TimeSeriesIdBuilder timeSeriesIdBuilder = (TimeSeriesIdBuilder) context.getDimensions(); - final BytesRef timeSeriesId = getIndexVersionCreated(context).before(IndexVersions.TIME_SERIES_ID_HASHING) - ? timeSeriesIdBuilder.buildLegacyTsid().toBytesRef() - : timeSeriesIdBuilder.buildTsidHash().toBytesRef(); + final RoutingPathFields routingPathFields = (RoutingPathFields) context.getRoutingFields(); + final BytesRef timeSeriesId; + if (getIndexVersionCreated(context).before(IndexVersions.TIME_SERIES_ID_HASHING)) { + long limit = context.indexSettings().getValue(MapperService.INDEX_MAPPING_DIMENSION_FIELDS_LIMIT_SETTING); + int size = routingPathFields.routingValues().size(); + if (size > limit) { + throw new MapperException("Too many dimension fields [" + size + "], max [" + limit + "] dimension fields allowed"); + } + timeSeriesId = buildLegacyTsid(routingPathFields).toBytesRef(); + } else { + timeSeriesId = routingPathFields.buildHash().toBytesRef(); + } context.doc().add(new SortedDocValuesField(fieldType().name(), timeSeriesId)); + TsidExtractingIdFieldMapper.createField( context, getIndexVersionCreated(context).before(IndexVersions.TIME_SERIES_ROUTING_HASH_IN_ID) ? 
routingPathFields.routingBuilder() : null, timeSeriesId ); @@ -170,231 +164,6 @@ public static Object encodeTsid(StreamInput in) { } } - public static class TimeSeriesIdBuilder implements DocumentDimensions { - - private static final int SEED = 0; - - public static final int MAX_DIMENSIONS = 512; - - private final Murmur3Hasher tsidHasher = new Murmur3Hasher(0); - - /** - * A map of the serialized values of dimension fields that will be used - * for generating the _tsid field. The map will be used by {@link TimeSeriesIdFieldMapper} - * to build the _tsid field for the document. - */ - private final SortedMap> dimensions = new TreeMap<>(); - /** - * Builds the routing. Used for building {@code _id}. If null then skipped. - */ - @Nullable - private final IndexRouting.ExtractFromSource.Builder routingBuilder; - - public TimeSeriesIdBuilder(@Nullable IndexRouting.ExtractFromSource.Builder routingBuilder) { - this.routingBuilder = routingBuilder; - } - - public BytesReference buildLegacyTsid() throws IOException { - if (dimensions.isEmpty()) { - throw new IllegalArgumentException("Dimension fields are missing."); - } - - try (BytesStreamOutput out = new BytesStreamOutput()) { - out.writeVInt(dimensions.size()); - for (Map.Entry> entry : dimensions.entrySet()) { - out.writeBytesRef(entry.getKey()); - List value = entry.getValue(); - if (value.size() > 1) { - // multi-value dimensions are only supported for newer indices that use buildTsidHash - throw new IllegalArgumentException( - "Dimension field [" + entry.getKey().utf8ToString() + "] cannot be a multi-valued field." - ); - } - assert value.isEmpty() == false : "dimension value is empty"; - value.get(0).writeTo(out); - } - return out.bytes(); - } - } - - private static final int MAX_HASH_LEN_BYTES = 2; - - static { - assert MAX_HASH_LEN_BYTES == StreamOutput.putVInt(new byte[2], tsidHashLen(MAX_DIMENSIONS), 0); - } - - /** - * Here we build the hash of the tsid using a similarity function so that we have a result - * with the following pattern: - * - * hash128(catenate(dimension field names)) + - * foreach(dimension field value, limit = MAX_DIMENSIONS) { hash32(dimension field value) } + - * hash128(catenate(dimension field values)) - * - * The idea is to be able to place 'similar' time series close to each other. Two time series - * are considered 'similar' if they share the same dimensions (names and values). 
- */ - public BytesReference buildTsidHash() { - // NOTE: hash all dimension field names - int numberOfDimensions = Math.min(MAX_DIMENSIONS, dimensions.size()); - int len = tsidHashLen(numberOfDimensions); - // either one or two bytes are occupied by the vint since we're bounded by #MAX_DIMENSIONS - byte[] tsidHash = new byte[MAX_HASH_LEN_BYTES + len]; - int tsidHashIndex = StreamOutput.putVInt(tsidHash, len, 0); - - tsidHasher.reset(); - for (final BytesRef name : dimensions.keySet()) { - tsidHasher.update(name.bytes); - } - tsidHashIndex = writeHash128(tsidHasher.digestHash(), tsidHash, tsidHashIndex); - - // NOTE: concatenate all dimension value hashes up to a certain number of dimensions - int tsidHashStartIndex = tsidHashIndex; - for (final List values : dimensions.values()) { - if ((tsidHashIndex - tsidHashStartIndex) >= 4 * numberOfDimensions) { - break; - } - assert values.isEmpty() == false : "dimension values are empty"; - final BytesRef dimensionValueBytesRef = values.get(0).toBytesRef(); - ByteUtils.writeIntLE( - StringHelper.murmurhash3_x86_32( - dimensionValueBytesRef.bytes, - dimensionValueBytesRef.offset, - dimensionValueBytesRef.length, - SEED - ), - tsidHash, - tsidHashIndex - ); - tsidHashIndex += 4; - } - - // NOTE: hash all dimension field allValues - tsidHasher.reset(); - for (final List values : dimensions.values()) { - for (BytesReference v : values) { - tsidHasher.update(v.toBytesRef().bytes); - } - } - tsidHashIndex = writeHash128(tsidHasher.digestHash(), tsidHash, tsidHashIndex); - - return new BytesArray(tsidHash, 0, tsidHashIndex); - } - - private static int tsidHashLen(int numberOfDimensions) { - return 16 + 16 + 4 * numberOfDimensions; - } - - private int writeHash128(final MurmurHash3.Hash128 hash128, byte[] buffer, int tsidHashIndex) { - ByteUtils.writeLongLE(hash128.h1, buffer, tsidHashIndex); - tsidHashIndex += 8; - ByteUtils.writeLongLE(hash128.h2, buffer, tsidHashIndex); - tsidHashIndex += 8; - return tsidHashIndex; - } - - @Override - public DocumentDimensions addString(String fieldName, BytesRef utf8Value) { - try (BytesStreamOutput out = new BytesStreamOutput()) { - out.write((byte) 's'); - /* - * Write in utf8 instead of StreamOutput#writeString which is utf-16-ish - * so it's easier for folks to reason about the space taken up. Mostly - * it'll be smaller too. 
- */ - out.writeBytesRef(utf8Value); - add(fieldName, out.bytes()); - - if (routingBuilder != null) { - routingBuilder.addMatching(fieldName, utf8Value); - } - } catch (IOException e) { - throw new IllegalArgumentException("Dimension field cannot be serialized.", e); - } - return this; - } - - @Override - public DocumentDimensions addIp(String fieldName, InetAddress value) { - return addString(fieldName, NetworkAddress.format(value)); - } - - @Override - public DocumentDimensions addLong(String fieldName, long value) { - try (BytesStreamOutput out = new BytesStreamOutput()) { - out.write((byte) 'l'); - out.writeLong(value); - add(fieldName, out.bytes()); - } catch (IOException e) { - throw new IllegalArgumentException("Dimension field cannot be serialized.", e); - } - return this; - } - - @Override - public DocumentDimensions addUnsignedLong(String fieldName, long value) { - try (BytesStreamOutput out = new BytesStreamOutput()) { - Object ul = DocValueFormat.UNSIGNED_LONG_SHIFTED.format(value); - if (ul instanceof Long l) { - out.write((byte) 'l'); - out.writeLong(l); - } else { - out.write((byte) 'u'); - out.writeLong(value); - } - add(fieldName, out.bytes()); - return this; - } catch (IOException e) { - throw new IllegalArgumentException("Dimension field cannot be serialized.", e); - } - } - - @Override - public DocumentDimensions addBoolean(String fieldName, boolean value) { - try (BytesStreamOutput out = new BytesStreamOutput()) { - out.write((byte) 'b'); - out.write(value ? 't' : 'f'); - add(fieldName, out.bytes()); - } catch (IOException e) { - throw new IllegalArgumentException("Dimension field cannot be serialized.", e); - } - return this; - } - - @Override - public DocumentDimensions validate(final IndexSettings settings) { - if (settings.getIndexVersionCreated().before(IndexVersions.TIME_SERIES_ID_HASHING) - && dimensions.size() > settings.getValue(MapperService.INDEX_MAPPING_DIMENSION_FIELDS_LIMIT_SETTING)) { - throw new MapperException( - "Too many dimension fields [" - + dimensions.size() - + "], max [" - + settings.getValue(MapperService.INDEX_MAPPING_DIMENSION_FIELDS_LIMIT_SETTING) - + "] dimension fields allowed" - ); - } - return this; - } - - private void add(String fieldName, BytesReference encoded) throws IOException { - BytesRef name = new BytesRef(fieldName); - List values = dimensions.get(name); - if (values == null) { - // optimize for the common case where dimensions are not multi-valued - dimensions.put(name, List.of(encoded)); - } else { - if (values.size() == 1) { - // converts the immutable list that's optimized for the common case of having only one value to a mutable list - BytesReference previousValue = values.get(0); - values = new ArrayList<>(4); - values.add(previousValue); - dimensions.put(name, values); - } - values.add(encoded); - } - } - } - public static Object encodeTsid(final BytesRef bytesRef) { return base64Encode(bytesRef); } @@ -405,53 +174,27 @@ private static String base64Encode(final BytesRef bytesRef) { return Strings.BASE_64_NO_PADDING_URL_ENCODER.encodeToString(bytes); } - public static Map decodeTsidAsMap(BytesRef bytesRef) { - try (StreamInput input = new BytesArray(bytesRef).streamInput()) { - return decodeTsidAsMap(input); - } catch (IOException ex) { - throw new IllegalArgumentException("Dimension field cannot be deserialized.", ex); - } - } - - public static Map decodeTsidAsMap(StreamInput in) { - try { - int size = in.readVInt(); - Map result = new LinkedHashMap<>(size); - - for (int i = 0; i < size; i++) { - String name = null; - 
try { - name = in.readSlicedBytesReference().utf8ToString(); - } catch (AssertionError ae) { - throw new IllegalArgumentException("Error parsing keyword dimension: " + ae.getMessage(), ae); - } - - int type = in.read(); - switch (type) { - case (byte) 's' -> { - // parse a string - try { - result.put(name, in.readSlicedBytesReference().utf8ToString()); - } catch (AssertionError ae) { - throw new IllegalArgumentException("Error parsing keyword dimension: " + ae.getMessage(), ae); - } - } - case (byte) 'l' -> // parse a long - result.put(name, in.readLong()); - case (byte) 'u' -> { // parse an unsigned_long - Object ul = DocValueFormat.UNSIGNED_LONG_SHIFTED.format(in.readLong()); - result.put(name, ul); - } - case (byte) 'd' -> // parse a double - result.put(name, in.readDouble()); - case (byte) 'b' -> // parse a boolean - result.put(name, in.read() == 't'); - default -> throw new IllegalArgumentException("Cannot parse [" + name + "]: Unknown type [" + type + "]"); + public static BytesReference buildLegacyTsid(RoutingPathFields routingPathFields) throws IOException { + SortedMap<BytesRef, List<BytesReference>> routingValues = routingPathFields.routingValues(); + if (routingValues.isEmpty()) { + throw new IllegalArgumentException("Dimension fields are missing."); + } + + try (BytesStreamOutput out = new BytesStreamOutput()) { + out.writeVInt(routingValues.size()); + for (var entry : routingValues.entrySet()) { + out.writeBytesRef(entry.getKey()); + List<BytesReference> value = entry.getValue(); + if (value.size() > 1) { + // multi-value dimensions are only supported for newer indices that use buildTsidHash + throw new IllegalArgumentException( + "Dimension field [" + entry.getKey().utf8ToString() + "] cannot be a multi-valued field." + ); } + assert value.isEmpty() == false : "dimension value is empty"; + value.get(0).writeTo(out); } - return result; - } catch (IOException | IllegalArgumentException e) { - throw new IllegalArgumentException("Error formatting " + NAME + ": " + e.getMessage(), e); + return out.bytes(); } } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldParser.java b/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldParser.java index 351e3149da3df..93ef04ddd159a 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldParser.java @@ -184,10 +184,7 @@ private void addField(Context context, ContentPath path, String currentName, Str final String keyedFieldName = FlattenedFieldParser.extractKey(bytesKeyedValue).utf8ToString(); if (fieldType.isDimension() && fieldType.dimensions().contains(keyedFieldName)) { final BytesRef keyedFieldValue = FlattenedFieldParser.extractValue(bytesKeyedValue); - context.documentParserContext() - .getDimensions() - .addString(rootFieldFullPath + "." + keyedFieldName, keyedFieldValue) - .validate(context.documentParserContext().indexSettings()); + context.documentParserContext().getRoutingFields().addString(rootFieldFullPath + "." 
+ keyedFieldName, keyedFieldValue); } } } diff --git a/server/src/main/java/org/elasticsearch/search/DocValueFormat.java b/server/src/main/java/org/elasticsearch/search/DocValueFormat.java index bdefee988248f..51f52326907eb 100644 --- a/server/src/main/java/org/elasticsearch/search/DocValueFormat.java +++ b/server/src/main/java/org/elasticsearch/search/DocValueFormat.java @@ -22,8 +22,8 @@ import org.elasticsearch.common.util.LocaleUtils; import org.elasticsearch.geometry.utils.Geohash; import org.elasticsearch.index.mapper.DateFieldMapper; +import org.elasticsearch.index.mapper.RoutingPathFields; import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper; -import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper.TimeSeriesIdBuilder; import org.elasticsearch.search.aggregations.bucket.geogrid.GeoTileUtils; import java.io.IOException; @@ -729,7 +729,7 @@ public Object format(BytesRef value) { try { // NOTE: if the tsid is a map of dimension key/value pairs (as it was before introducing // tsid hashing) we just decode the map and return it. - return TimeSeriesIdFieldMapper.decodeTsidAsMap(value); + return RoutingPathFields.decodeAsMap(value); } catch (Exception e) { // NOTE: otherwise the _tsid field is just a hash and we can't decode it return TimeSeriesIdFieldMapper.encodeTsid(value); @@ -760,20 +760,20 @@ private BytesRef parseBytesRefMap(Object value) { } Map<?, ?> m = (Map<?, ?>) value; - TimeSeriesIdBuilder builder = new TimeSeriesIdBuilder(null); + RoutingPathFields routingPathFields = new RoutingPathFields(null); for (Map.Entry<?, ?> entry : m.entrySet()) { String f = entry.getKey().toString(); Object v = entry.getValue(); if (v instanceof String s) { - builder.addString(f, s); + routingPathFields.addString(f, s); } else if (v instanceof Long l) { - builder.addLong(f, l); + routingPathFields.addLong(f, l); } else if (v instanceof Integer i) { - builder.addLong(f, i.longValue()); + routingPathFields.addLong(f, i.longValue()); } else if (v instanceof BigInteger ul) { long ll = UNSIGNED_LONG_SHIFTED.parseLong(ul.toString(), false, () -> 0L); - builder.addUnsignedLong(f, ll); + routingPathFields.addUnsignedLong(f, ll); } else { throw new IllegalArgumentException("Unexpected value in tsid object [" + v + "]"); } @@ -781,7 +781,7 @@ private BytesRef parseBytesRefMap(Object value) { try { // NOTE: we can decode the tsid only if it is not hashed (represented as a map) - return builder.buildLegacyTsid().toBytesRef(); + return TimeSeriesIdFieldMapper.buildLegacyTsid(routingPathFields).toBytesRef(); } catch (IOException e) { throw new IllegalArgumentException(e); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/InternalAggregation.java b/server/src/main/java/org/elasticsearch/search/aggregations/InternalAggregation.java index 38cab1761d409..b829afb0c23b0 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/InternalAggregation.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/InternalAggregation.java @@ -35,7 +35,6 @@ */ public abstract class InternalAggregation implements Aggregation, NamedWriteable { protected final String name; - protected final Map<String, Object> metadata; /** @@ -53,12 +52,14 @@ protected InternalAggregation(String name, Map<String, Object> metadata) { */ protected InternalAggregation(StreamInput in) throws IOException { final String name = in.readString(); + final Map<String, Object> metadata = in.readGenericMap(); if (in instanceof DelayableWriteable.Deduplicator d) { this.name = d.deduplicate(name); + this.metadata = metadata == null || metadata.isEmpty() ? 
metadata : d.deduplicate(metadata); } else { this.name = name; + this.metadata = metadata; } - metadata = in.readGenericMap(); } @Override diff --git a/server/src/main/java/org/elasticsearch/threadpool/DefaultBuiltInExecutorBuilders.java b/server/src/main/java/org/elasticsearch/threadpool/DefaultBuiltInExecutorBuilders.java index a97d22a976631..32634043cfc98 100644 --- a/server/src/main/java/org/elasticsearch/threadpool/DefaultBuiltInExecutorBuilders.java +++ b/server/src/main/java/org/elasticsearch/threadpool/DefaultBuiltInExecutorBuilders.java @@ -68,7 +68,7 @@ public Map<String, ExecutorBuilder> getBuilders(Settings settings, int allocated settings, ThreadPool.Names.SEARCH, searchOrGetThreadPoolSize, - 1000, + searchOrGetThreadPoolSize * 1000, new EsExecutors.TaskTrackingConfig(true, searchAutoscalingEWMA) ) ); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IdLoaderTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IdLoaderTests.java index b07ec8e7cb683..083efccceec16 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/IdLoaderTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/IdLoaderTests.java @@ -27,11 +27,7 @@ import org.apache.lucene.tests.analysis.MockAnalyzer; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.util.BytesRef; -import org.elasticsearch.cluster.metadata.IndexMetadata; -import org.elasticsearch.cluster.routing.IndexRouting; import org.elasticsearch.core.CheckedConsumer; -import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.index.IndexVersion; import org.elasticsearch.test.ESTestCase; import java.io.IOException; @@ -72,8 +68,6 @@ public void testSynthesizeIdSimple() throws Exception { } public void testSynthesizeIdMultipleSegments() throws Exception { - var routingPaths = List.of("dim1"); - var routing = createRouting(routingPaths); var idLoader = IdLoader.createTsIdLoader(null, null); long startTime = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2023-01-01T00:00:00Z"); @@ -144,8 +138,6 @@ public void testSynthesizeIdMultipleSegments() throws Exception { } public void testSynthesizeIdRandom() throws Exception { - var routingPaths = List.of("dim1"); - var routing = createRouting(routingPaths); var idLoader = IdLoader.createTsIdLoader(null, null); long startTime = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.parseMillis("2023-01-01T00:00:00Z"); @@ -153,7 +145,6 @@ public void testSynthesizeIdRandom() throws Exception { List<Doc> randomDocs = new ArrayList<>(); int numberOfTimeSeries = randomIntBetween(8, 64); for (int i = 0; i < numberOfTimeSeries; i++) { - long routingId = 0; int numberOfDimensions = randomIntBetween(1, 6); List<Dimension> dimensions = new ArrayList<>(numberOfDimensions); for (int j = 1; j <= numberOfDimensions; j++) { @@ -165,7 +156,6 @@ public void testSynthesizeIdRandom() throws Exception { value = randomAlphaOfLength(4); } dimensions.add(new Dimension(fieldName, value)); - routingId = value.hashCode(); } int numberOfSamples = randomIntBetween(1, 16); for (int j = 0; j < numberOfSamples; j++) { @@ -225,21 +215,21 @@ private void prepareIndexReader( } private static void indexDoc(IndexWriter iw, Doc doc, int routingHash) throws IOException { - final TimeSeriesIdFieldMapper.TimeSeriesIdBuilder builder = new TimeSeriesIdFieldMapper.TimeSeriesIdBuilder(null); + var routingFields = new RoutingPathFields(null); final List<IndexableField> fields = new ArrayList<>(); fields.add(new SortedNumericDocValuesField(DataStreamTimestampFieldMapper.DEFAULT_PATH, doc.timestamp)); fields.add(new 
LongPoint(DataStreamTimestampFieldMapper.DEFAULT_PATH, doc.timestamp)); for (Dimension dimension : doc.dimensions) { if (dimension.value instanceof Number n) { - builder.addLong(dimension.field, n.longValue()); + routingFields.addLong(dimension.field, n.longValue()); fields.add(new SortedNumericDocValuesField(dimension.field, ((Number) dimension.value).longValue())); } else { - builder.addString(dimension.field, dimension.value.toString()); + routingFields.addString(dimension.field, dimension.value.toString()); fields.add(new SortedSetDocValuesField(dimension.field, new BytesRef(dimension.value.toString()))); } } - BytesRef tsid = builder.buildTsidHash().toBytesRef(); + BytesRef tsid = routingFields.buildHash().toBytesRef(); fields.add(new SortedDocValuesField(TimeSeriesIdFieldMapper.NAME, tsid)); fields.add( new SortedDocValuesField( @@ -251,25 +241,15 @@ private static void indexDoc(IndexWriter iw, Doc doc, int routingHash) throws IO } private static String expectedId(Doc doc, int routingHash) throws IOException { - var timeSeriesIdBuilder = new TimeSeriesIdFieldMapper.TimeSeriesIdBuilder(null); + var routingFields = new RoutingPathFields(null); for (Dimension dimension : doc.dimensions) { if (dimension.value instanceof Number n) { - timeSeriesIdBuilder.addLong(dimension.field, n.longValue()); + routingFields.addLong(dimension.field, n.longValue()); } else { - timeSeriesIdBuilder.addString(dimension.field, dimension.value.toString()); + routingFields.addString(dimension.field, dimension.value.toString()); } } - return TsidExtractingIdFieldMapper.createId(routingHash, timeSeriesIdBuilder.buildTsidHash().toBytesRef(), doc.timestamp); - } - - private static IndexRouting.ExtractFromSource createRouting(List routingPaths) { - var settings = indexSettings(IndexVersion.current(), 2, 1).put(IndexSettings.MODE.getKey(), "time_series") - .put(IndexSettings.TIME_SERIES_START_TIME.getKey(), "2000-01-01T00:00:00.000Z") - .put(IndexSettings.TIME_SERIES_END_TIME.getKey(), "2001-01-01T00:00:00.000Z") - .putList(IndexMetadata.INDEX_ROUTING_PATH.getKey(), routingPaths) - .build(); - var indexMetadata = IndexMetadata.builder("index").settings(settings).build(); - return (IndexRouting.ExtractFromSource) IndexRouting.fromIndexMetadata(indexMetadata); + return TsidExtractingIdFieldMapper.createId(routingHash, routingFields.buildHash().toBytesRef(), doc.timestamp); } record Doc(long timestamp, List dimensions) {} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/RoutingPathFieldsTests.java b/server/src/test/java/org/elasticsearch/index/mapper/RoutingPathFieldsTests.java new file mode 100644 index 0000000000000..2c2c0d160c904 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/mapper/RoutingPathFieldsTests.java @@ -0,0 +1,91 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.index.mapper; + +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.routing.IndexRouting; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.test.ESTestCase; + +public class RoutingPathFieldsTests extends ESTestCase { + + public void testWithBuilder() throws Exception { + IndexSettings settings = new IndexSettings( + IndexMetadata.builder("test") + .settings( + indexSettings(IndexVersion.current(), 1, 1).put( + Settings.builder().put("index.mode", "time_series").put("index.routing_path", "path.*").build() + ) + ) + .build(), + Settings.EMPTY + ); + IndexRouting.ExtractFromSource routing = (IndexRouting.ExtractFromSource) settings.getIndexRouting(); + + var routingPathFields = new RoutingPathFields(routing.builder()); + BytesReference current, previous; + + routingPathFields.addString("path.string_name", randomAlphaOfLengthBetween(1, 10)); + current = previous = routingPathFields.buildHash(); + assertNotNull(current); + + routingPathFields.addBoolean("path.boolean_name", randomBoolean()); + current = routingPathFields.buildHash(); + assertTrue(current.length() > previous.length()); + previous = current; + + routingPathFields.addLong("path.long_name", randomLong()); + current = routingPathFields.buildHash(); + assertTrue(current.length() > previous.length()); + previous = current; + + routingPathFields.addIp("path.ip_name", randomIp(randomBoolean())); + current = routingPathFields.buildHash(); + assertTrue(current.length() > previous.length()); + previous = current; + + routingPathFields.addUnsignedLong("path.unsigned_long_name", randomLongBetween(0, Long.MAX_VALUE)); + current = routingPathFields.buildHash(); + assertTrue(current.length() > previous.length()); + assertArrayEquals(current.array(), routingPathFields.buildHash().array()); + } + + public void testWithoutBuilder() throws Exception { + var routingPathFields = new RoutingPathFields(null); + BytesReference current, previous; + + routingPathFields.addString("path.string_name", randomAlphaOfLengthBetween(1, 10)); + current = previous = routingPathFields.buildHash(); + assertNotNull(current); + + routingPathFields.addBoolean("path.boolean_name", randomBoolean()); + current = routingPathFields.buildHash(); + assertTrue(current.length() > previous.length()); + previous = current; + + routingPathFields.addLong("path.long_name", randomLong()); + current = routingPathFields.buildHash(); + assertTrue(current.length() > previous.length()); + previous = current; + + routingPathFields.addIp("path.ip_name", randomIp(randomBoolean())); + current = routingPathFields.buildHash(); + assertTrue(current.length() > previous.length()); + previous = current; + + routingPathFields.addUnsignedLong("path.unsigned_long_name", randomLongBetween(0, Long.MAX_VALUE)); + current = routingPathFields.buildHash(); + assertTrue(current.length() > previous.length()); + assertArrayEquals(current.array(), routingPathFields.buildHash().array()); + } +} diff --git a/server/src/test/java/org/elasticsearch/search/DocValueFormatTests.java b/server/src/test/java/org/elasticsearch/search/DocValueFormatTests.java index 5371893993318..e81066a731d2e 100644 --- a/server/src/test/java/org/elasticsearch/search/DocValueFormatTests.java +++ b/server/src/test/java/org/elasticsearch/search/DocValueFormatTests.java @@ -20,7 +20,7 @@ import 
org.elasticsearch.common.network.InetAddresses; import org.elasticsearch.common.time.DateFormatter; import org.elasticsearch.index.mapper.DateFieldMapper.Resolution; -import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper.TimeSeriesIdBuilder; +import org.elasticsearch.index.mapper.RoutingPathFields; import org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper; import org.elasticsearch.test.ESTestCase; @@ -379,11 +379,11 @@ public void testParseZone() { } public void testParseTsid() throws IOException { - TimeSeriesIdBuilder timeSeriesIdBuilder = new TimeSeriesIdBuilder(null); - timeSeriesIdBuilder.addString("string", randomAlphaOfLength(10)); - timeSeriesIdBuilder.addLong("long", randomLong()); - timeSeriesIdBuilder.addUnsignedLong("ulong", randomLong()); - BytesRef expected = timeSeriesIdBuilder.buildTsidHash().toBytesRef(); + var routingFields = new RoutingPathFields(null); + routingFields.addString("string", randomAlphaOfLength(10)); + routingFields.addLong("long", randomLong()); + routingFields.addUnsignedLong("ulong", randomLong()); + BytesRef expected = routingFields.buildHash().toBytesRef(); byte[] expectedBytes = new byte[expected.length]; System.arraycopy(expected.bytes, 0, expectedBytes, 0, expected.length); BytesRef actual = DocValueFormat.TIME_SERIES_ID.parseBytesRef(expected); diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/rate/TimeSeriesRateAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/rate/TimeSeriesRateAggregatorTests.java index 3c7a18de536bc..e684092099948 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/rate/TimeSeriesRateAggregatorTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/rate/TimeSeriesRateAggregatorTests.java @@ -24,7 +24,7 @@ import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperBuilderContext; import org.elasticsearch.index.mapper.NumberFieldMapper; -import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper; +import org.elasticsearch.index.mapper.RoutingPathFields; import org.elasticsearch.index.mapper.TimeSeriesParams; import org.elasticsearch.plugins.SearchPlugin; import org.elasticsearch.search.aggregations.AggregatorTestCase; @@ -176,9 +176,9 @@ private List docs(long startTimestamp, String dim, long... 
values) thr } private static BytesReference tsid(String dim) throws IOException { - TimeSeriesIdFieldMapper.TimeSeriesIdBuilder idBuilder = new TimeSeriesIdFieldMapper.TimeSeriesIdBuilder(null); - idBuilder.addString("dim", dim); - return idBuilder.buildTsidHash(); + var routingFields = new RoutingPathFields(null); + routingFields.addString("dim", dim); + return routingFields.buildHash(); } private Document doc(long timestamp, BytesReference tsid, long counterValue, String dim) { diff --git a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationChecks.java b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationChecks.java index c80f26cda7b36..d13f3cda2a82c 100644 --- a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationChecks.java +++ b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationChecks.java @@ -90,7 +90,7 @@ private DeprecationChecks() {} NodeDeprecationChecks::checkWatcherBulkConcurrentRequestsSetting ); - static List<Function<IndexMetadata, DeprecationIssue>> INDEX_SETTINGS_CHECKS = List.of( + static List<BiFunction<IndexMetadata, ClusterState, DeprecationIssue>> INDEX_SETTINGS_CHECKS = List.of( IndexDeprecationChecks::oldIndicesCheck, IndexDeprecationChecks::translogRetentionSettingCheck, IndexDeprecationChecks::checkIndexDataPath, diff --git a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationInfoAction.java b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationInfoAction.java index cd26e23394e81..87d0bfb93e18c 100644 --- a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationInfoAction.java +++ b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/DeprecationInfoAction.java @@ -274,7 +274,7 @@ public static DeprecationInfoAction.Response from( IndexNameExpressionResolver indexNameExpressionResolver, Request request, NodesDeprecationCheckResponse nodeDeprecationResponse, - List<Function<IndexMetadata, DeprecationIssue>> indexSettingsChecks, + List<BiFunction<IndexMetadata, ClusterState, DeprecationIssue>> indexSettingsChecks, List<BiFunction<DataStream, ClusterState, DeprecationIssue>> dataStreamChecks, List<Function<ClusterState, DeprecationIssue>> clusterSettingsChecks, Map<String, List<DeprecationIssue>> pluginSettingIssues, @@ -293,7 +293,10 @@ public static DeprecationInfoAction.Response from( Map<String, List<DeprecationIssue>> indexSettingsIssues = new HashMap<>(); for (String concreteIndex : concreteIndexNames) { IndexMetadata indexMetadata = stateWithSkippedSettingsRemoved.getMetadata().index(concreteIndex); - List<DeprecationIssue> singleIndexIssues = filterChecks(indexSettingsChecks, c -> c.apply(indexMetadata)); + List<DeprecationIssue> singleIndexIssues = filterChecks( + indexSettingsChecks, + c -> c.apply(indexMetadata, stateWithSkippedSettingsRemoved) + ); if (singleIndexIssues.size() > 0) { indexSettingsIssues.put(concreteIndex, singleIndexIssues); } diff --git a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/IndexDeprecationChecks.java b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/IndexDeprecationChecks.java index 3da32c7f5a4c2..8144d960df2e8 100644 --- a/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/IndexDeprecationChecks.java +++ b/x-pack/plugin/deprecation/src/main/java/org/elasticsearch/xpack/deprecation/IndexDeprecationChecks.java @@ -6,6 +6,7 @@ */ package org.elasticsearch.xpack.deprecation; +import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.MappingMetadata; import org.elasticsearch.common.time.DateFormatter; @@ -30,14 +31,15 @@ */ public class IndexDeprecationChecks { - static 
DeprecationIssue oldIndicesCheck(IndexMetadata indexMetadata) { + static DeprecationIssue oldIndicesCheck(IndexMetadata indexMetadata, ClusterState clusterState) { // TODO: this check needs to be revised. It's trivially true right now. IndexVersion currentCompatibilityVersion = indexMetadata.getCompatibilityVersion(); - if (currentCompatibilityVersion.before(IndexVersions.V_7_0_0)) { + // We intentionally exclude indices that are in data streams because they will be picked up by DataStreamDeprecationChecks + if (currentCompatibilityVersion.before(IndexVersions.V_8_0_0) && isNotDataStreamIndex(indexMetadata, clusterState)) { return new DeprecationIssue( DeprecationIssue.Level.CRITICAL, - "Old index with a compatibility version < 7.0", - "https://www.elastic.co/guide/en/elasticsearch/reference/master/" + "breaking-changes-8.0.html", + "Old index with a compatibility version < 8.0", + "https://www.elastic.co/guide/en/elasticsearch/reference/master/breaking-changes-9.0.html", "This index has version: " + currentCompatibilityVersion.toReleaseVersion(), false, null @@ -46,7 +48,11 @@ static DeprecationIssue oldIndicesCheck(IndexMetadata indexMetadata) { return null; } - static DeprecationIssue translogRetentionSettingCheck(IndexMetadata indexMetadata) { + private static boolean isNotDataStreamIndex(IndexMetadata indexMetadata, ClusterState clusterState) { + return clusterState.metadata().findDataStreams(indexMetadata.getIndex().getName()).isEmpty(); + } + + static DeprecationIssue translogRetentionSettingCheck(IndexMetadata indexMetadata, ClusterState clusterState) { final boolean softDeletesEnabled = IndexSettings.INDEX_SOFT_DELETES_SETTING.get(indexMetadata.getSettings()); if (softDeletesEnabled) { if (IndexSettings.INDEX_TRANSLOG_RETENTION_SIZE_SETTING.exists(indexMetadata.getSettings()) @@ -73,7 +79,7 @@ static DeprecationIssue translogRetentionSettingCheck(IndexMetadata indexMetadat return null; } - static DeprecationIssue checkIndexDataPath(IndexMetadata indexMetadata) { + static DeprecationIssue checkIndexDataPath(IndexMetadata indexMetadata, ClusterState clusterState) { if (IndexMetadata.INDEX_DATA_PATH_SETTING.exists(indexMetadata.getSettings())) { final String message = String.format( Locale.ROOT, @@ -88,7 +94,7 @@ static DeprecationIssue checkIndexDataPath(IndexMetadata indexMetadata) { return null; } - static DeprecationIssue storeTypeSettingCheck(IndexMetadata indexMetadata) { + static DeprecationIssue storeTypeSettingCheck(IndexMetadata indexMetadata, ClusterState clusterState) { final String storeType = IndexModule.INDEX_STORE_TYPE_SETTING.get(indexMetadata.getSettings()); if (IndexModule.Type.SIMPLEFS.match(storeType)) { return new DeprecationIssue( @@ -105,7 +111,7 @@ static DeprecationIssue storeTypeSettingCheck(IndexMetadata indexMetadata) { return null; } - static DeprecationIssue frozenIndexSettingCheck(IndexMetadata indexMetadata) { + static DeprecationIssue frozenIndexSettingCheck(IndexMetadata indexMetadata, ClusterState clusterState) { Boolean isIndexFrozen = FrozenEngine.INDEX_FROZEN.get(indexMetadata.getSettings()); if (Boolean.TRUE.equals(isIndexFrozen)) { String indexName = indexMetadata.getIndex().getName(); @@ -195,7 +201,7 @@ static List findInPropertiesRecursively( return issues; } - static DeprecationIssue deprecatedCamelCasePattern(IndexMetadata indexMetadata) { + static DeprecationIssue deprecatedCamelCasePattern(IndexMetadata indexMetadata, ClusterState clusterState) { List fields = new ArrayList<>(); fieldLevelMappingIssue( indexMetadata, diff --git 
a/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/DeprecationInfoActionResponseTests.java b/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/DeprecationInfoActionResponseTests.java index 5750daa8e3673..67950f3b9f623 100644 --- a/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/DeprecationInfoActionResponseTests.java +++ b/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/DeprecationInfoActionResponseTests.java @@ -117,7 +117,9 @@ public void testFrom() throws IOException { boolean dataStreamIssueFound = randomBoolean(); DeprecationIssue foundIssue = createTestDeprecationIssue(); List> clusterSettingsChecks = List.of((s) -> clusterIssueFound ? foundIssue : null); - List> indexSettingsChecks = List.of((idx) -> indexIssueFound ? foundIssue : null); + List> indexSettingsChecks = List.of( + (idx, cs) -> indexIssueFound ? foundIssue : null + ); List> dataStreamChecks = List.of( (ds, cs) -> dataStreamIssueFound ? foundIssue : null ); @@ -211,7 +213,7 @@ public void testFromWithMergeableNodeIssues() throws IOException { DeprecationIssue foundIssue1 = createTestDeprecationIssue(metaMap1); DeprecationIssue foundIssue2 = createTestDeprecationIssue(foundIssue1, metaMap2); List> clusterSettingsChecks = Collections.emptyList(); - List> indexSettingsChecks = List.of((idx) -> null); + List> indexSettingsChecks = List.of((idx, cs) -> null); List> dataStreamChecks = List.of((ds, cs) -> null); NodesDeprecationCheckResponse nodeDeprecationIssues = new NodesDeprecationCheckResponse( @@ -276,10 +278,12 @@ public void testRemoveSkippedSettings() throws IOException { return null; })); AtomicReference visibleIndexSettings = new AtomicReference<>(); - List> indexSettingsChecks = Collections.unmodifiableList(Arrays.asList((idx) -> { - visibleIndexSettings.set(idx.getSettings()); - return null; - })); + List> indexSettingsChecks = Collections.unmodifiableList( + Arrays.asList((idx, cs) -> { + visibleIndexSettings.set(idx.getSettings()); + return null; + }) + ); AtomicInteger backingIndicesCount = new AtomicInteger(0); List> dataStreamChecks = Collections.unmodifiableList( Arrays.asList((ds, cs) -> { diff --git a/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/IndexDeprecationChecksTests.java b/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/IndexDeprecationChecksTests.java index 18872d00d54a0..48cbef6831a2b 100644 --- a/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/IndexDeprecationChecksTests.java +++ b/x-pack/plugin/deprecation/src/test/java/org/elasticsearch/xpack/deprecation/IndexDeprecationChecksTests.java @@ -7,8 +7,15 @@ package org.elasticsearch.xpack.deprecation; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.DataStream; +import org.elasticsearch.cluster.metadata.DataStreamMetadata; +import org.elasticsearch.cluster.metadata.DataStreamOptions; import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.common.collect.ImmutableOpenMap; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.IndexModule; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; @@ -19,39 +26,89 @@ import java.io.IOException; import java.util.List; +import java.util.Map; import static java.util.Collections.singletonList; 
import static org.elasticsearch.xpack.deprecation.DeprecationChecks.INDEX_SETTINGS_CHECKS; import static org.hamcrest.Matchers.empty; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasItem; import static org.hamcrest.collection.IsIterableContainingInOrder.contains; public class IndexDeprecationChecksTests extends ESTestCase { public void testOldIndicesCheck() { - IndexVersion createdWith = IndexVersion.fromId(1000099); + IndexVersion createdWith = IndexVersion.fromId(7170099); IndexMetadata indexMetadata = IndexMetadata.builder("test") .settings(settings(createdWith)) .numberOfShards(1) .numberOfReplicas(0) .build(); + ClusterState clusterState = ClusterState.builder(ClusterState.EMPTY_STATE) + .metadata(Metadata.builder().put(indexMetadata, true)) + .build(); DeprecationIssue expected = new DeprecationIssue( DeprecationIssue.Level.CRITICAL, - "Old index with a compatibility version < 7.0", - "https://www.elastic.co/guide/en/elasticsearch/reference/master/" + "breaking-changes-8.0.html", + "Old index with a compatibility version < 8.0", + "https://www.elastic.co/guide/en/elasticsearch/reference/master/breaking-changes-9.0.html", "This index has version: " + createdWith.toReleaseVersion(), false, null ); - List issues = DeprecationChecks.filterChecks(INDEX_SETTINGS_CHECKS, c -> c.apply(indexMetadata)); + List issues = DeprecationChecks.filterChecks(INDEX_SETTINGS_CHECKS, c -> c.apply(indexMetadata, clusterState)); assertEquals(singletonList(expected), issues); } + public void testOldIndicesCheckDataStreamIndex() { + IndexVersion createdWith = IndexVersion.fromId(7170099); + IndexMetadata indexMetadata = IndexMetadata.builder(".ds-test") + .settings(settings(createdWith).put("index.hidden", true)) + .numberOfShards(1) + .numberOfReplicas(0) + .build(); + DataStream dataStream = new DataStream( + randomAlphaOfLength(10), + List.of(indexMetadata.getIndex()), + randomNegativeLong(), + Map.of(), + randomBoolean(), + false, + false, + randomBoolean(), + randomFrom(IndexMode.values()), + null, + randomFrom(DataStreamOptions.EMPTY, DataStreamOptions.FAILURE_STORE_DISABLED, DataStreamOptions.FAILURE_STORE_ENABLED, null), + List.of(), + randomBoolean(), + null + ); + ClusterState clusterState = ClusterState.builder(ClusterState.EMPTY_STATE) + .metadata( + Metadata.builder() + .put(indexMetadata, true) + .customs( + Map.of( + DataStreamMetadata.TYPE, + new DataStreamMetadata( + ImmutableOpenMap.builder(Map.of("my-data-stream", dataStream)).build(), + ImmutableOpenMap.of() + ) + ) + ) + ) + .build(); + List issues = DeprecationChecks.filterChecks(INDEX_SETTINGS_CHECKS, c -> c.apply(indexMetadata, clusterState)); + assertThat(issues.size(), equalTo(0)); + } + public void testTranslogRetentionSettings() { Settings.Builder settings = settings(IndexVersion.current()); settings.put(IndexSettings.INDEX_TRANSLOG_RETENTION_AGE_SETTING.getKey(), randomPositiveTimeValue()); settings.put(IndexSettings.INDEX_TRANSLOG_RETENTION_SIZE_SETTING.getKey(), between(1, 1024) + "b"); IndexMetadata indexMetadata = IndexMetadata.builder("test").settings(settings).numberOfShards(1).numberOfReplicas(0).build(); - List issues = DeprecationChecks.filterChecks(INDEX_SETTINGS_CHECKS, c -> c.apply(indexMetadata)); + List issues = DeprecationChecks.filterChecks( + INDEX_SETTINGS_CHECKS, + c -> c.apply(indexMetadata, ClusterState.EMPTY_STATE) + ); assertThat( issues, contains( @@ -81,7 +138,10 @@ public void testDefaultTranslogRetentionSettings() { 
@@ -81,7 +138,10 @@ public void testDefaultTranslogRetentionSettings() {
             settings.put(IndexSettings.INDEX_SOFT_DELETES_SETTING.getKey(), false);
         }
         IndexMetadata indexMetadata = IndexMetadata.builder("test").settings(settings).numberOfShards(1).numberOfReplicas(0).build();
-        List<DeprecationIssue> issues = DeprecationChecks.filterChecks(INDEX_SETTINGS_CHECKS, c -> c.apply(indexMetadata));
+        List<DeprecationIssue> issues = DeprecationChecks.filterChecks(
+            INDEX_SETTINGS_CHECKS,
+            c -> c.apply(indexMetadata, ClusterState.EMPTY_STATE)
+        );
         assertThat(issues, empty());
     }

@@ -89,7 +149,10 @@ public void testIndexDataPathSetting() {
         Settings.Builder settings = settings(IndexVersion.current());
         settings.put(IndexMetadata.INDEX_DATA_PATH_SETTING.getKey(), createTempDir());
         IndexMetadata indexMetadata = IndexMetadata.builder("test").settings(settings).numberOfShards(1).numberOfReplicas(0).build();
-        List<DeprecationIssue> issues = DeprecationChecks.filterChecks(INDEX_SETTINGS_CHECKS, c -> c.apply(indexMetadata));
+        List<DeprecationIssue> issues = DeprecationChecks.filterChecks(
+            INDEX_SETTINGS_CHECKS,
+            c -> c.apply(indexMetadata, ClusterState.EMPTY_STATE)
+        );
         final String expectedUrl =
             "https://www.elastic.co/guide/en/elasticsearch/reference/7.13/breaking-changes-7.13.html#deprecate-shared-data-path-setting";
         assertThat(
@@ -111,7 +174,10 @@ public void testSimpleFSSetting() {
         Settings.Builder settings = settings(IndexVersion.current());
         settings.put(IndexModule.INDEX_STORE_TYPE_SETTING.getKey(), "simplefs");
         IndexMetadata indexMetadata = IndexMetadata.builder("test").settings(settings).numberOfShards(1).numberOfReplicas(0).build();
-        List<DeprecationIssue> issues = DeprecationChecks.filterChecks(INDEX_SETTINGS_CHECKS, c -> c.apply(indexMetadata));
+        List<DeprecationIssue> issues = DeprecationChecks.filterChecks(
+            INDEX_SETTINGS_CHECKS,
+            c -> c.apply(indexMetadata, ClusterState.EMPTY_STATE)
+        );
         assertThat(
             issues,
             contains(
@@ -133,7 +199,10 @@ public void testFrozenIndex() {
         Settings.Builder settings = settings(IndexVersion.current());
         settings.put(FrozenEngine.INDEX_FROZEN.getKey(), true);
         IndexMetadata indexMetadata = IndexMetadata.builder("test").settings(settings).numberOfShards(1).numberOfReplicas(0).build();
-        List<DeprecationIssue> issues = DeprecationChecks.filterChecks(INDEX_SETTINGS_CHECKS, c -> c.apply(indexMetadata));
+        List<DeprecationIssue> issues = DeprecationChecks.filterChecks(
+            INDEX_SETTINGS_CHECKS,
+            c -> c.apply(indexMetadata, ClusterState.EMPTY_STATE)
+        );
         assertThat(
             issues,
             contains(
@@ -175,7 +244,10 @@ public void testCamelCaseDeprecation() throws IOException {
             false,
             null
         );
-        List<DeprecationIssue> issues = DeprecationChecks.filterChecks(INDEX_SETTINGS_CHECKS, c -> c.apply(simpleIndex));
+        List<DeprecationIssue> issues = DeprecationChecks.filterChecks(
+            INDEX_SETTINGS_CHECKS,
+            c -> c.apply(simpleIndex, ClusterState.EMPTY_STATE)
+        );
         assertThat(issues, hasItem(expected));
     }
 }
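Every test above funnels through DeprecationChecks.filterChecks, which runs each check and keeps the non-null issues. A sketch of that contract, inferred from the call sites in these tests:

    // Sketch of filterChecks: apply every check, drop the nulls.
    static <T> List<DeprecationIssue> filterChecks(List<T> checks, Function<T, DeprecationIssue> mapper) {
        return checks.stream().map(mapper).filter(Objects::nonNull).toList();
    }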
diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/TimeSeriesSortedSourceOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/TimeSeriesSortedSourceOperatorTests.java
index b126ca8af0e31..4863eea5d5ca3 100644
--- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/TimeSeriesSortedSourceOperatorTests.java
+++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/TimeSeriesSortedSourceOperatorTests.java
@@ -45,6 +45,7 @@
 import org.elasticsearch.index.mapper.DateFieldMapper;
 import org.elasticsearch.index.mapper.KeywordFieldMapper;
 import org.elasticsearch.index.mapper.NumberFieldMapper;
+import org.elasticsearch.index.mapper.RoutingPathFields;
 import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper;
 import org.hamcrest.Matcher;
 import org.junit.After;
@@ -363,12 +364,12 @@ public static void writeTS(RandomIndexWriter iw, long timestamp, Object[] dimens
         final List<IndexableField> fields = new ArrayList<>();
         fields.add(new SortedNumericDocValuesField(DataStreamTimestampFieldMapper.DEFAULT_PATH, timestamp));
         fields.add(new LongPoint(DataStreamTimestampFieldMapper.DEFAULT_PATH, timestamp));
-        final TimeSeriesIdFieldMapper.TimeSeriesIdBuilder builder = new TimeSeriesIdFieldMapper.TimeSeriesIdBuilder(null);
+        var routingPathFields = new RoutingPathFields(null);
         for (int i = 0; i < dimensions.length; i += 2) {
             if (dimensions[i + 1] instanceof Number n) {
-                builder.addLong(dimensions[i].toString(), n.longValue());
+                routingPathFields.addLong(dimensions[i].toString(), n.longValue());
             } else {
-                builder.addString(dimensions[i].toString(), dimensions[i + 1].toString());
+                routingPathFields.addString(dimensions[i].toString(), dimensions[i + 1].toString());
                 fields.add(new SortedSetDocValuesField(dimensions[i].toString(), new BytesRef(dimensions[i + 1].toString())));
             }
         }
@@ -382,7 +383,9 @@ public static void writeTS(RandomIndexWriter iw, long timestamp, Object[] dimens
             }
         }
         // Use legacy tsid to make tests easier to understand:
-        fields.add(new SortedDocValuesField(TimeSeriesIdFieldMapper.NAME, builder.buildLegacyTsid().toBytesRef()));
+        fields.add(
+            new SortedDocValuesField(TimeSeriesIdFieldMapper.NAME, TimeSeriesIdFieldMapper.buildLegacyTsid(routingPathFields).toBytesRef())
+        );
         iw.addDocument(fields);
     }
 }
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java
index 9039177e0643d..9c173795d0ab1 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java
@@ -51,6 +51,7 @@
 import org.elasticsearch.xpack.esql.expression.function.UnsupportedAttribute;
 import org.elasticsearch.xpack.esql.expression.function.scalar.EsqlScalarFunction;
 import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction;
+import org.elasticsearch.xpack.esql.expression.function.scalar.convert.FoldablesConvertFunction;
 import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble;
 import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger;
 import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong;
@@ -1226,6 +1227,16 @@ private Expression resolveConvertFunction(AbstractConvertFunction convert, List<
         if (convert.field() instanceof FieldAttribute fa && fa.field() instanceof InvalidMappedField imf) {
             HashMap<TypeResolutionKey, DataType> typeResolutions = new HashMap<>();
             Set<DataType> supportedTypes = convert.supportedTypes();
+            if (convert instanceof FoldablesConvertFunction fcf) {
+                // FoldablesConvertFunction does not accept fields as inputs, they only accept constants
+                String unresolvedMessage = "argument of ["
+                    + fcf.sourceText()
+                    + "] must be a constant, received ["
+                    + Expressions.name(fa)
+                    + "]";
+                Expression ua = new UnresolvedAttribute(fa.source(), fa.name(), unresolvedMessage);
+                return fcf.replaceChildren(Collections.singletonList(ua));
+            }
             imf.types().forEach(type -> {
                 if (supportedTypes.contains(type.widenSmallNumeric())) {
                     TypeResolutionKey key = new TypeResolutionKey(fa.name(), type);
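The Analyzer hunk above adds an early exit for FoldablesConvertFunction before the per-type union resolution runs. Condensed, the new control flow is (a sketch; the surrounding method is elided):

    // Sketch: constant-only conversion functions opt out of union-type resolution.
    // Instead of converting per mapped type, the argument is rewritten into an
    // UnresolvedAttribute whose message becomes the verifier error.
    if (convert instanceof FoldablesConvertFunction fcf) {
        String msg = "argument of [" + fcf.sourceText() + "] must be a constant, received [" + Expressions.name(fa) + "]";
        return fcf.replaceChildren(Collections.singletonList(new UnresolvedAttribute(fa.source(), fa.name(), msg)));
    }
    // otherwise: fall through and resolve the InvalidMappedField type by type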
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/AbstractLookupService.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/AbstractLookupService.java
index 2419aa83845a8..286ddbaa29a5b 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/AbstractLookupService.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/AbstractLookupService.java
@@ -45,6 +45,7 @@
 import org.elasticsearch.compute.operator.lookup.MergePositionsOperator;
 import org.elasticsearch.compute.operator.lookup.QueryList;
 import org.elasticsearch.core.AbstractRefCounted;
+import org.elasticsearch.core.Nullable;
 import org.elasticsearch.core.RefCounted;
 import org.elasticsearch.core.Releasable;
 import org.elasticsearch.core.Releasables;
@@ -185,7 +186,7 @@ protected static QueryList termQueryList(
         return switch (inputDataType) {
             case IP -> QueryList.ipTermQueryList(field, searchExecutionContext, (BytesRefBlock) block);
             case DATETIME -> QueryList.dateTermQueryList(field, searchExecutionContext, (LongBlock) block);
-            default -> QueryList.rawTermQueryList(field, searchExecutionContext, block);
+            case null, default -> QueryList.rawTermQueryList(field, searchExecutionContext, block);
         };
     }

@@ -459,6 +460,10 @@ abstract static class Request {
     abstract static class TransportRequest extends org.elasticsearch.transport.TransportRequest implements IndicesRequest {
         final String sessionId;
         final ShardId shardId;
+        /**
+         * For mixed clusters with nodes <8.14, this will be null.
+         */
+        @Nullable
         final DataType inputDataType;
         final Page inputPage;
         final List<NamedExpression> extractFields;
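The termQueryList switch above now names null explicitly. Under pattern-matching-for-switch semantics (standard since Java 21), switching on a null selector throws NullPointerException unless a case null exists; `case null, default ->` routes both null and any unmatched type to the raw-terms fallback. A standalone illustration (describe is a hypothetical helper using the same idiom):

    // Without `case null`, describe(null) would throw NPE before reaching any branch.
    static String describe(DataType inputDataType) {
        return switch (inputDataType) {
            case IP -> "ip terms";
            case DATETIME -> "date terms";
            case null, default -> "raw terms"; // pre-8.14 senders leave the type null
        };
    }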
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichLookupService.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichLookupService.java
index f24a16bb63697..2d85b46e33a8c 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichLookupService.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichLookupService.java
@@ -127,9 +127,9 @@ static TransportRequest readFrom(StreamInput in, BlockFactory blockFactory) thro
         TaskId parentTaskId = TaskId.readFromStream(in);
         String sessionId = in.readString();
         ShardId shardId = new ShardId(in);
-        DataType inputDataType = DataType.fromTypeName(
-            (in.getTransportVersion().onOrAfter(TransportVersions.V_8_14_0)) ? in.readString() : "unknown"
-        );
+        DataType inputDataType = (in.getTransportVersion().onOrAfter(TransportVersions.V_8_14_0))
+            ? DataType.fromTypeName(in.readString())
+            : null;
         String matchType = in.readString();
         String matchField = in.readString();
         Page inputPage;
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/FoldablesConvertFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/FoldablesConvertFunction.java
index 6e2b5bb63532d..8f43a6481db07 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/FoldablesConvertFunction.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/FoldablesConvertFunction.java
@@ -59,7 +59,8 @@ protected final TypeResolution resolveType() {

     @Override
     protected final Map<DataType, BuildFactory> factories() {
-        // TODO if a union type field is provided as an input, the correct error message is not shown, #112668 is a follow up
+        // This is used by ResolveUnionTypes, which is expected to be applied to ES fields only
+        // FoldablesConvertFunction takes only constants as inputs, so this is empty
         return Map.of();
     }
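With factories() empty and the Analyzer guard in place, a union-typed field passed to these functions now fails resolution with a constant-argument error instead of a misleading type error. The column numbers asserted in the VerifierTests below are just the 1-based offsets of the function argument; for example, in `from test* | eval x = now() + to_dateperiod(multi_typed)` the argument starts at column 45, hence (drawn from those assertions, using the same error(...) test helper):

    assertEquals(
        "1:45: argument of [to_dateperiod(multi_typed)] must be a constant, received [multi_typed]",
        error("from test* | eval x = now() + to_dateperiod(multi_typed)", analyzer)
    );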
"time_duration" : "date_period"; + assertEquals( + "1:" + lineNumber + ": Cannot use field [unsupported] with unsupported type [flattened]", + error("from test* | eval x = now() + " + functionName + "(unsupported)", analyzer) + ); + assertEquals( + "1:" + lineNumber + ": argument of [" + functionName + "(multi_typed)] must be a constant, received [multi_typed]", + error("from test* | eval x = now() + " + functionName + "(multi_typed)", analyzer) + ); + assertThat( + error("from test* | eval x = unsupported, y = now() + " + functionName + "(x)", analyzer), + containsString("1:23: Cannot use field [unsupported] with unsupported type [flattened]") + ); + assertThat( + error("from test* | eval x = multi_typed, y = now() + " + functionName + "(x)", analyzer), + containsString( + "1:48: argument of [" + + functionName + + "(x)] must be [" + + errorType + + " or string], " + + "found value [x] type [unsupported]" + ) + ); + } } public void testRoundFunctionInvalidInputs() { diff --git a/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapper.java b/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapper.java index b43d87c17e644..ec04bfdd058f9 100644 --- a/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapper.java +++ b/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapper.java @@ -645,7 +645,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio } if (dimension && numericValue != null) { - context.getDimensions().addUnsignedLong(fieldType().name(), numericValue).validate(context.indexSettings()); + context.getRoutingFields().addUnsignedLong(fieldType().name(), numericValue); } List fields = new ArrayList<>(); diff --git a/x-pack/plugin/spatial/src/test/java/org/elasticsearch/xpack/spatial/search/aggregations/GeoLineAggregatorTests.java b/x-pack/plugin/spatial/src/test/java/org/elasticsearch/xpack/spatial/search/aggregations/GeoLineAggregatorTests.java index 86575d418e605..1a9eb1fde6c87 100644 --- a/x-pack/plugin/spatial/src/test/java/org/elasticsearch/xpack/spatial/search/aggregations/GeoLineAggregatorTests.java +++ b/x-pack/plugin/spatial/src/test/java/org/elasticsearch/xpack/spatial/search/aggregations/GeoLineAggregatorTests.java @@ -46,6 +46,7 @@ import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperBuilderContext; import org.elasticsearch.index.mapper.NumberFieldMapper; +import org.elasticsearch.index.mapper.RoutingPathFields; import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper; import org.elasticsearch.plugins.SearchPlugin; import org.elasticsearch.search.aggregations.AggregationBuilder; @@ -797,12 +798,12 @@ private void assertGeoLine_TSDB( ArrayList points = testData.pointsForGroup(g); ArrayList timestamps = testData.timestampsForGroup(g); for (int i = 0; i < points.size(); i++) { - final TimeSeriesIdFieldMapper.TimeSeriesIdBuilder builder = new TimeSeriesIdFieldMapper.TimeSeriesIdBuilder(null); - builder.addString("group_id", testData.groups[g]); + var routingFields = new RoutingPathFields(null); + routingFields.addString("group_id", testData.groups[g]); ArrayList fields = new ArrayList<>( Arrays.asList( new SortedDocValuesField("group_id", new BytesRef(testData.groups[g])), - new SortedDocValuesField(TimeSeriesIdFieldMapper.NAME, builder.buildTsidHash().toBytesRef()) + new 
+                        new SortedDocValuesField(TimeSeriesIdFieldMapper.NAME, routingFields.buildHash().toBytesRef())
                     )
                 );
                 GeoPoint point = points.get(i);
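Both tsid-building refactors in this section converge on RoutingPathFields: writeTS keeps the human-readable legacy _tsid through the static TimeSeriesIdFieldMapper.buildLegacyTsid(...), while the geo_line test switches to the hashed form via buildHash(). Side by side, using only calls that appear in this diff (values are illustrative):

    var fields = new RoutingPathFields(null);                                          // null: no routing logic, as in the tests above
    fields.addString("group_id", "group-0");
    BytesRef hashed = fields.buildHash().toBytesRef();                                 // hashed _tsid, as in GeoLineAggregatorTests
    BytesRef legacy = TimeSeriesIdFieldMapper.buildLegacyTsid(fields).toBytesRef();    // legacy readable form, as in writeTS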