diff --git a/.buildkite/pipelines/dra-workflow.yml b/.buildkite/pipelines/dra-workflow.yml
index bcc6c9c57d756..25477c8541fa9 100644
--- a/.buildkite/pipelines/dra-workflow.yml
+++ b/.buildkite/pipelines/dra-workflow.yml
@@ -6,7 +6,8 @@ steps:
provider: gcp
image: family/elasticsearch-ubuntu-2204
machineType: custom-32-98304
- buildDirectory: /dev/shm/bk
+ localSsds: 1
+ localSsdInterface: nvme
diskSizeGb: 350
- wait
# The hadoop build depends on the ES artifact
diff --git a/.buildkite/pipelines/intake.yml b/.buildkite/pipelines/intake.yml
index e44a1e67e9d59..1bb13c4c10966 100644
--- a/.buildkite/pipelines/intake.yml
+++ b/.buildkite/pipelines/intake.yml
@@ -76,6 +76,7 @@ steps:
- trigger: elasticsearch-dra-workflow
label: Trigger DRA snapshot workflow
async: true
+ branches: "main 8.* 7.17"
build:
branch: "$BUILDKITE_BRANCH"
commit: "$BUILDKITE_COMMIT"
diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties
index ac75a3a968ed1..169c187ef115a 100644
--- a/build-tools-internal/version.properties
+++ b/build-tools-internal/version.properties
@@ -30,7 +30,7 @@ httpcore = 4.4.13
httpasyncclient = 4.1.5
commonslogging = 1.2
commonscodec = 1.15
-protobuf = 3.21.9
+protobuf = 3.25.5
# test dependencies
randomizedrunner = 2.8.0
diff --git a/docs/changelog/111336.yaml b/docs/changelog/111336.yaml
new file mode 100644
index 0000000000000..d5bf602cb7a88
--- /dev/null
+++ b/docs/changelog/111336.yaml
@@ -0,0 +1,5 @@
+pr: 111336
+summary: Use the same chunking configurations for models in the Elasticsearch service
+area: Machine Learning
+type: enhancement
+issues: []
diff --git a/docs/changelog/112933.yaml b/docs/changelog/112933.yaml
new file mode 100644
index 0000000000000..222cd5aadf739
--- /dev/null
+++ b/docs/changelog/112933.yaml
@@ -0,0 +1,5 @@
+pr: 112933
+summary: "Allow incubating Panama Vector in simdvec, and add vectorized `ipByteBin`"
+area: Search
+type: enhancement
+issues: []
diff --git a/docs/changelog/113251.yaml b/docs/changelog/113251.yaml
new file mode 100644
index 0000000000000..49167e6e4c915
--- /dev/null
+++ b/docs/changelog/113251.yaml
@@ -0,0 +1,5 @@
+pr: 113251
+summary: Span term query to convert to match no docs when unmapped field is targeted
+area: Search
+type: bug
+issues: []
diff --git a/docs/changelog/113297.yaml b/docs/changelog/113297.yaml
new file mode 100644
index 0000000000000..476619f432639
--- /dev/null
+++ b/docs/changelog/113297.yaml
@@ -0,0 +1,5 @@
+pr: 113297
+summary: "[ES|QL] add reverse function"
+area: ES|QL
+type: enhancement
+issues: []
diff --git a/docs/changelog/113812.yaml b/docs/changelog/113812.yaml
new file mode 100644
index 0000000000000..04498b4ae5f7e
--- /dev/null
+++ b/docs/changelog/113812.yaml
@@ -0,0 +1,5 @@
+pr: 113812
+summary: Add Streaming Inference spec
+area: Machine Learning
+type: enhancement
+issues: []
diff --git a/docs/changelog/113846.yaml b/docs/changelog/113846.yaml
new file mode 100644
index 0000000000000..5fdd56e98d706
--- /dev/null
+++ b/docs/changelog/113846.yaml
@@ -0,0 +1,6 @@
+pr: 113846
+summary: Don't validate internal stats if they are empty
+area: Aggregations
+type: bug
+issues:
+ - 113811
diff --git a/docs/changelog/113869.yaml b/docs/changelog/113869.yaml
new file mode 100644
index 0000000000000..f1cd1ec423966
--- /dev/null
+++ b/docs/changelog/113869.yaml
@@ -0,0 +1,5 @@
+pr: 113869
+summary: Upgrade protobuf to 3.25.5
+area: Snapshot/Restore
+type: upgrade
+issues: []
diff --git a/docs/changelog/113900.yaml b/docs/changelog/113900.yaml
new file mode 100644
index 0000000000000..25f833d251784
--- /dev/null
+++ b/docs/changelog/113900.yaml
@@ -0,0 +1,5 @@
+pr: 113900
+summary: Fix BWC for file-settings based role mappings
+area: Authentication
+type: bug
+issues: []
diff --git a/docs/changelog/113961.yaml b/docs/changelog/113961.yaml
new file mode 100644
index 0000000000000..24cb1f45f029e
--- /dev/null
+++ b/docs/changelog/113961.yaml
@@ -0,0 +1,5 @@
+pr: 113961
+summary: "[ESQL] Support datetime data type in Least and Greatest functions"
+area: ES|QL
+type: bug
+issues: []
diff --git a/docs/changelog/114080.yaml b/docs/changelog/114080.yaml
new file mode 100644
index 0000000000000..395768c46369a
--- /dev/null
+++ b/docs/changelog/114080.yaml
@@ -0,0 +1,5 @@
+pr: 114080
+summary: Stream Cohere Completion
+area: Machine Learning
+type: enhancement
+issues: []
diff --git a/docs/changelog/114116.yaml b/docs/changelog/114116.yaml
new file mode 100644
index 0000000000000..8d1c9e162ae23
--- /dev/null
+++ b/docs/changelog/114116.yaml
@@ -0,0 +1,5 @@
+pr: 114116
+summary: "ES|QL: Ensure minimum capacity for `PlanStreamInput` caches"
+area: ES|QL
+type: bug
+issues: []
diff --git a/docs/changelog/114177.yaml b/docs/changelog/114177.yaml
new file mode 100644
index 0000000000000..d68486469d797
--- /dev/null
+++ b/docs/changelog/114177.yaml
@@ -0,0 +1,5 @@
+pr: 114177
+summary: "Make `randomInstantBetween` always return value in range [minInstant, `maxInstant]`"
+area: Infra/Metrics
+type: bug
+issues: []
diff --git a/docs/reference/connector/apis/create-connector-api.asciidoc b/docs/reference/connector/apis/create-connector-api.asciidoc
index a115eab8853c0..3ecef6d302732 100644
--- a/docs/reference/connector/apis/create-connector-api.asciidoc
+++ b/docs/reference/connector/apis/create-connector-api.asciidoc
@@ -116,7 +116,7 @@ PUT _connector/my-connector
"name": "My Connector",
"description": "My Connector to sync data to Elastic index from Google Drive",
"service_type": "google_drive",
- "language": "english"
+ "language": "en"
}
----
diff --git a/docs/reference/connector/docs/connectors-zoom.asciidoc b/docs/reference/connector/docs/connectors-zoom.asciidoc
index d01b9c2be0368..d945a0aec3da1 100644
--- a/docs/reference/connector/docs/connectors-zoom.asciidoc
+++ b/docs/reference/connector/docs/connectors-zoom.asciidoc
@@ -63,18 +63,22 @@ To connect to Zoom you need to https://developers.zoom.us/docs/internal-apps/s2s
6. Click on the "Create" button to create the app registration.
7. After the registration is complete, you will be redirected to the app's overview page. Take note of the "App Credentials" value, as you'll need it later.
8. Navigate to the "Scopes" section and click on the "Add Scopes" button.
-9. The following scopes need to be added to the app.
+9. The following granular scopes need to be added to the app.
+
[source,bash]
----
-user:read:admin
-meeting:read:admin
-chat_channel:read:admin
-recording:read:admin
-chat_message:read:admin
-report:read:admin
+user:read:list_users:admin
+meeting:read:list_meetings:admin
+meeting:read:list_past_participants:admin
+cloud_recording:read:list_user_recordings:admin
+team_chat:read:list_user_channels:admin
+team_chat:read:list_user_messages:admin
----
-
+[NOTE]
+====
+The connector requires a minimum scope of `user:read:list_users:admin` to ingest data into Elasticsearch.
+====
++
10. Click on the "Done" button to add the selected scopes to your app.
11. Navigate to the "Activation" section and input the necessary information to activate the app.
@@ -220,18 +224,22 @@ To connect to Zoom you need to https://developers.zoom.us/docs/internal-apps/s2s
6. Click on the "Create" button to create the app registration.
7. After the registration is complete, you will be redirected to the app's overview page. Take note of the "App Credentials" value, as you'll need it later.
8. Navigate to the "Scopes" section and click on the "Add Scopes" button.
-9. The following scopes need to be added to the app.
+9. The following granular scopes need to be added to the app.
+
[source,bash]
----
-user:read:admin
-meeting:read:admin
-chat_channel:read:admin
-recording:read:admin
-chat_message:read:admin
-report:read:admin
+user:read:list_users:admin
+meeting:read:list_meetings:admin
+meeting:read:list_past_participants:admin
+cloud_recording:read:list_user_recordings:admin
+team_chat:read:list_user_channels:admin
+team_chat:read:list_user_messages:admin
----
-
+[NOTE]
+====
+The connector requires a minimum scope of `user:read:list_users:admin` to ingest data into Elasticsearch.
+====
++
10. Click on the "Done" button to add the selected scopes to your app.
11. Navigate to the "Activation" section and input the necessary information to activate the app.
diff --git a/docs/reference/esql/functions/description/reverse.asciidoc b/docs/reference/esql/functions/description/reverse.asciidoc
new file mode 100644
index 0000000000000..fbb3f3f6b4d54
--- /dev/null
+++ b/docs/reference/esql/functions/description/reverse.asciidoc
@@ -0,0 +1,5 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Description*
+
+Returns a new string representing the input string in reverse order.
diff --git a/docs/reference/esql/functions/examples/reverse.asciidoc b/docs/reference/esql/functions/examples/reverse.asciidoc
new file mode 100644
index 0000000000000..67c8af077b174
--- /dev/null
+++ b/docs/reference/esql/functions/examples/reverse.asciidoc
@@ -0,0 +1,22 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Examples*
+
+[source.merge.styled,esql]
+----
+include::{esql-specs}/string.csv-spec[tag=reverse]
+----
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+include::{esql-specs}/string.csv-spec[tag=reverse-result]
+|===
+`REVERSE` works with Unicode, too! It keeps Unicode grapheme clusters together during reversal.
+[source.merge.styled,esql]
+----
+include::{esql-specs}/string.csv-spec[tag=reverseEmoji]
+----
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+include::{esql-specs}/string.csv-spec[tag=reverseEmoji-result]
+|===
+
diff --git a/docs/reference/esql/functions/kibana/definition/greatest.json b/docs/reference/esql/functions/kibana/definition/greatest.json
index 0e32fca5b4ca1..2818a5ac56339 100644
--- a/docs/reference/esql/functions/kibana/definition/greatest.json
+++ b/docs/reference/esql/functions/kibana/definition/greatest.json
@@ -35,6 +35,24 @@
"variadic" : true,
"returnType" : "boolean"
},
+ {
+ "params" : [
+ {
+ "name" : "first",
+ "type" : "date",
+ "optional" : false,
+ "description" : "First of the columns to evaluate."
+ },
+ {
+ "name" : "rest",
+ "type" : "date",
+ "optional" : true,
+ "description" : "The rest of the columns to evaluate."
+ }
+ ],
+ "variadic" : true,
+ "returnType" : "date"
+ },
{
"params" : [
{
diff --git a/docs/reference/esql/functions/kibana/definition/least.json b/docs/reference/esql/functions/kibana/definition/least.json
index 0ba34cf3cc9a2..7b545896f4ddc 100644
--- a/docs/reference/esql/functions/kibana/definition/least.json
+++ b/docs/reference/esql/functions/kibana/definition/least.json
@@ -34,6 +34,24 @@
"variadic" : true,
"returnType" : "boolean"
},
+ {
+ "params" : [
+ {
+ "name" : "first",
+ "type" : "date",
+ "optional" : false,
+ "description" : "First of the columns to evaluate."
+ },
+ {
+ "name" : "rest",
+ "type" : "date",
+ "optional" : true,
+ "description" : "The rest of the columns to evaluate."
+ }
+ ],
+ "variadic" : true,
+ "returnType" : "date"
+ },
{
"params" : [
{
diff --git a/docs/reference/esql/functions/kibana/definition/reverse.json b/docs/reference/esql/functions/kibana/definition/reverse.json
new file mode 100644
index 0000000000000..1b222691530f2
--- /dev/null
+++ b/docs/reference/esql/functions/kibana/definition/reverse.json
@@ -0,0 +1,38 @@
+{
+ "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.",
+ "type" : "eval",
+ "name" : "reverse",
+ "description" : "Returns a new string representing the input string in reverse order.",
+ "signatures" : [
+ {
+ "params" : [
+ {
+ "name" : "str",
+ "type" : "keyword",
+ "optional" : false,
+ "description" : "String expression. If `null`, the function returns `null`."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "keyword"
+ },
+ {
+ "params" : [
+ {
+ "name" : "str",
+ "type" : "text",
+ "optional" : false,
+ "description" : "String expression. If `null`, the function returns `null`."
+ }
+ ],
+ "variadic" : false,
+ "returnType" : "text"
+ }
+ ],
+ "examples" : [
+ "ROW message = \"Some Text\" | EVAL message_reversed = REVERSE(message);",
+ "ROW bending_arts = \"💧🪨🔥💨\" | EVAL bending_arts_reversed = REVERSE(bending_arts);"
+ ],
+ "preview" : false,
+ "snapshot_only" : false
+}
diff --git a/docs/reference/esql/functions/kibana/docs/reverse.md b/docs/reference/esql/functions/kibana/docs/reverse.md
new file mode 100644
index 0000000000000..cbeade9189d80
--- /dev/null
+++ b/docs/reference/esql/functions/kibana/docs/reverse.md
@@ -0,0 +1,10 @@
+
+
+### REVERSE
+Returns a new string representing the input string in reverse order.
+
+```
+ROW message = "Some Text" | EVAL message_reversed = REVERSE(message);
+```
diff --git a/docs/reference/esql/functions/layout/reverse.asciidoc b/docs/reference/esql/functions/layout/reverse.asciidoc
new file mode 100644
index 0000000000000..99c236d63492e
--- /dev/null
+++ b/docs/reference/esql/functions/layout/reverse.asciidoc
@@ -0,0 +1,15 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+[discrete]
+[[esql-reverse]]
+=== `REVERSE`
+
+*Syntax*
+
+[.text-center]
+image::esql/functions/signature/reverse.svg[Embedded,opts=inline]
+
+include::../parameters/reverse.asciidoc[]
+include::../description/reverse.asciidoc[]
+include::../types/reverse.asciidoc[]
+include::../examples/reverse.asciidoc[]
diff --git a/docs/reference/esql/functions/parameters/reverse.asciidoc b/docs/reference/esql/functions/parameters/reverse.asciidoc
new file mode 100644
index 0000000000000..d56d115662491
--- /dev/null
+++ b/docs/reference/esql/functions/parameters/reverse.asciidoc
@@ -0,0 +1,6 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Parameters*
+
+`str`::
+String expression. If `null`, the function returns `null`.
diff --git a/docs/reference/esql/functions/signature/reverse.svg b/docs/reference/esql/functions/signature/reverse.svg
new file mode 100644
index 0000000000000..c23ce5583a8c0
--- /dev/null
+++ b/docs/reference/esql/functions/signature/reverse.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/reference/esql/functions/string-functions.asciidoc b/docs/reference/esql/functions/string-functions.asciidoc
index ed97769b900e7..f5222330d579d 100644
--- a/docs/reference/esql/functions/string-functions.asciidoc
+++ b/docs/reference/esql/functions/string-functions.asciidoc
@@ -17,6 +17,7 @@
* <<esql-ltrim>>
* <<esql-repeat>>
* <<esql-replace>>
+* <<esql-reverse>>
* <<esql-right>>
* <<esql-rtrim>>
* <<esql-space>>
@@ -38,6 +39,7 @@ include::layout/locate.asciidoc[]
include::layout/ltrim.asciidoc[]
include::layout/repeat.asciidoc[]
include::layout/replace.asciidoc[]
+include::layout/reverse.asciidoc[]
include::layout/right.asciidoc[]
include::layout/rtrim.asciidoc[]
include::layout/space.asciidoc[]
diff --git a/docs/reference/esql/functions/types/greatest.asciidoc b/docs/reference/esql/functions/types/greatest.asciidoc
index 537be55cd17ef..1454bbb6f81c1 100644
--- a/docs/reference/esql/functions/types/greatest.asciidoc
+++ b/docs/reference/esql/functions/types/greatest.asciidoc
@@ -7,6 +7,7 @@
first | rest | result
boolean | boolean | boolean
boolean | | boolean
+date | date | date
double | double | double
integer | integer | integer
integer | | integer
diff --git a/docs/reference/esql/functions/types/least.asciidoc b/docs/reference/esql/functions/types/least.asciidoc
index 537be55cd17ef..1454bbb6f81c1 100644
--- a/docs/reference/esql/functions/types/least.asciidoc
+++ b/docs/reference/esql/functions/types/least.asciidoc
@@ -7,6 +7,7 @@
first | rest | result
boolean | boolean | boolean
boolean | | boolean
+date | date | date
double | double | double
integer | integer | integer
integer | | integer
diff --git a/docs/reference/esql/functions/types/reverse.asciidoc b/docs/reference/esql/functions/types/reverse.asciidoc
new file mode 100644
index 0000000000000..974066d225bca
--- /dev/null
+++ b/docs/reference/esql/functions/types/reverse.asciidoc
@@ -0,0 +1,10 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.
+
+*Supported types*
+
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+str | result
+keyword | keyword
+text | text
+|===
diff --git a/docs/reference/ingest/processors/inference.asciidoc b/docs/reference/ingest/processors/inference.asciidoc
index fa4f246cdd7c8..4699f634afe37 100644
--- a/docs/reference/ingest/processors/inference.asciidoc
+++ b/docs/reference/ingest/processors/inference.asciidoc
@@ -169,6 +169,18 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
=======
+`deberta_v2`::::
+(Optional, object)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
++
+.Properties of deberta_v2
+[%collapsible%open]
+=======
+`truncate`::::
+(Optional, string)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
+=======
+
`roberta`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
@@ -224,6 +236,18 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
=======
+`deberta_v2`::::
+(Optional, object)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
++
+.Properties of deberta_v2
+[%collapsible%open]
+=======
+`truncate`::::
+(Optional, string)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
+=======
+
`roberta`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
@@ -304,6 +328,23 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
=======
+`deberta_v2`::::
+(Optional, object)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
++
+.Properties of deberta_v2
+[%collapsible%open]
+=======
+`span`::::
+(Optional, integer)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
+
+`truncate`::::
+(Optional, string)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
+=======
+
+
`roberta`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
@@ -363,6 +404,18 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
=======
+`deberta_v2`::::
+(Optional, object)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
++
+.Properties of deberta_v2
+[%collapsible%open]
+=======
+`truncate`::::
+(Optional, string)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
+=======
+
`roberta`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
@@ -424,6 +477,22 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
=======
+`deberta_v2`::::
+(Optional, object)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
++
+.Properties of deberta_v2
+[%collapsible%open]
+=======
+`span`::::
+(Optional, integer)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
+
+`truncate`::::
+(Optional, string)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
+=======
+
`roberta`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
@@ -515,6 +584,18 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
=======
+`deberta_v2`::::
+(Optional, object)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
++
+.Properties of deberta_v2
+[%collapsible%open]
+=======
+`truncate`::::
+(Optional, string)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
+=======
+
`roberta`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
diff --git a/docs/reference/mapping/runtime.asciidoc b/docs/reference/mapping/runtime.asciidoc
index 190081fa801b4..1ee1194279061 100644
--- a/docs/reference/mapping/runtime.asciidoc
+++ b/docs/reference/mapping/runtime.asciidoc
@@ -821,8 +821,6 @@ address.
[[lookup-runtime-fields]]
==== Retrieve fields from related indices
-experimental[]
-
The <<search-fields,`fields`>> parameter on the `_search` API can also be used to retrieve fields from
the related indices via runtime fields with a type of `lookup`.
diff --git a/docs/reference/mapping/types/date.asciidoc b/docs/reference/mapping/types/date.asciidoc
index 44e1c2949775e..ca2c23f932fc3 100644
--- a/docs/reference/mapping/types/date.asciidoc
+++ b/docs/reference/mapping/types/date.asciidoc
@@ -125,8 +125,7 @@ The following parameters are accepted by `date` fields:
`locale`::
The locale to use when parsing dates since months do not have the same names
- and/or abbreviations in all languages. The default is the
- https://docs.oracle.com/javase/8/docs/api/java/util/Locale.html#ROOT[`ROOT` locale].
+ and/or abbreviations in all languages. The default is `ENGLISH`.
<>::
diff --git a/docs/reference/ml/ml-shared.asciidoc b/docs/reference/ml/ml-shared.asciidoc
index 97122141d7558..ef19fbf4e267d 100644
--- a/docs/reference/ml/ml-shared.asciidoc
+++ b/docs/reference/ml/ml-shared.asciidoc
@@ -988,6 +988,7 @@ values are
+
--
* `bert`: Use for BERT-style models
+* `deberta_v2`: Use for DeBERTa v2 and v3-style models
* `mpnet`: Use for MPNet-style models
* `roberta`: Use for RoBERTa-style and BART-style models
* experimental:[] `xlm_roberta`: Use for XLMRoBERTa-style models
@@ -1037,6 +1038,19 @@ sequence. Therefore, do not use `second` in this case.
end::inference-config-nlp-tokenization-truncate[]
+tag::inference-config-nlp-tokenization-truncate-deberta-v2[]
+Indicates how tokens are truncated when they exceed `max_sequence_length`.
+The default value is `first`.
++
+--
+* `balanced`: One or both of the first and second sequences may be truncated so as to balance the tokens included from both sequences.
+* `none`: No truncation occurs; the inference request receives an error.
+* `first`: Only the first sequence is truncated.
+* `second`: Only the second sequence is truncated. If there is just one sequence, that sequence is truncated.
+--
+
+end::inference-config-nlp-tokenization-truncate-deberta-v2[]
+
tag::inference-config-nlp-tokenization-bert-with-special-tokens[]
Tokenize with special tokens. The tokens typically included in BERT-style tokenization are:
+
@@ -1050,10 +1064,23 @@ tag::inference-config-nlp-tokenization-bert-ja-with-special-tokens[]
Tokenize with special tokens if `true`.
end::inference-config-nlp-tokenization-bert-ja-with-special-tokens[]
+tag::inference-config-nlp-tokenization-deberta-v2[]
+DeBERTa-style tokenization is to be performed with the enclosed settings.
+end::inference-config-nlp-tokenization-deberta-v2[]
+
tag::inference-config-nlp-tokenization-max-sequence-length[]
Specifies the maximum number of tokens allowed to be output by the tokenizer.
end::inference-config-nlp-tokenization-max-sequence-length[]
+tag::inference-config-nlp-tokenization-deberta-v2-with-special-tokens[]
+Tokenize with special tokens. The tokens typically included in DeBERTa-style tokenization are:
++
+--
+* `[CLS]`: The first token of the sequence being classified.
+* `[SEP]`: Indicates sequence separation and sequence end.
+--
+end::inference-config-nlp-tokenization-deberta-v2-with-special-tokens[]
+
tag::inference-config-nlp-tokenization-roberta[]
RoBERTa-style tokenization is to be performed with the enclosed settings.
end::inference-config-nlp-tokenization-roberta[]
diff --git a/docs/reference/ml/trained-models/apis/infer-trained-model.asciidoc b/docs/reference/ml/trained-models/apis/infer-trained-model.asciidoc
index 9aac913e7559f..99c3ecad03a9d 100644
--- a/docs/reference/ml/trained-models/apis/infer-trained-model.asciidoc
+++ b/docs/reference/ml/trained-models/apis/infer-trained-model.asciidoc
@@ -137,6 +137,18 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
=======
+`deberta_v2`::::
+(Optional, object)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
++
+.Properties of deberta_v2
+[%collapsible%open]
+=======
+`truncate`::::
+(Optional, string)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
+=======
+
`roberta`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
diff --git a/docs/reference/ml/trained-models/apis/put-trained-models.asciidoc b/docs/reference/ml/trained-models/apis/put-trained-models.asciidoc
index e29bc8823ab29..32265af5f795b 100644
--- a/docs/reference/ml/trained-models/apis/put-trained-models.asciidoc
+++ b/docs/reference/ml/trained-models/apis/put-trained-models.asciidoc
@@ -773,6 +773,37 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
(Optional, boolean)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert-with-special-tokens]
====
+`deberta_v2`::
+(Optional, object)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
++
+.Properties of deberta_v2
+[%collapsible%open]
+====
+`do_lower_case`:::
+(Optional, boolean)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-do-lower-case]
++
+--
+Defaults to `false`.
+--
+
+`max_sequence_length`:::
+(Optional, integer)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
+
+`span`:::
+(Optional, integer)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
+
+`truncate`:::
+(Optional, string)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
+
+`with_special_tokens`:::
+(Optional, boolean)
+include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2-with-special-tokens]
+====
`roberta`::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
diff --git a/docs/reference/release-notes/8.15.2.asciidoc b/docs/reference/release-notes/8.15.2.asciidoc
new file mode 100644
index 0000000000000..7dfd8690109b2
--- /dev/null
+++ b/docs/reference/release-notes/8.15.2.asciidoc
@@ -0,0 +1,42 @@
+[[release-notes-8.15.2]]
+== {es} version 8.15.2
+
+Also see <<breaking-changes-8.15,Breaking changes in 8.15>>.
+
+[[bug-8.15.2]]
+[float]
+=== Bug fixes
+
+Authorization::
+* Fix remote cluster credential secure settings reload {es-pull}111535[#111535]
+
+ES|QL::
+* ESQL: Don't mutate the `BoolQueryBuilder` in plan {es-pull}111519[#111519]
+* ES|QL: Fix `ResolvedEnrichPolicy` serialization (bwc) in v 8.15 {es-pull}112985[#112985] (issue: {es-issue}112968[#112968])
+* Fix union-types where one index is missing the field {es-pull}111932[#111932] (issue: {es-issue}111912[#111912])
+* Support widening of numeric types in union-types {es-pull}112610[#112610] (issue: {es-issue}111277[#111277])
+
+Infra/Core::
+* JSON parse failures should be 4xx codes {es-pull}112703[#112703]
+* Json parsing exceptions should not cause 500 errors {es-pull}111548[#111548] (issue: {es-issue}111542[#111542])
+* Make sure file accesses in `DnRoleMapper` are done in stack frames with permissions {es-pull}112400[#112400]
+
+Ingest Node::
+* Fix missing header in `put_geoip_database` JSON spec {es-pull}112581[#112581]
+
+Logs::
+* Fix encoding of dynamic arrays in ignored source {es-pull}112713[#112713]
+
+Mapping::
+* Full coverage of ECS by ecs@mappings when `date_detection` is disabled {es-pull}112444[#112444] (issue: {es-issue}112398[#112398])
+
+Search::
+* Fix parsing error in `_terms_enum` API {es-pull}112872[#112872] (issue: {es-issue}94378[#94378])
+
+Security::
+* Allowlist `tracestate` header on remote server port {es-pull}112649[#112649]
+
+Vector Search::
+* Fix NPE in `dense_vector` stats {es-pull}112720[#112720]
+
+
diff --git a/docs/reference/release-notes/highlights.asciidoc b/docs/reference/release-notes/highlights.asciidoc
index bf5260928797c..1e0018f590ac0 100644
--- a/docs/reference/release-notes/highlights.asciidoc
+++ b/docs/reference/release-notes/highlights.asciidoc
@@ -72,16 +72,54 @@ version 8.16 `allow_rebalance` setting defaults to `always` unless the legacy al
[discrete]
[[add_global_retention_in_data_stream_lifecycle]]
=== Add global retention in data stream lifecycle
-Data stream lifecycle now supports configuring retention on a cluster level, namely global retention. Global retention
-allows us to configure two different retentions:
+Data stream lifecycle now supports configuring retention on a cluster level,
+namely global retention. Global retention allows us to configure two different
+retentions:
-- `data_streams.lifecycle.retention.default` is applied to all data streams managed by the data stream lifecycle that do not have retention
-defined on the data stream level.
-- `data_streams.lifecycle.retention.max` is applied to all data streams managed by the data stream lifecycle and it allows any data stream
-data to be deleted after the `max_retention` has passed.
+- `data_streams.lifecycle.retention.default` is applied to all data streams managed
+by the data stream lifecycle that do not have retention defined on the data stream level.
+- `data_streams.lifecycle.retention.max` is applied to all data streams managed by the
+data stream lifecycle and it allows any data stream data to be deleted after the `max_retention` has passed.
{es-pull}111972[#111972]
+[discrete]
+[[enable_zstandard_compression_for_indices_with_index_codec_set_to_best_compression]]
+=== Enable ZStandard compression for indices with index.codec set to best_compression
+Previously, DEFLATE compression was used to compress stored fields in indices with the index.codec index setting set to
+best_compression. With this change, ZStandard is used as the compression algorithm for stored fields in indices with the
+index.codec index setting set to best_compression. Using ZStandard results in less storage usage with a
+similar indexing throughput, depending on what options are used. Experiments with indexing logs have shown that
+ZStandard offers ~12% lower storage usage and a ~14% higher indexing throughput compared to DEFLATE.
+
+{es-pull}112665[#112665]
+
// end::notable-highlights[]
+[discrete]
+[[esql_multi_value_fields_supported_in_geospatial_predicates]]
+=== ESQL: Multi-value fields supported in Geospatial predicates
+Supporting multi-value fields in `WHERE` predicates is a challenge due to not knowing whether `ALL` or `ANY`
+of the values in the field should pass the predicate.
+For example, should the field `age:[10,30]` pass the predicate `WHERE age>20` or not?
+This ambiguity does not exist with the spatial predicates
+`ST_INTERSECTS` and `ST_DISJOINT`, because the choice between `ANY` or `ALL`
+is implied by the predicate itself.
+Consider a predicate checking a field named `location` against a test geometry named `shape`:
+
+* `ST_INTERSECTS(field, shape)` - true if `ANY` value can intersect the shape
+* `ST_DISJOINT(field, shape)` - true only if `ALL` values are disjoint from the shape
+
+This works even if the shape argument is itself a complex or compound geometry.
+
+Similar logic exists for `ST_CONTAINS` and `ST_WITHIN` predicates, but these are not as easily solved
+with `ANY` or `ALL`, because a collection of geometries contains another collection if each of the contained
+geometries is within at least one of the containing geometries. Evaluating this requires that the multi-value
+field is first combined into a single geometry before performing the predicate check.
+
+* `ST_CONTAINS(field, shape)` - true if the combined geometry contains the shape
+* `ST_WITHIN(field, shape)` - true if the combined geometry is within the shape
+
+{es-pull}112063[#112063]
+
diff --git a/docs/reference/rest-api/common-parms.asciidoc b/docs/reference/rest-api/common-parms.asciidoc
index fabd495cdc525..993bb8cb894f9 100644
--- a/docs/reference/rest-api/common-parms.asciidoc
+++ b/docs/reference/rest-api/common-parms.asciidoc
@@ -1298,10 +1298,11 @@ tag::wait_for_active_shards[]
`wait_for_active_shards`::
+
--
-(Optional, string) The number of shard copies that must be active before
-proceeding with the operation. Set to `all` or any positive integer up
-to the total number of shards in the index (`number_of_replicas+1`).
-Default: 1, the primary shard.
+(Optional, string) The number of copies of each shard that must be active
+before proceeding with the operation. Set to `all` or any non-negative integer
+up to the total number of copies of each shard in the index
+(`number_of_replicas+1`). Defaults to `1`, meaning to wait just for each
+primary shard to be active.
See <<index-wait-for-active-shards>>.
--
diff --git a/docs/reference/setup/install.asciidoc b/docs/reference/setup/install.asciidoc
index 89373d0ce8d44..a38fdcfc36fd5 100644
--- a/docs/reference/setup/install.asciidoc
+++ b/docs/reference/setup/install.asciidoc
@@ -76,27 +76,29 @@ Docker container images may be downloaded from the Elastic Docker Registry.
[[jvm-version]]
=== Java (JVM) Version
-{es} is built using Java, and includes a bundled version of
-https://openjdk.java.net[OpenJDK] from the JDK maintainers (GPLv2+CE) within
-each distribution. The bundled JVM is the recommended JVM.
-
-To use your own version of Java, set the `ES_JAVA_HOME` environment variable.
-If you must use a version of Java that is different from the bundled JVM, it is
-best to use the latest release of a link:/support/matrix[supported]
-https://www.oracle.com/technetwork/java/eol-135779.html[LTS version of Java].
-{es} is closely coupled to certain OpenJDK-specific features, so it may not
-work correctly with other JVMs. {es} will refuse to start if a known-bad
-version of Java is used.
-
-If you use a JVM other than the bundled one, you are responsible for reacting
-to announcements related to its security issues and bug fixes, and must
-yourself determine whether each update is necessary or not. In contrast, the
-bundled JVM is treated as an integral part of {es}, which means that Elastic
-takes responsibility for keeping it up to date. Security issues and bugs within
-the bundled JVM are treated as if they were within {es} itself.
-
-The bundled JVM is located within the `jdk` subdirectory of the {es} home
-directory. You may remove this directory if using your own JVM.
+{es} is built using Java, and includes a bundled version of https://openjdk.java.net[OpenJDK] within each distribution. We strongly
+recommend using the bundled JVM in all installations of {es}.
+
+The bundled JVM is treated the same as any other dependency of {es} in terms of support and maintenance. This means that Elastic takes
+responsibility for keeping it up to date, and reacts to security issues and bug reports as needed to address vulnerabilities and other bugs
+in {es}. Elastic's support of the bundled JVM is subject to Elastic's https://www.elastic.co/support_policy[support policy] and
+https://www.elastic.co/support/eol[end-of-life schedule] and is independent of the support policy and end-of-life schedule offered by the
+original supplier of the JVM. Elastic does not support using the bundled JVM for purposes other than running {es}.
+
+TIP: {es} uses only a subset of the features offered by the JVM. Bugs and security issues in the bundled JVM often relate to features that
+{es} does not use. Such issues do not apply to {es}. Elastic analyzes reports of security vulnerabilities in all its dependencies, including
+in the bundled JVM, and will issue an https://www.elastic.co/community/security[Elastic Security Advisory] if such an advisory is needed.
+
+If you decide to run {es} using a version of Java that is different from the bundled one, prefer to use the latest release of a
+https://www.oracle.com/technetwork/java/eol-135779.html[LTS version of Java] which is link:/support/matrix[listed in the support matrix].
+Although such a configuration is supported, if you encounter a security issue or other bug in your chosen JVM then Elastic may not be able
+to help unless the issue is also present in the bundled JVM. Instead, you must seek assistance directly from the supplier of your chosen
+JVM. You must also take responsibility for reacting to security and bug announcements from the supplier of your chosen JVM. {es} may not
+perform optimally if using a JVM other than the bundled one. {es} is closely coupled to certain OpenJDK-specific features, so it may not
+work correctly with JVMs that are not OpenJDK. {es} will refuse to start if you attempt to use a known-bad JVM version.
+
+To use your own version of Java, set the `ES_JAVA_HOME` environment variable to the path to your own JVM installation. The bundled JVM is
+located within the `jdk` subdirectory of the {es} home directory. You may remove this directory if using your own JVM.
[discrete]
[[jvm-agents]]
diff --git a/docs/reference/snapshot-restore/repository-s3.asciidoc b/docs/reference/snapshot-restore/repository-s3.asciidoc
index 1f55296139cd3..71a9fd8b87c96 100644
--- a/docs/reference/snapshot-restore/repository-s3.asciidoc
+++ b/docs/reference/snapshot-restore/repository-s3.asciidoc
@@ -296,9 +296,8 @@ include::repository-shared-settings.asciidoc[]
`max_multipart_parts` ::
- (<>) The maximum number of parts that {es} will write during a multipart upload
- of a single object. Files which are larger than `buffer_size × max_multipart_parts` will be
- chunked into several smaller objects. {es} may also split a file across multiple objects to
+ (integer) The maximum number of parts that {es} will write during a multipart upload of a single object. Files which are larger than
+ `buffer_size × max_multipart_parts` will be chunked into several smaller objects. {es} may also split a file across multiple objects to
satisfy other constraints such as the `chunk_size` limit. Defaults to `10000` which is the
https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html[maximum number of parts in a multipart upload in AWS S3].
@@ -321,20 +320,14 @@ include::repository-shared-settings.asciidoc[]
`delete_objects_max_size`::
- (<>) Sets the maxmimum batch size, betewen 1 and 1000, used
- for `DeleteObjects` requests. Defaults to 1000 which is the maximum number
- supported by the
- https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html[AWS
- DeleteObjects API].
+ (integer) Sets the maximum batch size, between 1 and 1000, used for `DeleteObjects` requests. Defaults to 1000 which is the maximum
+ number supported by the https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html[AWS DeleteObjects API].
`max_multipart_upload_cleanup_size`::
- (<>) Sets the maximum number of possibly-dangling multipart
- uploads to clean up in each batch of snapshot deletions. Defaults to `1000`
- which is the maximum number supported by the
- https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListMultipartUploads.html[AWS
- ListMultipartUploads API]. If set to `0`, {es} will not attempt to clean up
- dangling multipart uploads.
+ (integer) Sets the maximum number of possibly-dangling multipart uploads to clean up in each batch of snapshot deletions. Defaults to
+ `1000` which is the maximum number supported by the https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListMultipartUploads.html[AWS
+ ListMultipartUploads API]. If set to `0`, {es} will not attempt to clean up dangling multipart uploads.
NOTE: The option of defining client settings in the repository settings as
documented below is considered deprecated, and will be removed in a future
diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index f1c4b15ea5702..53a65e217ed18 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -749,9 +749,9 @@
-
-
-
+
+
+
@@ -759,9 +759,9 @@
-
-
-
+
+
+
diff --git a/libs/h3/src/main/java/org/elasticsearch/h3/CellBoundary.java b/libs/h3/src/main/java/org/elasticsearch/h3/CellBoundary.java
index 74115d5a002d6..e0f9df174c2b5 100644
--- a/libs/h3/src/main/java/org/elasticsearch/h3/CellBoundary.java
+++ b/libs/h3/src/main/java/org/elasticsearch/h3/CellBoundary.java
@@ -22,36 +22,52 @@
*/
package org.elasticsearch.h3;
+import java.util.Arrays;
+import java.util.Objects;
+
/**
* cell boundary points as {@link LatLng}
*/
public final class CellBoundary {
-
/** Maximum number of cell boundary vertices; worst case is pentagon:
* 5 original verts + 5 edge crossings
*/
- private static final int MAX_CELL_BNDRY_VERTS = 10;
+ static final int MAX_CELL_BNDRY_VERTS = 10;
/** How many points it holds */
- private int numVertext;
+ private final int numPoints;
/** The actual points */
- private final LatLng[] points = new LatLng[MAX_CELL_BNDRY_VERTS];
-
- CellBoundary() {}
+ private final LatLng[] points;
- void add(LatLng point) {
- points[numVertext++] = point;
+ CellBoundary(LatLng[] points, int numPoints) {
+ this.points = points;
+ this.numPoints = numPoints;
}
/** Number of points in this boundary */
public int numPoints() {
- return numVertext;
+ return numPoints;
}
/** Return the point at the given position*/
public LatLng getLatLon(int i) {
- if (i >= numVertext) {
- throw new IndexOutOfBoundsException();
- }
+ assert i >= 0 && i < numPoints;
return points[i];
}
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ final CellBoundary that = (CellBoundary) o;
+ return numPoints == that.numPoints && Arrays.equals(points, that.points);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(numPoints, Arrays.hashCode(points));
+ }
}
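`CellBoundary` is now immutable and value-comparable. A minimal sketch of the resulting semantics, using the `H3` entry points exercised by the tests later in this diff (the coordinates are illustrative):

[source,java]
----
// Two boundaries computed for the same H3 cell are distinct instances
// but now compare equal by contents.
long h3 = H3.geoToH3(40.7128, -74.0060, 5);
CellBoundary a = H3.h3ToGeoBoundary(h3);
CellBoundary b = H3.h3ToGeoBoundary(h3);
assert a.equals(b) && a.hashCode() == b.hashCode();
----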
diff --git a/libs/h3/src/main/java/org/elasticsearch/h3/Constants.java b/libs/h3/src/main/java/org/elasticsearch/h3/Constants.java
index 570052700615f..3b3f760c0534f 100644
--- a/libs/h3/src/main/java/org/elasticsearch/h3/Constants.java
+++ b/libs/h3/src/main/java/org/elasticsearch/h3/Constants.java
@@ -34,10 +34,6 @@ final class Constants {
* 2.0 * PI
*/
public static final double M_2PI = 2.0 * Math.PI;
- /**
- * max H3 resolution; H3 version 1 has 16 resolutions, numbered 0 through 15
- */
- public static int MAX_H3_RES = 15;
/**
* The number of H3 base cells
*/
diff --git a/libs/h3/src/main/java/org/elasticsearch/h3/FaceIJK.java b/libs/h3/src/main/java/org/elasticsearch/h3/FaceIJK.java
index ae59ff359d1f8..866fdfe8a7f8b 100644
--- a/libs/h3/src/main/java/org/elasticsearch/h3/FaceIJK.java
+++ b/libs/h3/src/main/java/org/elasticsearch/h3/FaceIJK.java
@@ -439,7 +439,8 @@ public CellBoundary faceIjkPentToCellBoundary(int res, int start, int length) {
// convert each vertex to lat/lng
// adjust the face of each vertex as appropriate and introduce
// edge-crossing vertices as needed
- final CellBoundary boundary = new CellBoundary();
+ final LatLng[] points = new LatLng[CellBoundary.MAX_CELL_BNDRY_VERTS];
+ int numPoints = 0;
final CoordIJK scratch = new CoordIJK(0, 0, 0);
final FaceIJK fijk = new FaceIJK(this.face, scratch);
final int[][] coord = isResolutionClassIII ? VERTEX_CLASSIII : VERTEX_CLASSII;
@@ -501,21 +502,19 @@ public CellBoundary faceIjkPentToCellBoundary(int res, int start, int length) {
// find the intersection and add the lat/lng point to the result
final Vec2d inter = Vec2d.v2dIntersect(orig2d0, orig2d1, edge0, edge1);
- final LatLng point = inter.hex2dToGeo(fijkOrient.face, adjRes, true);
- boundary.add(point);
+ points[numPoints++] = inter.hex2dToGeo(fijkOrient.face, adjRes, true);
}
// convert vertex to lat/lng and add to the result
// vert == start + NUM_PENT_VERTS is only used to test for possible
// intersection on last edge
if (vert < start + Constants.NUM_PENT_VERTS) {
- final LatLng point = fijk.coord.ijkToGeo(fijk.face, adjRes, true);
- boundary.add(point);
+ points[numPoints++] = fijk.coord.ijkToGeo(fijk.face, adjRes, true);
}
lastFace = fijk.face;
lastCoord.reset(fijk.coord.i, fijk.coord.j, fijk.coord.k);
}
- return boundary;
+ return new CellBoundary(points, numPoints);
}
/**
@@ -547,7 +546,8 @@ public CellBoundary faceIjkToCellBoundary(final int res, final int start, final
// convert each vertex to lat/lng
// adjust the face of each vertex as appropriate and introduce
// edge-crossing vertices as needed
- final CellBoundary boundary = new CellBoundary();
+ final LatLng[] points = new LatLng[CellBoundary.MAX_CELL_BNDRY_VERTS];
+ int numPoints = 0;
final CoordIJK scratch1 = new CoordIJK(0, 0, 0);
final FaceIJK fijk = new FaceIJK(this.face, scratch1);
final CoordIJK scratch2 = isResolutionClassIII ? new CoordIJK(0, 0, 0) : null;
@@ -616,8 +616,7 @@ public CellBoundary faceIjkToCellBoundary(final int res, final int start, final
*/
final boolean isIntersectionAtVertex = orig2d0.numericallyIdentical(inter) || orig2d1.numericallyIdentical(inter);
if (isIntersectionAtVertex == false) {
- final LatLng point = inter.hex2dToGeo(this.face, adjRes, true);
- boundary.add(point);
+ points[numPoints++] = inter.hex2dToGeo(this.face, adjRes, true);
}
}
@@ -625,13 +624,12 @@ public CellBoundary faceIjkToCellBoundary(final int res, final int start, final
// vert == start + NUM_HEX_VERTS is only used to test for possible
// intersection on last edge
if (vert < start + Constants.NUM_HEX_VERTS) {
- final LatLng point = fijk.coord.ijkToGeo(fijk.face, adjRes, true);
- boundary.add(point);
+ points[numPoints++] = fijk.coord.ijkToGeo(fijk.face, adjRes, true);
}
lastFace = fijk.face;
lastOverage = overage;
}
- return boundary;
+ return new CellBoundary(points, numPoints);
}
/**
diff --git a/libs/h3/src/main/java/org/elasticsearch/h3/H3.java b/libs/h3/src/main/java/org/elasticsearch/h3/H3.java
index 46bcc3f141dde..8c0bba62cecdb 100644
--- a/libs/h3/src/main/java/org/elasticsearch/h3/H3.java
+++ b/libs/h3/src/main/java/org/elasticsearch/h3/H3.java
@@ -30,8 +30,10 @@
* Defines the public API of the H3 library.
*/
public final class H3 {
-
- public static int MAX_H3_RES = Constants.MAX_H3_RES;
+ /**
+ * max H3 resolution; H3 version 1 has 16 resolutions, numbered 0 through 15
+ */
+ public static int MAX_H3_RES = 15;
private static final long[] NORTH = new long[MAX_H3_RES + 1];
private static final long[] SOUTH = new long[MAX_H3_RES + 1];
@@ -97,7 +99,7 @@ public static boolean h3IsValid(long h3) {
}
int res = H3Index.H3_get_resolution(h3);
- if (res < 0 || res > Constants.MAX_H3_RES) { // LCOV_EXCL_BR_LINE
+ if (res < 0 || res > MAX_H3_RES) { // LCOV_EXCL_BR_LINE
// Resolutions less than zero can not be represented in an index
return false;
}
@@ -118,7 +120,7 @@ public static boolean h3IsValid(long h3) {
}
}
- for (int r = res + 1; r <= Constants.MAX_H3_RES; r++) {
+ for (int r = res + 1; r <= MAX_H3_RES; r++) {
int digit = H3Index.H3_get_index_digit(h3, r);
if (digit != CoordIJK.Direction.INVALID_DIGIT.digit()) {
return false;
@@ -601,7 +603,7 @@ private static String[] h3ToStringList(long[] h3s) {
* @throws IllegalArgumentException res is not a valid H3 resolution.
*/
private static void checkResolution(int res) {
- if (res < 0 || res > Constants.MAX_H3_RES) {
+ if (res < 0 || res > MAX_H3_RES) {
throw new IllegalArgumentException("resolution [" + res + "] is out of range (must be 0 <= res <= 15)");
}
}
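With `MAX_H3_RES` moved from the package-private `Constants` onto the public `H3` class, callers can reference the finest resolution directly. A small sketch grounded in the API calls visible in this diff:

[source,java]
----
// Resolution 15 is the finest H3 resolution; checkResolution rejects values
// outside 0..15 with an IllegalArgumentException.
long cell = H3.geoToH3(51.5074, -0.1278, H3.MAX_H3_RES);
assert H3.h3IsValid(cell);
----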
diff --git a/libs/h3/src/main/java/org/elasticsearch/h3/H3Index.java b/libs/h3/src/main/java/org/elasticsearch/h3/H3Index.java
index 7babedc55eb0e..2b1b9cade21a4 100644
--- a/libs/h3/src/main/java/org/elasticsearch/h3/H3Index.java
+++ b/libs/h3/src/main/java/org/elasticsearch/h3/H3Index.java
@@ -160,14 +160,14 @@ public static int H3_get_resolution(long h3) {
* Gets the resolution res integer digit (0-7) of h3.
*/
public static int H3_get_index_digit(long h3, int res) {
- return ((int) ((((h3) >> ((Constants.MAX_H3_RES - (res)) * H3_PER_DIGIT_OFFSET)) & H3_DIGIT_MASK)));
+ return ((int) ((((h3) >> ((H3.MAX_H3_RES - (res)) * H3_PER_DIGIT_OFFSET)) & H3_DIGIT_MASK)));
}
/**
* Sets the resolution res digit of h3 to the integer digit (0-7)
*/
public static long H3_set_index_digit(long h3, int res, long digit) {
- int x = (Constants.MAX_H3_RES - res) * H3_PER_DIGIT_OFFSET;
+ int x = (H3.MAX_H3_RES - res) * H3_PER_DIGIT_OFFSET;
return (((h3) & ~((H3_DIGIT_MASK << (x)))) | (((digit)) << x));
}
diff --git a/libs/h3/src/test/java/org/elasticsearch/h3/CellBoundaryTests.java b/libs/h3/src/test/java/org/elasticsearch/h3/CellBoundaryTests.java
index 903e4ed40ec16..00ca6f7021e3d 100644
--- a/libs/h3/src/test/java/org/elasticsearch/h3/CellBoundaryTests.java
+++ b/libs/h3/src/test/java/org/elasticsearch/h3/CellBoundaryTests.java
@@ -218,4 +218,22 @@ private boolean isSharedBoundary(int clon1, int clat1, int clon2, int clat2, Cel
}
return false;
}
+
+ public void testEqualsAndHashCode() {
+ final long h3 = H3.geoToH3(GeoTestUtil.nextLatitude(), GeoTestUtil.nextLongitude(), randomIntBetween(0, 15));
+ final CellBoundary boundary1 = H3.h3ToGeoBoundary(h3);
+ final CellBoundary boundary2 = H3.h3ToGeoBoundary(h3);
+ assertEquals(boundary1, boundary2);
+ assertEquals(boundary1.hashCode(), boundary2.hashCode());
+
+ final long otherH3 = H3.geoToH3(GeoTestUtil.nextLatitude(), GeoTestUtil.nextLongitude(), randomIntBetween(0, 15));
+ final CellBoundary otherCellBoundary = H3.h3ToGeoBoundary(otherH3);
+ if (otherH3 != h3) {
+ assertNotEquals(boundary1, otherCellBoundary);
+ assertNotEquals(boundary1.hashCode(), otherCellBoundary.hashCode());
+ } else {
+ assertEquals(boundary1, otherCellBoundary);
+ assertEquals(boundary1.hashCode(), otherCellBoundary.hashCode());
+ }
+ }
}
diff --git a/libs/h3/src/test/java/org/elasticsearch/h3/GeoToH3Tests.java b/libs/h3/src/test/java/org/elasticsearch/h3/GeoToH3Tests.java
index cb7d416a5a9d3..3f2c329d9ff3c 100644
--- a/libs/h3/src/test/java/org/elasticsearch/h3/GeoToH3Tests.java
+++ b/libs/h3/src/test/java/org/elasticsearch/h3/GeoToH3Tests.java
@@ -38,7 +38,7 @@ public void testRandomPoints() {
private void testPoint(double lat, double lon) {
GeoPoint point = new GeoPoint(PlanetModel.SPHERE, Math.toRadians(lat), Math.toRadians(lon));
- for (int res = 0; res < Constants.MAX_H3_RES; res++) {
+ for (int res = 0; res < H3.MAX_H3_RES; res++) {
String h3Address = H3.geoToH3Address(lat, lon, res);
assertEquals(res, H3.getResolution(h3Address));
GeoPolygon polygon = getGeoPolygon(h3Address);
diff --git a/libs/h3/src/test/java/org/elasticsearch/h3/HexRingTests.java b/libs/h3/src/test/java/org/elasticsearch/h3/HexRingTests.java
index 8fe5c6206fff8..864c0322cac90 100644
--- a/libs/h3/src/test/java/org/elasticsearch/h3/HexRingTests.java
+++ b/libs/h3/src/test/java/org/elasticsearch/h3/HexRingTests.java
@@ -38,7 +38,7 @@ public void testHexRing() {
for (int i = 0; i < 500; i++) {
double lat = GeoTestUtil.nextLatitude();
double lon = GeoTestUtil.nextLongitude();
- for (int res = 0; res <= Constants.MAX_H3_RES; res++) {
+ for (int res = 0; res <= H3.MAX_H3_RES; res++) {
String origin = H3.geoToH3Address(lat, lon, res);
assertFalse(H3.areNeighborCells(origin, origin));
String[] ring = H3.hexRing(origin);
diff --git a/libs/simdvec/build.gradle b/libs/simdvec/build.gradle
index 5a523a19d4b68..dab5c25b34679 100644
--- a/libs/simdvec/build.gradle
+++ b/libs/simdvec/build.gradle
@@ -23,6 +23,20 @@ dependencies {
}
}
+// compileMain21Java does not exist within idea (see MrJarPlugin) so we cannot reference directly by name
+tasks.matching { it.name == "compileMain21Java" }.configureEach {
+ options.compilerArgs << '--add-modules=jdk.incubator.vector'
+ // we remove Werror, since incubating suppression (-Xlint:-incubating)
+ // is only supported since JDK 22
+ options.compilerArgs -= '-Werror'
+}
+
+tasks.named('test').configure {
+ if (JavaVersion.current().majorVersion.toInteger() >= 21) {
+ jvmArgs '--add-modules=jdk.incubator.vector'
+ }
+}
+
tasks.withType(CheckForbiddenApisTask).configureEach {
replaceSignatureFiles 'jdk-signatures'
}
diff --git a/libs/simdvec/src/main/java/module-info.java b/libs/simdvec/src/main/java/module-info.java
index 64e685ba3cbb5..44f6e39d5dbab 100644
--- a/libs/simdvec/src/main/java/module-info.java
+++ b/libs/simdvec/src/main/java/module-info.java
@@ -10,6 +10,7 @@
module org.elasticsearch.simdvec {
requires org.elasticsearch.nativeaccess;
requires org.apache.lucene.core;
+ requires org.elasticsearch.logging;
exports org.elasticsearch.simdvec to org.elasticsearch.server;
}
diff --git a/libs/simdvec/src/main/java/org/elasticsearch/simdvec/ESVectorUtil.java b/libs/simdvec/src/main/java/org/elasticsearch/simdvec/ESVectorUtil.java
new file mode 100644
index 0000000000000..91193d5fa6eaf
--- /dev/null
+++ b/libs/simdvec/src/main/java/org/elasticsearch/simdvec/ESVectorUtil.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.simdvec;
+
+import org.elasticsearch.simdvec.internal.vectorization.ESVectorUtilSupport;
+import org.elasticsearch.simdvec.internal.vectorization.ESVectorizationProvider;
+
+import static org.elasticsearch.simdvec.internal.vectorization.ESVectorUtilSupport.B_QUERY;
+
+public class ESVectorUtil {
+
+ private static final ESVectorUtilSupport IMPL = ESVectorizationProvider.getInstance().getVectorUtilSupport();
+
+ public static long ipByteBinByte(byte[] q, byte[] d) {
+ if (q.length != d.length * B_QUERY) {
+ throw new IllegalArgumentException("vector dimensions incompatible: " + q.length + " != " + B_QUERY + " x " + d.length);
+ }
+ return IMPL.ipByteBinByte(q, d);
+ }
+}
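The guard above implies the expected layout: the query `q` holds `B_QUERY = 4` bit-planes over the same dimensions as the bit-packed document vector `d`, so `q` must be exactly four times as long. A hedged usage sketch (the sizes and the quantizer are assumptions, not part of this change):

[source,java]
----
// 128 binary dimensions, bit-packed at 8 dimensions per byte.
byte[] d = new byte[16];        // document: 1 bit per dimension
byte[] q = new byte[4 * 16];    // query: 4 bit-planes over the same dimensions
// ... fill q and d from a quantizer (not shown) ...
long ip = ESVectorUtil.ipByteBinByte(q, d); // throws if q.length != 4 * d.length
----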
diff --git a/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/DefaultESVectorUtilSupport.java b/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/DefaultESVectorUtilSupport.java
new file mode 100644
index 0000000000000..4a08096119d6a
--- /dev/null
+++ b/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/DefaultESVectorUtilSupport.java
@@ -0,0 +1,39 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.simdvec.internal.vectorization;
+
+import org.apache.lucene.util.BitUtil;
+
+final class DefaultESVectorUtilSupport implements ESVectorUtilSupport {
+
+ DefaultESVectorUtilSupport() {}
+
+ @Override
+ public long ipByteBinByte(byte[] q, byte[] d) {
+ return ipByteBinByteImpl(q, d);
+ }
+
+ public static long ipByteBinByteImpl(byte[] q, byte[] d) {
+ long ret = 0;
+ int size = d.length;
+ for (int i = 0; i < B_QUERY; i++) {
+ int r = 0;
+ long subRet = 0;
+ for (final int upperBound = d.length & -Integer.BYTES; r < upperBound; r += Integer.BYTES) {
+ subRet += Integer.bitCount((int) BitUtil.VH_NATIVE_INT.get(q, i * size + r) & (int) BitUtil.VH_NATIVE_INT.get(d, r));
+ }
+ for (; r < d.length; r++) {
+ subRet += Integer.bitCount((q[i * size + r] & d[r]) & 0xFF);
+ }
+ ret += subRet << i;
+ }
+ return ret;
+ }
+}
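For clarity, the optimized loop above (which reads four bytes at a time through `BitUtil.VH_NATIVE_INT`) computes the same value as this naive form: bit-plane `i` of the query contributes `popcount(q_i AND d)` weighted by `2^i`. An illustrative rewrite, not code from this change:

[source,java]
----
// Naive equivalent of ipByteBinByteImpl, spelled out plane by plane.
static long ipByteBinByteNaive(byte[] q, byte[] d) {
    long ret = 0;
    for (int plane = 0; plane < 4; plane++) {   // B_QUERY = 4 bit-planes
        long subRet = 0;
        for (int r = 0; r < d.length; r++) {
            subRet += Integer.bitCount((q[plane * d.length + r] & d[r]) & 0xFF);
        }
        ret += subRet << plane;                 // weight plane i by 2^i
    }
    return ret;
}
----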
diff --git a/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/DefaultESVectorizationProvider.java b/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/DefaultESVectorizationProvider.java
new file mode 100644
index 0000000000000..6c0f7ed146b86
--- /dev/null
+++ b/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/DefaultESVectorizationProvider.java
@@ -0,0 +1,23 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.simdvec.internal.vectorization;
+
+final class DefaultESVectorizationProvider extends ESVectorizationProvider {
+ private final ESVectorUtilSupport vectorUtilSupport;
+
+ DefaultESVectorizationProvider() {
+ vectorUtilSupport = new DefaultESVectorUtilSupport();
+ }
+
+ @Override
+ public ESVectorUtilSupport getVectorUtilSupport() {
+ return vectorUtilSupport;
+ }
+}
diff --git a/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/ESVectorUtilSupport.java b/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/ESVectorUtilSupport.java
new file mode 100644
index 0000000000000..d7611173ca693
--- /dev/null
+++ b/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/ESVectorUtilSupport.java
@@ -0,0 +1,17 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.simdvec.internal.vectorization;
+
+public interface ESVectorUtilSupport {
+
+ short B_QUERY = 4;
+
+ long ipByteBinByte(byte[] q, byte[] d);
+}
diff --git a/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/ESVectorizationProvider.java b/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/ESVectorizationProvider.java
new file mode 100644
index 0000000000000..e541c10e145bf
--- /dev/null
+++ b/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/ESVectorizationProvider.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.simdvec.internal.vectorization;
+
+import java.util.Objects;
+
+public abstract class ESVectorizationProvider {
+
+ public static ESVectorizationProvider getInstance() {
+ return Objects.requireNonNull(
+ ESVectorizationProvider.Holder.INSTANCE,
+ "call to getInstance() from subclass of VectorizationProvider"
+ );
+ }
+
+ ESVectorizationProvider() {}
+
+ public abstract ESVectorUtilSupport getVectorUtilSupport();
+
+ // visible for tests
+ static ESVectorizationProvider lookup(boolean testMode) {
+ return new DefaultESVectorizationProvider();
+ }
+
+ /** This static holder class prevents classloading deadlock. */
+ private static final class Holder {
+ private Holder() {}
+
+ static final ESVectorizationProvider INSTANCE = lookup(false);
+ }
+}
diff --git a/libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/ESVectorizationProvider.java b/libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/ESVectorizationProvider.java
new file mode 100644
index 0000000000000..5b7aab7ddfa48
--- /dev/null
+++ b/libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/ESVectorizationProvider.java
@@ -0,0 +1,87 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.simdvec.internal.vectorization;
+
+import org.apache.lucene.util.Constants;
+import org.elasticsearch.logging.LogManager;
+import org.elasticsearch.logging.Logger;
+
+import java.util.Locale;
+import java.util.Objects;
+import java.util.Optional;
+
+public abstract class ESVectorizationProvider {
+
+ protected static final Logger logger = LogManager.getLogger(ESVectorizationProvider.class);
+
+ public static ESVectorizationProvider getInstance() {
+ return Objects.requireNonNull(
+ ESVectorizationProvider.Holder.INSTANCE,
+ "call to getInstance() from subclass of VectorizationProvider"
+ );
+ }
+
+ ESVectorizationProvider() {}
+
+ public abstract ESVectorUtilSupport getVectorUtilSupport();
+
+ // visible for tests
+ static ESVectorizationProvider lookup(boolean testMode) {
+ final int runtimeVersion = Runtime.version().feature();
+ assert runtimeVersion >= 21;
+ if (runtimeVersion <= 23) {
+ // only use vector module with Hotspot VM
+ if (Constants.IS_HOTSPOT_VM == false) {
+ logger.warn("Java runtime is not using Hotspot VM; Java vector incubator API can't be enabled.");
+ return new DefaultESVectorizationProvider();
+ }
+ // is the incubator module present and readable (JVM providers may exclude it, or it may
+ // have been excluded from a jlink-built runtime)
+ final var vectorMod = lookupVectorModule();
+ if (vectorMod.isEmpty()) {
+ logger.warn(
+ "Java vector incubator module is not readable. "
+ + "For optimal vector performance, pass '--add-modules jdk.incubator.vector' to enable Vector API."
+ );
+ return new DefaultESVectorizationProvider();
+ }
+ vectorMod.ifPresent(ESVectorizationProvider.class.getModule()::addReads);
+ var impl = new PanamaESVectorizationProvider();
+ logger.info(
+ String.format(
+ Locale.ENGLISH,
+ "Java vector incubator API enabled; uses preferredBitSize=%d",
+ PanamaESVectorUtilSupport.VECTOR_BITSIZE
+ )
+ );
+ return impl;
+ } else {
+ logger.warn(
+ "You are running with unsupported Java "
+ + runtimeVersion
+ + ". To make full use of the Vector API, please update Elasticsearch."
+ );
+ }
+ return new DefaultESVectorizationProvider();
+ }
+
+ private static Optional<Module> lookupVectorModule() {
+ return Optional.ofNullable(ESVectorizationProvider.class.getModule().getLayer())
+ .orElse(ModuleLayer.boot())
+ .findModule("jdk.incubator.vector");
+ }
+
+ /** This static holder class prevents classloading deadlock. */
+ private static final class Holder {
+ private Holder() {}
+
+ static final ESVectorizationProvider INSTANCE = lookup(false);
+ }
+}
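
Reviewer note: the Panama path is only taken on JDK 21 through 23, on HotSpot, and when `jdk.incubator.vector` is readable; anything else falls back to the scalar provider. When debugging why the Panama provider was not selected, the same module probe can be run standalone (a hypothetical sketch; the class name is illustrative):

```java
import java.util.Optional;

// Hypothetical standalone probe mirroring lookupVectorModule() above. Prints false unless
// the JVM was started with --add-modules jdk.incubator.vector.
public final class VectorModuleProbe {
    public static void main(String[] args) {
        Optional<Module> vectorMod = Optional.ofNullable(VectorModuleProbe.class.getModule().getLayer())
            .orElse(ModuleLayer.boot())
            .findModule("jdk.incubator.vector");
        System.out.println("jdk.incubator.vector readable: " + vectorMod.isPresent());
    }
}
```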
diff --git a/libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/PanamaESVectorUtilSupport.java b/libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/PanamaESVectorUtilSupport.java
new file mode 100644
index 0000000000000..0e5827d046736
--- /dev/null
+++ b/libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/PanamaESVectorUtilSupport.java
@@ -0,0 +1,153 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.simdvec.internal.vectorization;
+
+import jdk.incubator.vector.ByteVector;
+import jdk.incubator.vector.IntVector;
+import jdk.incubator.vector.LongVector;
+import jdk.incubator.vector.VectorOperators;
+import jdk.incubator.vector.VectorShape;
+import jdk.incubator.vector.VectorSpecies;
+
+import org.apache.lucene.util.Constants;
+
+public final class PanamaESVectorUtilSupport implements ESVectorUtilSupport {
+
+ static final int VECTOR_BITSIZE;
+
+ /** Whether integer vectors can be trusted to actually be fast. */
+ static final boolean HAS_FAST_INTEGER_VECTORS;
+
+ static {
+ // default to platform supported bitsize
+ VECTOR_BITSIZE = VectorShape.preferredShape().vectorBitSize();
+
+ // hotspot misses some SSE intrinsics; work around it
+ // to be fair, they do document that this only works well with AVX2/AVX3 and Neon
+ boolean isAMD64withoutAVX2 = Constants.OS_ARCH.equals("amd64") && VECTOR_BITSIZE < 256;
+ HAS_FAST_INTEGER_VECTORS = isAMD64withoutAVX2 == false;
+ }
+
+ @Override
+ public long ipByteBinByte(byte[] q, byte[] d) {
+ // 128 / 8 == 16
+ if (d.length >= 16 && HAS_FAST_INTEGER_VECTORS) {
+ if (VECTOR_BITSIZE >= 256) {
+ return ipByteBin256(q, d);
+ } else if (VECTOR_BITSIZE == 128) {
+ return ipByteBin128(q, d);
+ }
+ }
+ return DefaultESVectorUtilSupport.ipByteBinByteImpl(q, d);
+ }
+
+ private static final VectorSpecies<Byte> BYTE_SPECIES_128 = ByteVector.SPECIES_128;
+ private static final VectorSpecies<Byte> BYTE_SPECIES_256 = ByteVector.SPECIES_256;
+
+ static long ipByteBin256(byte[] q, byte[] d) {
+ long subRet0 = 0;
+ long subRet1 = 0;
+ long subRet2 = 0;
+ long subRet3 = 0;
+ int i = 0;
+
+ if (d.length >= ByteVector.SPECIES_256.vectorByteSize() * 2) {
+ int limit = ByteVector.SPECIES_256.loopBound(d.length);
+ var sum0 = LongVector.zero(LongVector.SPECIES_256);
+ var sum1 = LongVector.zero(LongVector.SPECIES_256);
+ var sum2 = LongVector.zero(LongVector.SPECIES_256);
+ var sum3 = LongVector.zero(LongVector.SPECIES_256);
+ for (; i < limit; i += ByteVector.SPECIES_256.length()) {
+ var vq0 = ByteVector.fromArray(BYTE_SPECIES_256, q, i).reinterpretAsLongs();
+ var vq1 = ByteVector.fromArray(BYTE_SPECIES_256, q, i + d.length).reinterpretAsLongs();
+ var vq2 = ByteVector.fromArray(BYTE_SPECIES_256, q, i + d.length * 2).reinterpretAsLongs();
+ var vq3 = ByteVector.fromArray(BYTE_SPECIES_256, q, i + d.length * 3).reinterpretAsLongs();
+ var vd = ByteVector.fromArray(BYTE_SPECIES_256, d, i).reinterpretAsLongs();
+ sum0 = sum0.add(vq0.and(vd).lanewise(VectorOperators.BIT_COUNT));
+ sum1 = sum1.add(vq1.and(vd).lanewise(VectorOperators.BIT_COUNT));
+ sum2 = sum2.add(vq2.and(vd).lanewise(VectorOperators.BIT_COUNT));
+ sum3 = sum3.add(vq3.and(vd).lanewise(VectorOperators.BIT_COUNT));
+ }
+ subRet0 += sum0.reduceLanes(VectorOperators.ADD);
+ subRet1 += sum1.reduceLanes(VectorOperators.ADD);
+ subRet2 += sum2.reduceLanes(VectorOperators.ADD);
+ subRet3 += sum3.reduceLanes(VectorOperators.ADD);
+ }
+
+ if (d.length - i >= ByteVector.SPECIES_128.vectorByteSize()) {
+ var sum0 = LongVector.zero(LongVector.SPECIES_128);
+ var sum1 = LongVector.zero(LongVector.SPECIES_128);
+ var sum2 = LongVector.zero(LongVector.SPECIES_128);
+ var sum3 = LongVector.zero(LongVector.SPECIES_128);
+ int limit = ByteVector.SPECIES_128.loopBound(d.length);
+ for (; i < limit; i += ByteVector.SPECIES_128.length()) {
+ var vq0 = ByteVector.fromArray(BYTE_SPECIES_128, q, i).reinterpretAsLongs();
+ var vq1 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length).reinterpretAsLongs();
+ var vq2 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length * 2).reinterpretAsLongs();
+ var vq3 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length * 3).reinterpretAsLongs();
+ var vd = ByteVector.fromArray(BYTE_SPECIES_128, d, i).reinterpretAsLongs();
+ sum0 = sum0.add(vq0.and(vd).lanewise(VectorOperators.BIT_COUNT));
+ sum1 = sum1.add(vq1.and(vd).lanewise(VectorOperators.BIT_COUNT));
+ sum2 = sum2.add(vq2.and(vd).lanewise(VectorOperators.BIT_COUNT));
+ sum3 = sum3.add(vq3.and(vd).lanewise(VectorOperators.BIT_COUNT));
+ }
+ subRet0 += sum0.reduceLanes(VectorOperators.ADD);
+ subRet1 += sum1.reduceLanes(VectorOperators.ADD);
+ subRet2 += sum2.reduceLanes(VectorOperators.ADD);
+ subRet3 += sum3.reduceLanes(VectorOperators.ADD);
+ }
+ // tail as bytes
+ for (; i < d.length; i++) {
+ subRet0 += Integer.bitCount((q[i] & d[i]) & 0xFF);
+ subRet1 += Integer.bitCount((q[i + d.length] & d[i]) & 0xFF);
+ subRet2 += Integer.bitCount((q[i + 2 * d.length] & d[i]) & 0xFF);
+ subRet3 += Integer.bitCount((q[i + 3 * d.length] & d[i]) & 0xFF);
+ }
+ return subRet0 + (subRet1 << 1) + (subRet2 << 2) + (subRet3 << 3);
+ }
+
+ public static long ipByteBin128(byte[] q, byte[] d) {
+ long subRet0 = 0;
+ long subRet1 = 0;
+ long subRet2 = 0;
+ long subRet3 = 0;
+ int i = 0;
+
+ var sum0 = IntVector.zero(IntVector.SPECIES_128);
+ var sum1 = IntVector.zero(IntVector.SPECIES_128);
+ var sum2 = IntVector.zero(IntVector.SPECIES_128);
+ var sum3 = IntVector.zero(IntVector.SPECIES_128);
+ int limit = ByteVector.SPECIES_128.loopBound(d.length);
+ for (; i < limit; i += ByteVector.SPECIES_128.length()) {
+ var vd = ByteVector.fromArray(BYTE_SPECIES_128, d, i).reinterpretAsInts();
+ var vq0 = ByteVector.fromArray(BYTE_SPECIES_128, q, i).reinterpretAsInts();
+ var vq1 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length).reinterpretAsInts();
+ var vq2 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length * 2).reinterpretAsInts();
+ var vq3 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length * 3).reinterpretAsInts();
+ sum0 = sum0.add(vd.and(vq0).lanewise(VectorOperators.BIT_COUNT));
+ sum1 = sum1.add(vd.and(vq1).lanewise(VectorOperators.BIT_COUNT));
+ sum2 = sum2.add(vd.and(vq2).lanewise(VectorOperators.BIT_COUNT));
+ sum3 = sum3.add(vd.and(vq3).lanewise(VectorOperators.BIT_COUNT));
+ }
+ subRet0 += sum0.reduceLanes(VectorOperators.ADD);
+ subRet1 += sum1.reduceLanes(VectorOperators.ADD);
+ subRet2 += sum2.reduceLanes(VectorOperators.ADD);
+ subRet3 += sum3.reduceLanes(VectorOperators.ADD);
+ // tail as bytes
+ for (; i < d.length; i++) {
+ int dValue = d[i];
+ subRet0 += Integer.bitCount((dValue & q[i]) & 0xFF);
+ subRet1 += Integer.bitCount((dValue & q[i + d.length]) & 0xFF);
+ subRet2 += Integer.bitCount((dValue & q[i + 2 * d.length]) & 0xFF);
+ subRet3 += Integer.bitCount((dValue & q[i + 3 * d.length]) & 0xFF);
+ }
+ return subRet0 + (subRet1 << 1) + (subRet2 << 2) + (subRet3 << 3);
+ }
+}
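
Reviewer note: both kernels use the same core trick: AND a query bit-plane against the document bytes, reinterpret the lanes as wider integers, pop-count lanewise, and defer the horizontal reduction until after the loop. A stripped-down single-plane sketch of the 256-bit variant (assumes `a.length == b.length`; needs JDK 21+ with `--add-modules jdk.incubator.vector`):

```java
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.VectorOperators;

// Single-plane AND-popcount: the building block that ipByteBin256 unrolls four ways.
public final class AndPopcount {
    static long andPopcount256(byte[] a, byte[] b) {
        var sum = LongVector.zero(LongVector.SPECIES_256);
        int i = 0;
        int limit = ByteVector.SPECIES_256.loopBound(a.length);
        for (; i < limit; i += ByteVector.SPECIES_256.length()) {
            var va = ByteVector.fromArray(ByteVector.SPECIES_256, a, i).reinterpretAsLongs();
            var vb = ByteVector.fromArray(ByteVector.SPECIES_256, b, i).reinterpretAsLongs();
            sum = sum.add(va.and(vb).lanewise(VectorOperators.BIT_COUNT));
        }
        long ret = sum.reduceLanes(VectorOperators.ADD);
        for (; i < a.length; i++) { // scalar tail
            ret += Integer.bitCount((a[i] & b[i]) & 0xFF);
        }
        return ret;
    }
}
```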
diff --git a/libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/PanamaESVectorizationProvider.java b/libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/PanamaESVectorizationProvider.java
new file mode 100644
index 0000000000000..62d25d79487ed
--- /dev/null
+++ b/libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/PanamaESVectorizationProvider.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.simdvec.internal.vectorization;
+
+final class PanamaESVectorizationProvider extends ESVectorizationProvider {
+
+ private final ESVectorUtilSupport vectorUtilSupport;
+
+ PanamaESVectorizationProvider() {
+ vectorUtilSupport = new PanamaESVectorUtilSupport();
+ }
+
+ @Override
+ public ESVectorUtilSupport getVectorUtilSupport() {
+ return vectorUtilSupport;
+ }
+}
diff --git a/libs/simdvec/src/test/java/org/elasticsearch/simdvec/ESVectorUtilTests.java b/libs/simdvec/src/test/java/org/elasticsearch/simdvec/ESVectorUtilTests.java
new file mode 100644
index 0000000000000..0dbc41c0c1055
--- /dev/null
+++ b/libs/simdvec/src/test/java/org/elasticsearch/simdvec/ESVectorUtilTests.java
@@ -0,0 +1,130 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.simdvec;
+
+import org.elasticsearch.simdvec.internal.vectorization.BaseVectorizationTests;
+import org.elasticsearch.simdvec.internal.vectorization.ESVectorizationProvider;
+
+import java.util.Arrays;
+
+import static org.elasticsearch.simdvec.internal.vectorization.ESVectorUtilSupport.B_QUERY;
+
+public class ESVectorUtilTests extends BaseVectorizationTests {
+
+ static final ESVectorizationProvider defaultedProvider = BaseVectorizationTests.defaultProvider();
+ static final ESVectorizationProvider defOrPanamaProvider = BaseVectorizationTests.maybePanamaProvider();
+
+ public void testIpByteBinInvariants() {
+ int iterations = atLeast(10);
+ for (int i = 0; i < iterations; i++) {
+ int size = randomIntBetween(1, 10);
+ var d = new byte[size];
+ var q = new byte[size * B_QUERY - 1];
+ expectThrows(IllegalArgumentException.class, () -> ESVectorUtil.ipByteBinByte(q, d));
+ }
+ }
+
+ public void testBasicIpByteBin() {
+ testBasicIpByteBinImpl(ESVectorUtil::ipByteBinByte);
+ testBasicIpByteBinImpl(defaultedProvider.getVectorUtilSupport()::ipByteBinByte);
+ testBasicIpByteBinImpl(defOrPanamaProvider.getVectorUtilSupport()::ipByteBinByte);
+ }
+
+ interface IpByteBin {
+ long apply(byte[] q, byte[] d);
+ }
+
+ void testBasicIpByteBinImpl(IpByteBin ipByteBinFunc) {
+ assertEquals(15L, ipByteBinFunc.apply(new byte[] { 1, 1, 1, 1 }, new byte[] { 1 }));
+ assertEquals(30L, ipByteBinFunc.apply(new byte[] { 1, 2, 1, 2, 1, 2, 1, 2 }, new byte[] { 1, 2 }));
+
+ var d = new byte[] { 1, 2, 3 };
+ var q = new byte[] { 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3 };
+ assert scalarIpByteBin(q, d) == 60L; // 4 + 8 + 16 + 32
+ assertEquals(60L, ipByteBinFunc.apply(q, d));
+
+ d = new byte[] { 1, 2, 3, 4 };
+ q = new byte[] { 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 };
+ assert scalarIpByteBin(q, d) == 75L; // 5 + 10 + 20 + 40
+ assertEquals(75L, ipByteBinFunc.apply(q, d));
+
+ d = new byte[] { 1, 2, 3, 4, 5 };
+ q = new byte[] { 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5 };
+ assert scalarIpByteBin(q, d) == 105L; // 7 + 14 + 28 + 56
+ assertEquals(105L, ipByteBinFunc.apply(q, d));
+
+ d = new byte[] { 1, 2, 3, 4, 5, 6 };
+ q = new byte[] { 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6 };
+ assert scalarIpByteBin(q, d) == 135L; // 9 + 18 + 36 + 72
+ assertEquals(135L, ipByteBinFunc.apply(q, d));
+
+ d = new byte[] { 1, 2, 3, 4, 5, 6, 7 };
+ q = new byte[] { 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7 };
+ assert scalarIpByteBin(q, d) == 180L; // 12 + 24 + 48 + 96
+ assertEquals(180L, ipByteBinFunc.apply(q, d));
+
+ d = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8 };
+ q = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8 };
+ assert scalarIpByteBin(q, d) == 195L; // 13 + 26 + 52 + 104
+ assertEquals(195L, ipByteBinFunc.apply(q, d));
+
+ d = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+ q = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+ assert scalarIpByteBin(q, d) == 225L; // 15 + 30 + 60 + 120
+ assertEquals(225L, ipByteBinFunc.apply(q, d));
+ }
+
+ public void testIpByteBin() {
+ testIpByteBinImpl(ESVectorUtil::ipByteBinByte);
+ testIpByteBinImpl(defaultedProvider.getVectorUtilSupport()::ipByteBinByte);
+ testIpByteBinImpl(defOrPanamaProvider.getVectorUtilSupport()::ipByteBinByte);
+ }
+
+ void testIpByteBinImpl(IpByteBin ipByteBinFunc) {
+ int iterations = atLeast(50);
+ for (int i = 0; i < iterations; i++) {
+ int size = random().nextInt(5000);
+ var d = new byte[size];
+ var q = new byte[size * B_QUERY];
+ random().nextBytes(d);
+ random().nextBytes(q);
+ assertEquals(scalarIpByteBin(q, d), ipByteBinFunc.apply(q, d));
+
+ Arrays.fill(d, Byte.MAX_VALUE);
+ Arrays.fill(q, Byte.MAX_VALUE);
+ assertEquals(scalarIpByteBin(q, d), ipByteBinFunc.apply(q, d));
+
+ Arrays.fill(d, Byte.MIN_VALUE);
+ Arrays.fill(q, Byte.MIN_VALUE);
+ assertEquals(scalarIpByteBin(q, d), ipByteBinFunc.apply(q, d));
+ }
+ }
+
+ static int scalarIpByteBin(byte[] q, byte[] d) {
+ int res = 0;
+ for (int i = 0; i < B_QUERY; i++) {
+ res += (popcount(q, i * d.length, d, d.length) << i);
+ }
+ return res;
+ }
+
+ public static int popcount(byte[] a, int aOffset, byte[] b, int length) {
+ int res = 0;
+ for (int j = 0; j < length; j++) {
+ int value = (a[aOffset + j] & b[j]) & 0xFF;
+ for (int k = 0; k < Byte.SIZE; k++) {
+ if ((value & (1 << k)) != 0) {
+ ++res;
+ }
+ }
+ }
+ return res;
+ }
+}
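
Reviewer note: the expected values in `testBasicIpByteBin` follow directly from the plane weighting. When every query plane equals `d`, each plane has the same popcount, so the total is that popcount times `1 + 2 + 4 + 8 = 15`: for `d = {1, 2, 3}` the per-plane popcount is `1 + 1 + 2 = 4`, hence the commented `4 + 8 + 16 + 32 = 60`, and the first assertion with `d = {1}` and all-ones planes is simply `1 * 15 = 15`.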
diff --git a/libs/simdvec/src/test/java/org/elasticsearch/simdvec/internal/vectorization/BaseVectorizationTests.java b/libs/simdvec/src/test/java/org/elasticsearch/simdvec/internal/vectorization/BaseVectorizationTests.java
new file mode 100644
index 0000000000000..f2bc8a11b04aa
--- /dev/null
+++ b/libs/simdvec/src/test/java/org/elasticsearch/simdvec/internal/vectorization/BaseVectorizationTests.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.simdvec.internal.vectorization;
+
+import org.elasticsearch.test.ESTestCase;
+import org.junit.Before;
+
+public class BaseVectorizationTests extends ESTestCase {
+
+ @Before
+ public void sanity() {
+ assert Runtime.version().feature() < 21 || ModuleLayer.boot().findModule("jdk.incubator.vector").isPresent();
+ }
+
+ public static ESVectorizationProvider defaultProvider() {
+ return new DefaultESVectorizationProvider();
+ }
+
+ public static ESVectorizationProvider maybePanamaProvider() {
+ return ESVectorizationProvider.lookup(true);
+ }
+}
diff --git a/modules/aggregations/src/test/java/org/elasticsearch/aggregations/bucket/histogram/InternalAutoDateHistogramTests.java b/modules/aggregations/src/test/java/org/elasticsearch/aggregations/bucket/histogram/InternalAutoDateHistogramTests.java
index 5455daf0a79ec..227557590731e 100644
--- a/modules/aggregations/src/test/java/org/elasticsearch/aggregations/bucket/histogram/InternalAutoDateHistogramTests.java
+++ b/modules/aggregations/src/test/java/org/elasticsearch/aggregations/bucket/histogram/InternalAutoDateHistogramTests.java
@@ -9,7 +9,6 @@
package org.elasticsearch.aggregations.bucket.histogram;
-import org.elasticsearch.TransportVersion;
import org.elasticsearch.TransportVersions;
import org.elasticsearch.aggregations.bucket.AggregationMultiBucketAggregationTestCase;
import org.elasticsearch.aggregations.bucket.histogram.AutoDateHistogramAggregationBuilder.RoundingInfo;
@@ -28,7 +27,6 @@
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval;
import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
import org.elasticsearch.test.InternalAggregationTestCase;
-import org.elasticsearch.test.TransportVersionUtils;
import java.io.IOException;
import java.time.Instant;
@@ -459,33 +457,6 @@ public void testCreateWithReplacementBuckets() {
assertThat(copy.getInterval(), equalTo(orig.getInterval()));
}
- public void testSerializationPre830() throws IOException {
- // we need to test without sub-aggregations, otherwise we need to also update the interval within the inner aggs
- InternalAutoDateHistogram instance = createTestInstance(
- randomAlphaOfLengthBetween(3, 7),
- createTestMetadata(),
- InternalAggregations.EMPTY
- );
- TransportVersion version = TransportVersionUtils.randomVersionBetween(
- random(),
- TransportVersions.MINIMUM_COMPATIBLE,
- TransportVersionUtils.getPreviousVersion(TransportVersions.V_8_3_0)
- );
- InternalAutoDateHistogram deserialized = copyInstance(instance, version);
- assertEquals(1, deserialized.getBucketInnerInterval());
-
- InternalAutoDateHistogram modified = new InternalAutoDateHistogram(
- deserialized.getName(),
- deserialized.getBuckets(),
- deserialized.getTargetBuckets(),
- deserialized.getBucketInfo(),
- deserialized.getFormatter(),
- deserialized.getMetadata(),
- instance.getBucketInnerInterval()
- );
- assertEqualInstances(instance, modified);
- }
-
public void testReadFromPre830() throws IOException {
byte[] bytes = Base64.getDecoder()
.decode(
diff --git a/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/stats_metric_fail_formatting.yml b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/stats_metric_fail_formatting.yml
index d9298a832e650..82371c973407c 100644
--- a/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/stats_metric_fail_formatting.yml
+++ b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/stats_metric_fail_formatting.yml
@@ -30,7 +30,7 @@ setup:
cluster_features: "gte_v8.15.0"
reason: fixed in 8.15.0
- do:
- catch: /Cannot format stat \[sum\] with format \[DocValueFormat.DateTime\(format\[date_hour_minute_second_millis\] locale\[\], Z, MILLISECONDS\)\]/
+ catch: /Cannot format stat \[sum\] with format \[DocValueFormat.DateTime\(format\[date_hour_minute_second_millis\] locale\[(en)?\], Z, MILLISECONDS\)\]/
search:
index: test_date
body:
diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/DataGenerationHelper.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/DataGenerationHelper.java
index 515d07103bff8..8b29b1609711f 100644
--- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/DataGenerationHelper.java
+++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/qa/DataGenerationHelper.java
@@ -12,25 +12,18 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.CheckedConsumer;
import org.elasticsearch.index.mapper.Mapper;
-import org.elasticsearch.index.mapper.ObjectMapper;
import org.elasticsearch.logsdb.datageneration.DataGenerator;
import org.elasticsearch.logsdb.datageneration.DataGeneratorSpecification;
import org.elasticsearch.logsdb.datageneration.FieldDataGenerator;
-import org.elasticsearch.logsdb.datageneration.datasource.DataSourceHandler;
-import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;
-import org.elasticsearch.logsdb.datageneration.datasource.DataSourceResponse;
import org.elasticsearch.logsdb.datageneration.fields.PredefinedField;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xcontent.XContentBuilder;
import java.io.IOException;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import java.util.function.Consumer;
public class DataGenerationHelper {
- private final ObjectMapper.Subobjects subobjects;
private final boolean keepArraySource;
private final DataGenerator dataGenerator;
@@ -40,44 +33,10 @@ public DataGenerationHelper() {
}
public DataGenerationHelper(Consumer<DataGeneratorSpecification.Builder> builderConfigurator) {
- // TODO enable subobjects: auto
- // It is disabled because it currently does not have auto flattening and that results in asserts being triggered when using copy_to.
- this.subobjects = ESTestCase.randomValueOtherThan(
- ObjectMapper.Subobjects.AUTO,
- () -> ESTestCase.randomFrom(ObjectMapper.Subobjects.values())
- );
this.keepArraySource = ESTestCase.randomBoolean();
- var specificationBuilder = DataGeneratorSpecification.builder().withFullyDynamicMapping(ESTestCase.randomBoolean());
- if (subobjects != ObjectMapper.Subobjects.ENABLED) {
- specificationBuilder = specificationBuilder.withNestedFieldsLimit(0);
- }
-
- specificationBuilder.withDataSourceHandlers(List.of(new DataSourceHandler() {
- @Override
- public DataSourceResponse.ObjectMappingParametersGenerator handle(DataSourceRequest.ObjectMappingParametersGenerator request) {
- if (subobjects == ObjectMapper.Subobjects.ENABLED) {
- // Use default behavior
- return null;
- }
-
- assert request.isNested() == false;
-
- // "enabled: false" is not compatible with subobjects: false
- // "dynamic: false/strict/runtime" is not compatible with subobjects: false
- return new DataSourceResponse.ObjectMappingParametersGenerator(() -> {
- var parameters = new HashMap<String, Object>();
- parameters.put("subobjects", subobjects.toString());
- if (ESTestCase.randomBoolean()) {
- parameters.put("dynamic", "true");
- }
- if (ESTestCase.randomBoolean()) {
- parameters.put("enabled", "true");
- }
- return parameters;
- });
- }
- }))
+ var specificationBuilder = DataGeneratorSpecification.builder()
+ .withFullyDynamicMapping(ESTestCase.randomBoolean())
.withPredefinedFields(
List.of(
// Customized because it always needs doc_values for aggregations.
@@ -136,11 +95,7 @@ void logsDbMapping(XContentBuilder builder) throws IOException {
}
void standardMapping(XContentBuilder builder) throws IOException {
- if (subobjects != ObjectMapper.Subobjects.ENABLED) {
- dataGenerator.writeMapping(builder, Map.of("subobjects", subobjects.toString()));
- } else {
- dataGenerator.writeMapping(builder);
- }
+ dataGenerator.writeMapping(builder);
}
void logsDbSettings(Settings.Builder builder) {
diff --git a/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/DatabaseNodeServiceIT.java b/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/DatabaseNodeServiceIT.java
index 73d8976c3a4b7..786f091e0c024 100644
--- a/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/DatabaseNodeServiceIT.java
+++ b/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/DatabaseNodeServiceIT.java
@@ -84,7 +84,7 @@ private void assertValidDatabase(DatabaseNodeService databaseNodeService, String
IpDatabase database = databaseNodeService.getDatabase(databaseFileName);
assertNotNull(database);
assertThat(database.getDatabaseType(), equalTo(databaseType));
- CountryResponse countryResponse = database.getCountry("89.160.20.128");
+ CountryResponse countryResponse = database.getResponse("89.160.20.128", GeoIpTestUtils::getCountry);
assertNotNull(countryResponse);
Country country = countryResponse.getCountry();
assertNotNull(country);
diff --git a/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/ReloadingDatabasesWhilePerformingGeoLookupsIT.java b/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/ReloadingDatabasesWhilePerformingGeoLookupsIT.java
index 2c7d5fbcc56b7..b28926673069d 100644
--- a/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/ReloadingDatabasesWhilePerformingGeoLookupsIT.java
+++ b/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/ReloadingDatabasesWhilePerformingGeoLookupsIT.java
@@ -205,10 +205,10 @@ private static DatabaseNodeService createRegistry(Path geoIpConfigDir, Path geoI
private static void lazyLoadReaders(DatabaseNodeService databaseNodeService) throws IOException {
if (databaseNodeService.get("GeoLite2-City.mmdb") != null) {
databaseNodeService.get("GeoLite2-City.mmdb").getDatabaseType();
- databaseNodeService.get("GeoLite2-City.mmdb").getCity("2.125.160.216");
+ databaseNodeService.get("GeoLite2-City.mmdb").getResponse("2.125.160.216", GeoIpTestUtils::getCity);
}
databaseNodeService.get("GeoLite2-City-Test.mmdb").getDatabaseType();
- databaseNodeService.get("GeoLite2-City-Test.mmdb").getCity("2.125.160.216");
+ databaseNodeService.get("GeoLite2-City-Test.mmdb").getResponse("2.125.160.216", GeoIpTestUtils::getCity);
}
}
diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/Database.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/Database.java
index dccda0d58cfbf..128c16e163764 100644
--- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/Database.java
+++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/Database.java
@@ -9,7 +9,6 @@
package org.elasticsearch.ingest.geoip;
-import org.elasticsearch.common.Strings;
import org.elasticsearch.core.Nullable;
import java.util.Arrays;
@@ -19,18 +18,21 @@
import java.util.Set;
/**
- * A high-level representation of a kind of geoip database that is supported by the {@link GeoIpProcessor}.
+ * A high-level representation of a kind of ip location database that is supported by the {@link GeoIpProcessor}.
*
* A database has a set of properties that are valid to use with it (see {@link Database#properties()}),
* as well as a list of default properties to use if no properties are specified (see {@link Database#defaultProperties()}).
*
- * See especially {@link Database#getDatabase(String, String)} which is used to obtain instances of this class.
+ * Some database providers have similar concepts but might have slightly different properties associated with those types.
+ * This can be accommodated, for example, by having a Foo value and a separate FooV2 value where the 'V' should be read as
+ * 'variant' or 'variation'. A V-less Database type is inherently the first variant/variation (i.e. V1).
*/
enum Database {
City(
Set.of(
Property.IP,
+ Property.COUNTRY_IN_EUROPEAN_UNION,
Property.COUNTRY_ISO_CODE,
Property.CONTINENT_CODE,
Property.COUNTRY_NAME,
@@ -39,7 +41,9 @@ enum Database {
Property.REGION_NAME,
Property.CITY_NAME,
Property.TIMEZONE,
- Property.LOCATION
+ Property.LOCATION,
+ Property.POSTAL_CODE,
+ Property.ACCURACY_RADIUS
),
Set.of(
Property.COUNTRY_ISO_CODE,
@@ -52,7 +56,14 @@ enum Database {
)
),
Country(
- Set.of(Property.IP, Property.CONTINENT_CODE, Property.CONTINENT_NAME, Property.COUNTRY_NAME, Property.COUNTRY_ISO_CODE),
+ Set.of(
+ Property.IP,
+ Property.CONTINENT_CODE,
+ Property.CONTINENT_NAME,
+ Property.COUNTRY_NAME,
+ Property.COUNTRY_IN_EUROPEAN_UNION,
+ Property.COUNTRY_ISO_CODE
+ ),
Set.of(Property.CONTINENT_NAME, Property.COUNTRY_NAME, Property.COUNTRY_ISO_CODE)
),
Asn(
@@ -83,12 +94,15 @@ enum Database {
Enterprise(
Set.of(
Property.IP,
+ Property.COUNTRY_CONFIDENCE,
+ Property.COUNTRY_IN_EUROPEAN_UNION,
Property.COUNTRY_ISO_CODE,
Property.COUNTRY_NAME,
Property.CONTINENT_CODE,
Property.CONTINENT_NAME,
Property.REGION_ISO_CODE,
Property.REGION_NAME,
+ Property.CITY_CONFIDENCE,
Property.CITY_NAME,
Property.TIMEZONE,
Property.LOCATION,
@@ -107,7 +121,10 @@ enum Database {
Property.MOBILE_COUNTRY_CODE,
Property.MOBILE_NETWORK_CODE,
Property.USER_TYPE,
- Property.CONNECTION_TYPE
+ Property.CONNECTION_TYPE,
+ Property.POSTAL_CODE,
+ Property.POSTAL_CONFIDENCE,
+ Property.ACCURACY_RADIUS
),
Set.of(
Property.COUNTRY_ISO_CODE,
@@ -140,63 +157,20 @@ enum Database {
Property.MOBILE_COUNTRY_CODE,
Property.MOBILE_NETWORK_CODE
)
+ ),
+ AsnV2(
+ Set.of(
+ Property.IP,
+ Property.ASN,
+ Property.ORGANIZATION_NAME,
+ Property.NETWORK,
+ Property.DOMAIN,
+ Property.COUNTRY_ISO_CODE,
+ Property.TYPE
+ ),
+ Set.of(Property.IP, Property.ASN, Property.ORGANIZATION_NAME, Property.NETWORK)
);
- private static final String CITY_DB_SUFFIX = "-City";
- private static final String COUNTRY_DB_SUFFIX = "-Country";
- private static final String ASN_DB_SUFFIX = "-ASN";
- private static final String ANONYMOUS_IP_DB_SUFFIX = "-Anonymous-IP";
- private static final String CONNECTION_TYPE_DB_SUFFIX = "-Connection-Type";
- private static final String DOMAIN_DB_SUFFIX = "-Domain";
- private static final String ENTERPRISE_DB_SUFFIX = "-Enterprise";
- private static final String ISP_DB_SUFFIX = "-ISP";
-
- @Nullable
- private static Database getMaxmindDatabase(final String databaseType) {
- if (databaseType.endsWith(Database.CITY_DB_SUFFIX)) {
- return Database.City;
- } else if (databaseType.endsWith(Database.COUNTRY_DB_SUFFIX)) {
- return Database.Country;
- } else if (databaseType.endsWith(Database.ASN_DB_SUFFIX)) {
- return Database.Asn;
- } else if (databaseType.endsWith(Database.ANONYMOUS_IP_DB_SUFFIX)) {
- return Database.AnonymousIp;
- } else if (databaseType.endsWith(Database.CONNECTION_TYPE_DB_SUFFIX)) {
- return Database.ConnectionType;
- } else if (databaseType.endsWith(Database.DOMAIN_DB_SUFFIX)) {
- return Database.Domain;
- } else if (databaseType.endsWith(Database.ENTERPRISE_DB_SUFFIX)) {
- return Database.Enterprise;
- } else if (databaseType.endsWith(Database.ISP_DB_SUFFIX)) {
- return Database.Isp;
- } else {
- return null; // no match was found
- }
- }
-
- /**
- * Parses the passed-in databaseType (presumably from the passed-in databaseFile) and return the Database instance that is
- * associated with that databaseType.
- *
- * @param databaseType the database type String from the metadata of the database file
- * @param databaseFile the database file from which the database type was obtained
- * @throws IllegalArgumentException if the databaseType is not associated with a Database instance
- * @return the Database instance that is associated with the databaseType
- */
- public static Database getDatabase(final String databaseType, final String databaseFile) {
- Database database = null;
-
- if (Strings.hasText(databaseType)) {
- database = getMaxmindDatabase(databaseType);
- }
-
- if (database == null) {
- throw new IllegalArgumentException("Unsupported database type [" + databaseType + "] for file [" + databaseFile + "]");
- }
-
- return database;
- }
-
private final Set<Property> properties;
private final Set<Property> defaultProperties;
@@ -245,12 +219,15 @@ public Set<Property> parseProperties(@Nullable final List<String> propertyNames)
enum Property {
IP,
+ COUNTRY_CONFIDENCE,
+ COUNTRY_IN_EUROPEAN_UNION,
COUNTRY_ISO_CODE,
COUNTRY_NAME,
CONTINENT_CODE,
CONTINENT_NAME,
REGION_ISO_CODE,
REGION_NAME,
+ CITY_CONFIDENCE,
CITY_NAME,
TIMEZONE,
LOCATION,
@@ -269,7 +246,11 @@ enum Property {
MOBILE_COUNTRY_CODE,
MOBILE_NETWORK_CODE,
CONNECTION_TYPE,
- USER_TYPE;
+ USER_TYPE,
+ TYPE,
+ POSTAL_CODE,
+ POSTAL_CONFIDENCE,
+ ACCURACY_RADIUS;
/**
* Parses a string representation of a property into an actual Property instance. Not all properties that exist are
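
Reviewer note: with these additions a `City` configuration can now request `postal_code` and `accuracy_radius`, and the new `AsnV2` type carries `domain`, `country_iso_code`, and `type`. A hypothetical validation sketch, usable only from within the ingest-geoip package since `Database` is package-private (and assuming the lower-cased property-name strings that `parseProperties` accepts):

```java
// Hypothetical: validate a processor's configured properties against the City database type.
Set<Database.Property> requested = Database.City.parseProperties(
    List.of("ip", "country_iso_code", "postal_code", "accuracy_radius")
);
```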
diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseReaderLazyLoader.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseReaderLazyLoader.java
index e160c8ad1543f..120afe0e9e815 100644
--- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseReaderLazyLoader.java
+++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseReaderLazyLoader.java
@@ -9,18 +9,8 @@
package org.elasticsearch.ingest.geoip;
-import com.maxmind.db.DatabaseRecord;
-import com.maxmind.db.Network;
import com.maxmind.db.NoCache;
import com.maxmind.db.Reader;
-import com.maxmind.geoip2.model.AnonymousIpResponse;
-import com.maxmind.geoip2.model.AsnResponse;
-import com.maxmind.geoip2.model.CityResponse;
-import com.maxmind.geoip2.model.ConnectionTypeResponse;
-import com.maxmind.geoip2.model.CountryResponse;
-import com.maxmind.geoip2.model.DomainResponse;
-import com.maxmind.geoip2.model.EnterpriseResponse;
-import com.maxmind.geoip2.model.IspResponse;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -28,8 +18,6 @@
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.CheckedBiFunction;
import org.elasticsearch.common.CheckedSupplier;
-import org.elasticsearch.common.network.InetAddresses;
-import org.elasticsearch.common.network.NetworkAddress;
import org.elasticsearch.core.Booleans;
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.core.Nullable;
@@ -37,19 +25,16 @@
import java.io.File;
import java.io.IOException;
-import java.net.InetAddress;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.util.List;
import java.util.Objects;
-import java.util.Optional;
import java.util.concurrent.atomic.AtomicInteger;
/**
* Facilitates lazy loading of the database reader, so that when the geoip plugin is installed, but not used,
* no memory is being wasted on the database reader.
*/
-class DatabaseReaderLazyLoader implements IpDatabase {
+public class DatabaseReaderLazyLoader implements IpDatabase {
private static final boolean LOAD_DATABASE_ON_HEAP = Booleans.parseBoolean(System.getProperty("es.geoip.load_db_on_heap", "false"));
@@ -96,94 +81,6 @@ public final String getDatabaseType() throws IOException {
return databaseType.get();
}
- @Nullable
- @Override
- public CityResponse getCity(String ipAddress) {
- return getResponse(ipAddress, (reader, ip) -> lookup(reader, ip, CityResponse.class, CityResponse::new));
- }
-
- @Nullable
- @Override
- public CountryResponse getCountry(String ipAddress) {
- return getResponse(ipAddress, (reader, ip) -> lookup(reader, ip, CountryResponse.class, CountryResponse::new));
- }
-
- @Nullable
- @Override
- public AsnResponse getAsn(String ipAddress) {
- return getResponse(
- ipAddress,
- (reader, ip) -> lookup(
- reader,
- ip,
- AsnResponse.class,
- (response, responseIp, network, locales) -> new AsnResponse(response, responseIp, network)
- )
- );
- }
-
- @Nullable
- @Override
- public AnonymousIpResponse getAnonymousIp(String ipAddress) {
- return getResponse(
- ipAddress,
- (reader, ip) -> lookup(
- reader,
- ip,
- AnonymousIpResponse.class,
- (response, responseIp, network, locales) -> new AnonymousIpResponse(response, responseIp, network)
- )
- );
- }
-
- @Nullable
- @Override
- public ConnectionTypeResponse getConnectionType(String ipAddress) {
- return getResponse(
- ipAddress,
- (reader, ip) -> lookup(
- reader,
- ip,
- ConnectionTypeResponse.class,
- (response, responseIp, network, locales) -> new ConnectionTypeResponse(response, responseIp, network)
- )
- );
- }
-
- @Nullable
- @Override
- public DomainResponse getDomain(String ipAddress) {
- return getResponse(
- ipAddress,
- (reader, ip) -> lookup(
- reader,
- ip,
- DomainResponse.class,
- (response, responseIp, network, locales) -> new DomainResponse(response, responseIp, network)
- )
- );
- }
-
- @Nullable
- @Override
- public EnterpriseResponse getEnterprise(String ipAddress) {
- return getResponse(ipAddress, (reader, ip) -> lookup(reader, ip, EnterpriseResponse.class, EnterpriseResponse::new));
- }
-
- @Nullable
- @Override
- public IspResponse getIsp(String ipAddress) {
- return getResponse(
- ipAddress,
- (reader, ip) -> lookup(
- reader,
- ip,
- IspResponse.class,
- (response, responseIp, network, locales) -> new IspResponse(response, responseIp, network)
- )
- );
- }
-
boolean preLookup() {
return currentUsages.updateAndGet(current -> current < 0 ? current : current + 1) > 0;
}
@@ -199,14 +96,12 @@ int current() {
return currentUsages.get();
}
+ @Override
@Nullable
- private <RESPONSE> RESPONSE getResponse(
- String ipAddress,
- CheckedBiFunction<Reader, String, Optional<RESPONSE>, Exception> responseProvider
- ) {
+ public <RESPONSE> RESPONSE getResponse(String ipAddress, CheckedBiFunction<Reader, String, RESPONSE, Exception> responseProvider) {
return cache.putIfAbsent(ipAddress, databasePath.toString(), ip -> {
try {
- return responseProvider.apply(get(), ipAddress).orElse(null);
+ return responseProvider.apply(get(), ipAddress);
} catch (Exception e) {
throw ExceptionsHelper.convertToRuntime(e);
}
@@ -263,23 +158,6 @@ private static File pathToFile(Path databasePath) {
return databasePath.toFile();
}
- @FunctionalInterface
- private interface ResponseBuilder<RESPONSE> {
- RESPONSE build(RESPONSE response, String responseIp, Network network, List<String> locales);
- }
-
- private <RESPONSE> Optional<RESPONSE> lookup(Reader reader, String ip, Class<RESPONSE> clazz, ResponseBuilder<RESPONSE> builder)
- throws IOException {
- InetAddress inetAddress = InetAddresses.forString(ip);
- DatabaseRecord<RESPONSE> record = reader.getRecord(inetAddress, clazz);
- RESPONSE result = record.getData();
- if (result == null) {
- return Optional.empty();
- } else {
- return Optional.of(builder.build(result, NetworkAddress.format(inetAddress), record.getNetwork(), List.of("en")));
- }
- }
-
long getBuildDateMillis() throws IOException {
if (buildDate.get() == null) {
synchronized (buildDate) {
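
Reviewer note: the eight per-type getters are gone; `DatabaseReaderLazyLoader` now exposes a single generic, cache-backed `getResponse`, and callers supply the raw MaxMind `Reader` lookup themselves, as the updated integration tests do with `GeoIpTestUtils::getCountry` and `GeoIpTestUtils::getCity`. A sketch of the new calling convention (`database` stands for any `IpDatabase`):

```java
import com.maxmind.geoip2.model.CountryResponse;

// Sketch: the caller now picks both the response type and the lookup in one place.
static CountryResponse lookupCountry(IpDatabase database, String ip) {
    return database.getResponse(ip, GeoIpTestUtils::getCountry);
}
```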
diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpCache.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpCache.java
index 335331ac0ab9d..d9c9c3aaf3266 100644
--- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpCache.java
+++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpCache.java
@@ -26,7 +26,7 @@
* cost of deserialization for each lookup (cached or not). This comes at slight expense of higher memory usage, but significant
* reduction of CPU usage.
*/
-final class GeoIpCache {
+public final class GeoIpCache {
/**
* Internal-only sentinel object for recording that a result from the geoip database was null (i.e. there was no result). By caching
diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java
index ce160b060ae4c..e2b516bf5b943 100644
--- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java
+++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java
@@ -9,23 +9,6 @@
package org.elasticsearch.ingest.geoip;
-import com.maxmind.db.Network;
-import com.maxmind.geoip2.model.AnonymousIpResponse;
-import com.maxmind.geoip2.model.AsnResponse;
-import com.maxmind.geoip2.model.CityResponse;
-import com.maxmind.geoip2.model.ConnectionTypeResponse;
-import com.maxmind.geoip2.model.ConnectionTypeResponse.ConnectionType;
-import com.maxmind.geoip2.model.CountryResponse;
-import com.maxmind.geoip2.model.DomainResponse;
-import com.maxmind.geoip2.model.EnterpriseResponse;
-import com.maxmind.geoip2.model.IspResponse;
-import com.maxmind.geoip2.record.City;
-import com.maxmind.geoip2.record.Continent;
-import com.maxmind.geoip2.record.Country;
-import com.maxmind.geoip2.record.Location;
-import com.maxmind.geoip2.record.Subdivision;
-
-import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.CheckedSupplier;
import org.elasticsearch.common.logging.DeprecationCategory;
import org.elasticsearch.common.logging.DeprecationLogger;
@@ -34,10 +17,10 @@
import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.ingest.Processor;
import org.elasticsearch.ingest.geoip.Database.Property;
+import org.elasticsearch.ingest.geoip.IpDataLookupFactories.IpDataLookupFactory;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -60,7 +43,7 @@ public final class GeoIpProcessor extends AbstractProcessor {
private final Supplier<Boolean> isValid;
private final String targetField;
private final CheckedSupplier<IpDatabase, IOException> supplier;
- private final Set<Property> properties;
+ private final IpDataLookup ipDataLookup;
private final boolean ignoreMissing;
private final boolean firstOnly;
private final String databaseFile;
@@ -73,7 +56,7 @@ public final class GeoIpProcessor extends AbstractProcessor {
* @param supplier a supplier of a geo-IP database reader; ideally this is lazily-loaded once on first use
* @param isValid a supplier that determines if the available database files are up-to-date and license compliant
* @param targetField the target field
- * @param properties the properties; ideally this is lazily-loaded once on first use
+ * @param ipDataLookup a lookup capable of retrieving a result from an available geo-IP database reader
* @param ignoreMissing true if documents with a missing value for the field should be ignored
* @param firstOnly true if only first result should be returned in case of array
* @param databaseFile the name of the database file being queried; used only for tagging documents if the database is unavailable
@@ -85,7 +68,7 @@ public final class GeoIpProcessor extends AbstractProcessor {
final CheckedSupplier<IpDatabase, IOException> supplier,
final Supplier<Boolean> isValid,
final String targetField,
- final Set<Property> properties,
+ final IpDataLookup ipDataLookup,
final boolean ignoreMissing,
final boolean firstOnly,
final String databaseFile
@@ -95,7 +78,7 @@ public final class GeoIpProcessor extends AbstractProcessor {
this.isValid = isValid;
this.targetField = targetField;
this.supplier = supplier;
- this.properties = properties;
+ this.ipDataLookup = ipDataLookup;
this.ignoreMissing = ignoreMissing;
this.firstOnly = firstOnly;
this.databaseFile = databaseFile;
@@ -127,7 +110,7 @@ public IngestDocument execute(IngestDocument document) throws IOException {
}
if (ip instanceof String ipString) {
- Map<String, Object> data = getGeoData(ipDatabase, ipString);
+ Map<String, Object> data = ipDataLookup.getData(ipDatabase, ipString);
if (data.isEmpty() == false) {
document.setFieldValue(targetField, data);
}
@@ -138,7 +121,7 @@ public IngestDocument execute(IngestDocument document) throws IOException {
if (ipAddr instanceof String == false) {
throw new IllegalArgumentException("array in field [" + field + "] should only contain strings");
}
- Map<String, Object> data = getGeoData(ipDatabase, (String) ipAddr);
+ Map<String, Object> data = ipDataLookup.getData(ipDatabase, (String) ipAddr);
if (data.isEmpty()) {
dataList.add(null);
continue;
@@ -161,26 +144,6 @@ public IngestDocument execute(IngestDocument document) throws IOException {
return document;
}
- private Map<String, Object> getGeoData(IpDatabase ipDatabase, String ipAddress) throws IOException {
- final String databaseType = ipDatabase.getDatabaseType();
- final Database database;
- try {
- database = Database.getDatabase(databaseType, databaseFile);
- } catch (IllegalArgumentException e) {
- throw new ElasticsearchParseException(e.getMessage(), e);
- }
- return switch (database) {
- case City -> retrieveCityGeoData(ipDatabase, ipAddress);
- case Country -> retrieveCountryGeoData(ipDatabase, ipAddress);
- case Asn -> retrieveAsnGeoData(ipDatabase, ipAddress);
- case AnonymousIp -> retrieveAnonymousIpGeoData(ipDatabase, ipAddress);
- case ConnectionType -> retrieveConnectionTypeGeoData(ipDatabase, ipAddress);
- case Domain -> retrieveDomainGeoData(ipDatabase, ipAddress);
- case Enterprise -> retrieveEnterpriseGeoData(ipDatabase, ipAddress);
- case Isp -> retrieveIspGeoData(ipDatabase, ipAddress);
- };
- }
-
@Override
public String getType() {
return TYPE;
@@ -199,478 +162,7 @@ String getDatabaseType() throws IOException {
}
Set<Property> getProperties() {
- return properties;
- }
-
- private Map<String, Object> retrieveCityGeoData(IpDatabase ipDatabase, String ipAddress) {
- CityResponse response = ipDatabase.getCity(ipAddress);
- if (response == null) {
- return Map.of();
- }
- Country country = response.getCountry();
- City city = response.getCity();
- Location location = response.getLocation();
- Continent continent = response.getContinent();
- Subdivision subdivision = response.getMostSpecificSubdivision();
-
- Map<String, Object> geoData = new HashMap<>();
- for (Property property : this.properties) {
- switch (property) {
- case IP -> geoData.put("ip", response.getTraits().getIpAddress());
- case COUNTRY_ISO_CODE -> {
- String countryIsoCode = country.getIsoCode();
- if (countryIsoCode != null) {
- geoData.put("country_iso_code", countryIsoCode);
- }
- }
- case COUNTRY_NAME -> {
- String countryName = country.getName();
- if (countryName != null) {
- geoData.put("country_name", countryName);
- }
- }
- case CONTINENT_CODE -> {
- String continentCode = continent.getCode();
- if (continentCode != null) {
- geoData.put("continent_code", continentCode);
- }
- }
- case CONTINENT_NAME -> {
- String continentName = continent.getName();
- if (continentName != null) {
- geoData.put("continent_name", continentName);
- }
- }
- case REGION_ISO_CODE -> {
- // ISO 3166-2 code for country subdivisions.
- // See iso.org/iso-3166-country-codes.html
- String countryIso = country.getIsoCode();
- String subdivisionIso = subdivision.getIsoCode();
- if (countryIso != null && subdivisionIso != null) {
- String regionIsoCode = countryIso + "-" + subdivisionIso;
- geoData.put("region_iso_code", regionIsoCode);
- }
- }
- case REGION_NAME -> {
- String subdivisionName = subdivision.getName();
- if (subdivisionName != null) {
- geoData.put("region_name", subdivisionName);
- }
- }
- case CITY_NAME -> {
- String cityName = city.getName();
- if (cityName != null) {
- geoData.put("city_name", cityName);
- }
- }
- case TIMEZONE -> {
- String locationTimeZone = location.getTimeZone();
- if (locationTimeZone != null) {
- geoData.put("timezone", locationTimeZone);
- }
- }
- case LOCATION -> {
- Double latitude = location.getLatitude();
- Double longitude = location.getLongitude();
- if (latitude != null && longitude != null) {
- Map<String, Object> locationObject = new HashMap<>();
- locationObject.put("lat", latitude);
- locationObject.put("lon", longitude);
- geoData.put("location", locationObject);
- }
- }
- }
- }
- return geoData;
- }
-
- private Map<String, Object> retrieveCountryGeoData(IpDatabase ipDatabase, String ipAddress) {
- CountryResponse response = ipDatabase.getCountry(ipAddress);
- if (response == null) {
- return Map.of();
- }
- Country country = response.getCountry();
- Continent continent = response.getContinent();
-
- Map<String, Object> geoData = new HashMap<>();
- for (Property property : this.properties) {
- switch (property) {
- case IP -> geoData.put("ip", response.getTraits().getIpAddress());
- case COUNTRY_ISO_CODE -> {
- String countryIsoCode = country.getIsoCode();
- if (countryIsoCode != null) {
- geoData.put("country_iso_code", countryIsoCode);
- }
- }
- case COUNTRY_NAME -> {
- String countryName = country.getName();
- if (countryName != null) {
- geoData.put("country_name", countryName);
- }
- }
- case CONTINENT_CODE -> {
- String continentCode = continent.getCode();
- if (continentCode != null) {
- geoData.put("continent_code", continentCode);
- }
- }
- case CONTINENT_NAME -> {
- String continentName = continent.getName();
- if (continentName != null) {
- geoData.put("continent_name", continentName);
- }
- }
- }
- }
- return geoData;
- }
-
- private Map<String, Object> retrieveAsnGeoData(IpDatabase ipDatabase, String ipAddress) {
- AsnResponse response = ipDatabase.getAsn(ipAddress);
- if (response == null) {
- return Map.of();
- }
- Long asn = response.getAutonomousSystemNumber();
- String organizationName = response.getAutonomousSystemOrganization();
- Network network = response.getNetwork();
-
- Map<String, Object> geoData = new HashMap<>();
- for (Property property : this.properties) {
- switch (property) {
- case IP -> geoData.put("ip", response.getIpAddress());
- case ASN -> {
- if (asn != null) {
- geoData.put("asn", asn);
- }
- }
- case ORGANIZATION_NAME -> {
- if (organizationName != null) {
- geoData.put("organization_name", organizationName);
- }
- }
- case NETWORK -> {
- if (network != null) {
- geoData.put("network", network.toString());
- }
- }
- }
- }
- return geoData;
- }
-
- private Map<String, Object> retrieveAnonymousIpGeoData(IpDatabase ipDatabase, String ipAddress) {
- AnonymousIpResponse response = ipDatabase.getAnonymousIp(ipAddress);
- if (response == null) {
- return Map.of();
- }
-
- boolean isHostingProvider = response.isHostingProvider();
- boolean isTorExitNode = response.isTorExitNode();
- boolean isAnonymousVpn = response.isAnonymousVpn();
- boolean isAnonymous = response.isAnonymous();
- boolean isPublicProxy = response.isPublicProxy();
- boolean isResidentialProxy = response.isResidentialProxy();
-
- Map<String, Object> geoData = new HashMap<>();
- for (Property property : this.properties) {
- switch (property) {
- case IP -> geoData.put("ip", response.getIpAddress());
- case HOSTING_PROVIDER -> {
- geoData.put("hosting_provider", isHostingProvider);
- }
- case TOR_EXIT_NODE -> {
- geoData.put("tor_exit_node", isTorExitNode);
- }
- case ANONYMOUS_VPN -> {
- geoData.put("anonymous_vpn", isAnonymousVpn);
- }
- case ANONYMOUS -> {
- geoData.put("anonymous", isAnonymous);
- }
- case PUBLIC_PROXY -> {
- geoData.put("public_proxy", isPublicProxy);
- }
- case RESIDENTIAL_PROXY -> {
- geoData.put("residential_proxy", isResidentialProxy);
- }
- }
- }
- return geoData;
- }
-
- private Map<String, Object> retrieveConnectionTypeGeoData(IpDatabase ipDatabase, String ipAddress) {
- ConnectionTypeResponse response = ipDatabase.getConnectionType(ipAddress);
- if (response == null) {
- return Map.of();
- }
-
- ConnectionType connectionType = response.getConnectionType();
-
- Map<String, Object> geoData = new HashMap<>();
- for (Property property : this.properties) {
- switch (property) {
- case IP -> geoData.put("ip", response.getIpAddress());
- case CONNECTION_TYPE -> {
- if (connectionType != null) {
- geoData.put("connection_type", connectionType.toString());
- }
- }
- }
- }
- return geoData;
- }
-
- private Map<String, Object> retrieveDomainGeoData(IpDatabase ipDatabase, String ipAddress) {
- DomainResponse response = ipDatabase.getDomain(ipAddress);
- if (response == null) {
- return Map.of();
- }
-
- String domain = response.getDomain();
-
- Map<String, Object> geoData = new HashMap<>();
- for (Property property : this.properties) {
- switch (property) {
- case IP -> geoData.put("ip", response.getIpAddress());
- case DOMAIN -> {
- if (domain != null) {
- geoData.put("domain", domain);
- }
- }
- }
- }
- return geoData;
- }
-
- private Map<String, Object> retrieveEnterpriseGeoData(IpDatabase ipDatabase, String ipAddress) {
- EnterpriseResponse response = ipDatabase.getEnterprise(ipAddress);
- if (response == null) {
- return Map.of();
- }
-
- Country country = response.getCountry();
- City city = response.getCity();
- Location location = response.getLocation();
- Continent continent = response.getContinent();
- Subdivision subdivision = response.getMostSpecificSubdivision();
-
- Long asn = response.getTraits().getAutonomousSystemNumber();
- String organizationName = response.getTraits().getAutonomousSystemOrganization();
- Network network = response.getTraits().getNetwork();
-
- String isp = response.getTraits().getIsp();
- String ispOrganization = response.getTraits().getOrganization();
- String mobileCountryCode = response.getTraits().getMobileCountryCode();
- String mobileNetworkCode = response.getTraits().getMobileNetworkCode();
-
- boolean isHostingProvider = response.getTraits().isHostingProvider();
- boolean isTorExitNode = response.getTraits().isTorExitNode();
- boolean isAnonymousVpn = response.getTraits().isAnonymousVpn();
- boolean isAnonymous = response.getTraits().isAnonymous();
- boolean isPublicProxy = response.getTraits().isPublicProxy();
- boolean isResidentialProxy = response.getTraits().isResidentialProxy();
-
- String userType = response.getTraits().getUserType();
-
- String domain = response.getTraits().getDomain();
-
- ConnectionType connectionType = response.getTraits().getConnectionType();
-
- Map<String, Object> geoData = new HashMap<>();
- for (Property property : this.properties) {
- switch (property) {
- case IP -> geoData.put("ip", response.getTraits().getIpAddress());
- case COUNTRY_ISO_CODE -> {
- String countryIsoCode = country.getIsoCode();
- if (countryIsoCode != null) {
- geoData.put("country_iso_code", countryIsoCode);
- }
- }
- case COUNTRY_NAME -> {
- String countryName = country.getName();
- if (countryName != null) {
- geoData.put("country_name", countryName);
- }
- }
- case CONTINENT_CODE -> {
- String continentCode = continent.getCode();
- if (continentCode != null) {
- geoData.put("continent_code", continentCode);
- }
- }
- case CONTINENT_NAME -> {
- String continentName = continent.getName();
- if (continentName != null) {
- geoData.put("continent_name", continentName);
- }
- }
- case REGION_ISO_CODE -> {
- // ISO 3166-2 code for country subdivisions.
- // See iso.org/iso-3166-country-codes.html
- String countryIso = country.getIsoCode();
- String subdivisionIso = subdivision.getIsoCode();
- if (countryIso != null && subdivisionIso != null) {
- String regionIsoCode = countryIso + "-" + subdivisionIso;
- geoData.put("region_iso_code", regionIsoCode);
- }
- }
- case REGION_NAME -> {
- String subdivisionName = subdivision.getName();
- if (subdivisionName != null) {
- geoData.put("region_name", subdivisionName);
- }
- }
- case CITY_NAME -> {
- String cityName = city.getName();
- if (cityName != null) {
- geoData.put("city_name", cityName);
- }
- }
- case TIMEZONE -> {
- String locationTimeZone = location.getTimeZone();
- if (locationTimeZone != null) {
- geoData.put("timezone", locationTimeZone);
- }
- }
- case LOCATION -> {
- Double latitude = location.getLatitude();
- Double longitude = location.getLongitude();
- if (latitude != null && longitude != null) {
- Map<String, Object> locationObject = new HashMap<>();
- locationObject.put("lat", latitude);
- locationObject.put("lon", longitude);
- geoData.put("location", locationObject);
- }
- }
- case ASN -> {
- if (asn != null) {
- geoData.put("asn", asn);
- }
- }
- case ORGANIZATION_NAME -> {
- if (organizationName != null) {
- geoData.put("organization_name", organizationName);
- }
- }
- case NETWORK -> {
- if (network != null) {
- geoData.put("network", network.toString());
- }
- }
- case HOSTING_PROVIDER -> {
- geoData.put("hosting_provider", isHostingProvider);
- }
- case TOR_EXIT_NODE -> {
- geoData.put("tor_exit_node", isTorExitNode);
- }
- case ANONYMOUS_VPN -> {
- geoData.put("anonymous_vpn", isAnonymousVpn);
- }
- case ANONYMOUS -> {
- geoData.put("anonymous", isAnonymous);
- }
- case PUBLIC_PROXY -> {
- geoData.put("public_proxy", isPublicProxy);
- }
- case RESIDENTIAL_PROXY -> {
- geoData.put("residential_proxy", isResidentialProxy);
- }
- case DOMAIN -> {
- if (domain != null) {
- geoData.put("domain", domain);
- }
- }
- case ISP -> {
- if (isp != null) {
- geoData.put("isp", isp);
- }
- }
- case ISP_ORGANIZATION_NAME -> {
- if (ispOrganization != null) {
- geoData.put("isp_organization_name", ispOrganization);
- }
- }
- case MOBILE_COUNTRY_CODE -> {
- if (mobileCountryCode != null) {
- geoData.put("mobile_country_code", mobileCountryCode);
- }
- }
- case MOBILE_NETWORK_CODE -> {
- if (mobileNetworkCode != null) {
- geoData.put("mobile_network_code", mobileNetworkCode);
- }
- }
- case USER_TYPE -> {
- if (userType != null) {
- geoData.put("user_type", userType);
- }
- }
- case CONNECTION_TYPE -> {
- if (connectionType != null) {
- geoData.put("connection_type", connectionType.toString());
- }
- }
- }
- }
- return geoData;
- }
-
- private Map<String, Object> retrieveIspGeoData(IpDatabase ipDatabase, String ipAddress) {
- IspResponse response = ipDatabase.getIsp(ipAddress);
- if (response == null) {
- return Map.of();
- }
-
- String isp = response.getIsp();
- String ispOrganization = response.getOrganization();
- String mobileNetworkCode = response.getMobileNetworkCode();
- String mobileCountryCode = response.getMobileCountryCode();
- Long asn = response.getAutonomousSystemNumber();
- String organizationName = response.getAutonomousSystemOrganization();
- Network network = response.getNetwork();
-
- Map<String, Object> geoData = new HashMap<>();
- for (Property property : this.properties) {
- switch (property) {
- case IP -> geoData.put("ip", response.getIpAddress());
- case ASN -> {
- if (asn != null) {
- geoData.put("asn", asn);
- }
- }
- case ORGANIZATION_NAME -> {
- if (organizationName != null) {
- geoData.put("organization_name", organizationName);
- }
- }
- case NETWORK -> {
- if (network != null) {
- geoData.put("network", network.toString());
- }
- }
- case ISP -> {
- if (isp != null) {
- geoData.put("isp", isp);
- }
- }
- case ISP_ORGANIZATION_NAME -> {
- if (ispOrganization != null) {
- geoData.put("isp_organization_name", ispOrganization);
- }
- }
- case MOBILE_COUNTRY_CODE -> {
- if (mobileCountryCode != null) {
- geoData.put("mobile_country_code", mobileCountryCode);
- }
- }
- case MOBILE_NETWORK_CODE -> {
- if (mobileNetworkCode != null) {
- geoData.put("mobile_network_code", mobileNetworkCode);
- }
- }
- }
- }
- return geoData;
+ return ipDataLookup.getProperties();
}
/**
@@ -752,19 +244,20 @@ public Processor create(
databaseType = ipDatabase.getDatabaseType();
}
- final Database database;
+ final IpDataLookupFactory factory;
try {
- database = Database.getDatabase(databaseType, databaseFile);
+ factory = IpDataLookupFactories.get(databaseType, databaseFile);
} catch (IllegalArgumentException e) {
throw newConfigurationException(TYPE, processorTag, "database_file", e.getMessage());
}
- final Set<Property> properties;
+ final IpDataLookup ipDataLookup;
try {
- properties = database.parseProperties(propertyNames);
+ ipDataLookup = factory.create(propertyNames);
} catch (IllegalArgumentException e) {
throw newConfigurationException(TYPE, processorTag, "properties", e.getMessage());
}
+
return new GeoIpProcessor(
processorTag,
description,
@@ -772,7 +265,7 @@ public Processor create(
new DatabaseVerifyingSupplier(ipDatabaseProvider, databaseFile, databaseType),
() -> ipDatabaseProvider.isValid(databaseFile),
targetField,
- properties,
+ ipDataLookup,
ignoreMissing,
firstOnly,
databaseFile
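
Taken together, the create() changes collapse the old two-step database/property resolution into a factory-built lookup object. A minimal sketch of the new flow, assuming databaseType, databaseFile, and the user-configured propertyNames (a List<String>) are already in hand, with the exception-to-configuration-error translation elided:

    // sketch: resolve a factory from the database metadata, then bind the properties
    IpDataLookupFactory factory = IpDataLookupFactories.get(databaseType, databaseFile);
    IpDataLookup ipDataLookup = factory.create(propertyNames); // IllegalArgumentException on bad names
    // the processor now carries this lookup instead of a bare Set<Property>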
diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpDataLookup.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpDataLookup.java
new file mode 100644
index 0000000000000..7442c8e930886
--- /dev/null
+++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpDataLookup.java
@@ -0,0 +1,31 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.ingest.geoip;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.Set;
+
+interface IpDataLookup {
+ /**
+ * Gets data from the provided {@code ipDatabase} for the provided {@code ip}
+ *
+ * @param ipDatabase the database from which to look up a result
+ * @param ip the ip address
+ * @return a map of data corresponding to the configured properties
+ * @throws IOException if the implementation encounters any problem while retrieving the response
+ */
+ Map<String, Object> getData(IpDatabase ipDatabase, String ip) throws IOException;
+
+ /**
+ * @return the set of properties this lookup will provide
+ */
+ Set<Database.Property> getProperties();
+}
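
To make the new contract concrete, a hypothetical implementation is sketched below; IpOnlyLookup is an invented name that does not appear in this change:

    // hypothetical: a lookup that reports only the ip itself and never touches the database
    class IpOnlyLookup implements IpDataLookup {
        @Override
        public Map<String, Object> getData(IpDatabase ipDatabase, String ip) {
            return Map.of("ip", ip);
        }

        @Override
        public Set<Database.Property> getProperties() {
            return Set.of(Database.Property.IP);
        }
    }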
diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpDataLookupFactories.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpDataLookupFactories.java
new file mode 100644
index 0000000000000..3379fdff0633a
--- /dev/null
+++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpDataLookupFactories.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.ingest.geoip;
+
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.core.Nullable;
+
+import java.util.List;
+import java.util.Set;
+import java.util.function.Function;
+
+final class IpDataLookupFactories {
+
+ private IpDataLookupFactories() {
+ // utility class
+ }
+
+ interface IpDataLookupFactory {
+ IpDataLookup create(List<String> properties);
+ }
+
+ private static final String CITY_DB_SUFFIX = "-City";
+ private static final String COUNTRY_DB_SUFFIX = "-Country";
+ private static final String ASN_DB_SUFFIX = "-ASN";
+ private static final String ANONYMOUS_IP_DB_SUFFIX = "-Anonymous-IP";
+ private static final String CONNECTION_TYPE_DB_SUFFIX = "-Connection-Type";
+ private static final String DOMAIN_DB_SUFFIX = "-Domain";
+ private static final String ENTERPRISE_DB_SUFFIX = "-Enterprise";
+ private static final String ISP_DB_SUFFIX = "-ISP";
+
+ @Nullable
+ private static Database getMaxmindDatabase(final String databaseType) {
+ if (databaseType.endsWith(CITY_DB_SUFFIX)) {
+ return Database.City;
+ } else if (databaseType.endsWith(COUNTRY_DB_SUFFIX)) {
+ return Database.Country;
+ } else if (databaseType.endsWith(ASN_DB_SUFFIX)) {
+ return Database.Asn;
+ } else if (databaseType.endsWith(ANONYMOUS_IP_DB_SUFFIX)) {
+ return Database.AnonymousIp;
+ } else if (databaseType.endsWith(CONNECTION_TYPE_DB_SUFFIX)) {
+ return Database.ConnectionType;
+ } else if (databaseType.endsWith(DOMAIN_DB_SUFFIX)) {
+ return Database.Domain;
+ } else if (databaseType.endsWith(ENTERPRISE_DB_SUFFIX)) {
+ return Database.Enterprise;
+ } else if (databaseType.endsWith(ISP_DB_SUFFIX)) {
+ return Database.Isp;
+ } else {
+ return null; // no match was found
+ }
+ }
+
+ /**
+ * Parses the passed-in databaseType and returns the Database instance that is
+ * associated with that databaseType.
+ *
+ * @param databaseType the database type String from the metadata of the database file
+ * @return the Database instance that is associated with the databaseType
+ */
+ @Nullable
+ static Database getDatabase(final String databaseType) {
+ Database database = null;
+
+ if (Strings.hasText(databaseType)) {
+ database = getMaxmindDatabase(databaseType);
+ }
+
+ return database;
+ }
+
+ @Nullable
+ static Function<Set<Database.Property>, IpDataLookup> getMaxmindLookup(final Database database) {
+ return switch (database) {
+ case City -> MaxmindIpDataLookups.City::new;
+ case Country -> MaxmindIpDataLookups.Country::new;
+ case Asn -> MaxmindIpDataLookups.Asn::new;
+ case AnonymousIp -> MaxmindIpDataLookups.AnonymousIp::new;
+ case ConnectionType -> MaxmindIpDataLookups.ConnectionType::new;
+ case Domain -> MaxmindIpDataLookups.Domain::new;
+ case Enterprise -> MaxmindIpDataLookups.Enterprise::new;
+ case Isp -> MaxmindIpDataLookups.Isp::new;
+ default -> null;
+ };
+ }
+
+ static IpDataLookupFactory get(final String databaseType, final String databaseFile) {
+ final Database database = getDatabase(databaseType);
+ if (database == null) {
+ throw new IllegalArgumentException("Unsupported database type [" + databaseType + "] for file [" + databaseFile + "]");
+ }
+
+ final Function<Set<Database.Property>, IpDataLookup> factoryMethod = getMaxmindLookup(database);
+
+ if (factoryMethod == null) {
+ throw new IllegalArgumentException("Unsupported database type [" + databaseType + "] for file [" + databaseFile + "]");
+ }
+
+ return (properties) -> factoryMethod.apply(database.parseProperties(properties));
+ }
+}
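
A usage sketch for the factory entry point (the property names are illustrative; a database type with no matching suffix fails fast with IllegalArgumentException):

    IpDataLookupFactory factory = IpDataLookupFactories.get("GeoLite2-City", "GeoLite2-City.mmdb");
    IpDataLookup lookup = factory.create(List.of("ip", "country_iso_code", "location"));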
diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpDatabase.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpDatabase.java
index f416259a87d27..db1ffc1c682b8 100644
--- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpDatabase.java
+++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpDatabase.java
@@ -9,15 +9,9 @@
package org.elasticsearch.ingest.geoip;
-import com.maxmind.geoip2.model.AnonymousIpResponse;
-import com.maxmind.geoip2.model.AsnResponse;
-import com.maxmind.geoip2.model.CityResponse;
-import com.maxmind.geoip2.model.ConnectionTypeResponse;
-import com.maxmind.geoip2.model.CountryResponse;
-import com.maxmind.geoip2.model.DomainResponse;
-import com.maxmind.geoip2.model.EnterpriseResponse;
-import com.maxmind.geoip2.model.IspResponse;
+import com.maxmind.db.Reader;
+import org.elasticsearch.common.CheckedBiFunction;
import org.elasticsearch.core.Nullable;
import java.io.IOException;
@@ -34,44 +28,15 @@ public interface IpDatabase extends AutoCloseable {
String getDatabaseType() throws IOException;
/**
- * @param ipAddress the IP address to look up
- * @return a response containing the city data for the given address if it exists, or null if it could not be found
- * @throws UnsupportedOperationException may be thrown if the implementation does not support retrieving city data
- */
- @Nullable
- CityResponse getCity(String ipAddress);
-
- /**
- * @param ipAddress the IP address to look up
- * @return a response containing the country data for the given address if it exists, or null if it could not be found
- * @throws UnsupportedOperationException may be thrown if the implementation does not support retrieving country data
- */
- @Nullable
- CountryResponse getCountry(String ipAddress);
-
- /**
- * @param ipAddress the IP address to look up
- * @return a response containing the Autonomous System Number for the given address if it exists, or null if it could not
- * be found
- * @throws UnsupportedOperationException may be thrown if the implementation does not support retrieving ASN data
+ * Returns a response from this database's reader for the given IP address.
+ *
+ * @param ipAddress the address to look up
+ * @param responseProvider a method for extracting a response from a {@link Reader}; usually this will be a method reference
+ * @return a possibly-null response
+ * @param <RESPONSE> the type of response that will be returned
*/
@Nullable
- AsnResponse getAsn(String ipAddress);
-
- @Nullable
- AnonymousIpResponse getAnonymousIp(String ipAddress);
-
- @Nullable
- ConnectionTypeResponse getConnectionType(String ipAddress);
-
- @Nullable
- DomainResponse getDomain(String ipAddress);
-
- @Nullable
- EnterpriseResponse getEnterprise(String ipAddress);
-
- @Nullable
- IspResponse getIsp(String ipAddress);
+ <RESPONSE> RESPONSE getResponse(String ipAddress, CheckedBiFunction<Reader, String, RESPONSE, Exception> responseProvider);
/**
* Releases the current database object. Called after processing a single document. Databases should be closed or returned to a
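
The collapsed interface means callers now pass the extraction step in, rather than the interface enumerating one getter per response type. A caller-side sketch (the real lookups bind a DatabaseRecord and re-wrap it, as the AbstractBase classes below do):

    CityResponse city = ipDatabase.getResponse("89.160.20.128", (reader, ip) -> {
        DatabaseRecord<CityResponse> record = reader.getRecord(InetAddresses.forString(ip), CityResponse.class);
        return record.getData(); // may be null if the address is not in the database
    });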
diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpinfoIpDataLookups.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpinfoIpDataLookups.java
new file mode 100644
index 0000000000000..ac7f56468f37e
--- /dev/null
+++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IpinfoIpDataLookups.java
@@ -0,0 +1,235 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.ingest.geoip;
+
+import com.maxmind.db.DatabaseRecord;
+import com.maxmind.db.MaxMindDbConstructor;
+import com.maxmind.db.MaxMindDbParameter;
+import com.maxmind.db.Reader;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.network.InetAddresses;
+import org.elasticsearch.common.network.NetworkAddress;
+import org.elasticsearch.core.Nullable;
+
+import java.io.IOException;
+import java.net.InetAddress;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * A collection of {@link IpDataLookup} implementations for IPinfo databases
+ */
+final class IpinfoIpDataLookups {
+
+ private IpinfoIpDataLookups() {
+ // utility class
+ }
+
+ private static final Logger logger = LogManager.getLogger(IpinfoIpDataLookups.class);
+
+ /**
+ * Leniently parses a string that (ideally) looks like 'AS123' into a Long like 123L (or null, if such parsing isn't possible).
+ * @param asn a potentially empty (or null) ASN string that is expected to contain 'AS' and then a parsable long
+ * @return the parsed asn
+ */
+ static Long parseAsn(final String asn) {
+ if (asn == null || Strings.hasText(asn) == false) {
+ return null;
+ } else {
+ String stripped = asn.toUpperCase(Locale.ROOT).replaceAll("AS", "").trim();
+ try {
+ return Long.parseLong(stripped);
+ } catch (NumberFormatException e) {
+ logger.trace("Unable to parse non-compliant ASN string [{}]", asn);
+ return null;
+ }
+ }
+ }
+
+ public record AsnResult(
+ Long asn,
+ @Nullable String country, // not present in the free asn database
+ String domain,
+ String name,
+ @Nullable String type // not present in the free asn database
+ ) {
+ @SuppressWarnings("checkstyle:RedundantModifier")
+ @MaxMindDbConstructor
+ public AsnResult(
+ @MaxMindDbParameter(name = "asn") String asn,
+ @Nullable @MaxMindDbParameter(name = "country") String country,
+ @MaxMindDbParameter(name = "domain") String domain,
+ @MaxMindDbParameter(name = "name") String name,
+ @Nullable @MaxMindDbParameter(name = "type") String type
+ ) {
+ this(parseAsn(asn), country, domain, name, type);
+ }
+ }
+
+ public record CountryResult(
+ @MaxMindDbParameter(name = "continent") String continent,
+ @MaxMindDbParameter(name = "continent_name") String continentName,
+ @MaxMindDbParameter(name = "country") String country,
+ @MaxMindDbParameter(name = "country_name") String countryName
+ ) {
+ @MaxMindDbConstructor
+ public CountryResult {}
+ }
+
+ static class Asn extends AbstractBase<AsnResult> {
+ Asn(Set<Database.Property> properties) {
+ super(properties, AsnResult.class);
+ }
+
+ @Override
+ protected Map<String, Object> transform(final Result<AsnResult> result) {
+ AsnResult response = result.result;
+ Long asn = response.asn;
+ String organizationName = response.name;
+ String network = result.network;
+
+ Map<String, Object> data = new HashMap<>();
+ for (Database.Property property : this.properties) {
+ switch (property) {
+ case IP -> data.put("ip", result.ip);
+ case ASN -> {
+ if (asn != null) {
+ data.put("asn", asn);
+ }
+ }
+ case ORGANIZATION_NAME -> {
+ if (organizationName != null) {
+ data.put("organization_name", organizationName);
+ }
+ }
+ case NETWORK -> {
+ if (network != null) {
+ data.put("network", network);
+ }
+ }
+ case COUNTRY_ISO_CODE -> {
+ if (response.country != null) {
+ data.put("country_iso_code", response.country);
+ }
+ }
+ case DOMAIN -> {
+ if (response.domain != null) {
+ data.put("domain", response.domain);
+ }
+ }
+ case TYPE -> {
+ if (response.type != null) {
+ data.put("type", response.type);
+ }
+ }
+ }
+ }
+ return data;
+ }
+ }
+
+ static class Country extends AbstractBase<CountryResult> {
+ Country(Set<Database.Property> properties) {
+ super(properties, CountryResult.class);
+ }
+
+ @Override
+ protected Map<String, Object> transform(final Result<CountryResult> result) {
+ CountryResult response = result.result;
+
+ Map<String, Object> data = new HashMap<>();
+ for (Database.Property property : this.properties) {
+ switch (property) {
+ case IP -> data.put("ip", result.ip);
+ case COUNTRY_ISO_CODE -> {
+ String countryIsoCode = response.country;
+ if (countryIsoCode != null) {
+ data.put("country_iso_code", countryIsoCode);
+ }
+ }
+ case COUNTRY_NAME -> {
+ String countryName = response.countryName;
+ if (countryName != null) {
+ data.put("country_name", countryName);
+ }
+ }
+ case CONTINENT_CODE -> {
+ String continentCode = response.continent;
+ if (continentCode != null) {
+ data.put("continent_code", continentCode);
+ }
+ }
+ case CONTINENT_NAME -> {
+ String continentName = response.continentName;
+ if (continentName != null) {
+ data.put("continent_name", continentName);
+ }
+ }
+ }
+ }
+ return data;
+ }
+ }
+
+ /**
+ * Just a little record holder -- there's the data that we receive via the binding to our record objects from the Reader via the
+ * getRecord call, but we also need to capture the ip address that the caller passed in, as well as the network of the
+ * DatabaseRecord that the Reader returned.
+ */
+ public record Result<T>(T result, String ip, String network) {}
+
+ /**
+ * The {@link IpinfoIpDataLookups.AbstractBase} is an abstract base implementation of {@link IpDataLookup} that
+ * provides common functionality for getting a {@link IpinfoIpDataLookups.Result} that wraps a record from a {@link IpDatabase}.
+ *
+ * @param <RESPONSE> the record type that will be wrapped and returned
+ */
+ private abstract static class AbstractBase<RESPONSE> implements IpDataLookup {
+
+ protected final Set<Database.Property> properties;
+ protected final Class<RESPONSE> clazz;
+
+ AbstractBase(final Set<Database.Property> properties, final Class<RESPONSE> clazz) {
+ this.properties = Set.copyOf(properties);
+ this.clazz = clazz;
+ }
+
+ @Override
+ public Set<Database.Property> getProperties() {
+ return this.properties;
+ }
+
+ @Override
+ public final Map<String, Object> getData(final IpDatabase ipDatabase, final String ipAddress) {
+ final Result<RESPONSE> response = ipDatabase.getResponse(ipAddress, this::lookup);
+ return (response == null || response.result == null) ? Map.of() : transform(response);
+ }
+
+ @Nullable
+ private Result<RESPONSE> lookup(final Reader reader, final String ipAddress) throws IOException {
+ final InetAddress ip = InetAddresses.forString(ipAddress);
+ final DatabaseRecord<RESPONSE> record = reader.getRecord(ip, clazz);
+ final RESPONSE data = record.getData();
+ return (data == null) ? null : new Result<>(data, NetworkAddress.format(ip), record.getNetwork().toString());
+ }
+
+ /**
+ * Extract the configured properties from the retrieved response
+ * @param response the non-null response that was retrieved
+ * @return a mapping of properties for the ip from the response
+ */
+ protected abstract Map<String, Object> transform(Result<RESPONSE> response);
+ }
+}
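
As a worked illustration of parseAsn's leniency (expected results shown in the comments):

    parseAsn("AS123");   // 123L
    parseAsn("as123 ");  // 123L -- uppercased and trimmed before parsing
    parseAsn("123");     // 123L -- the 'AS' prefix is optional
    parseAsn("ASabc");   // null -- logged at trace rather than thrown
    parseAsn(null);      // null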
diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/MaxmindIpDataLookups.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/MaxmindIpDataLookups.java
new file mode 100644
index 0000000000000..e7c3481938033
--- /dev/null
+++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/MaxmindIpDataLookups.java
@@ -0,0 +1,667 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.ingest.geoip;
+
+import com.maxmind.db.DatabaseRecord;
+import com.maxmind.db.Network;
+import com.maxmind.db.Reader;
+import com.maxmind.geoip2.model.AbstractResponse;
+import com.maxmind.geoip2.model.AnonymousIpResponse;
+import com.maxmind.geoip2.model.AsnResponse;
+import com.maxmind.geoip2.model.CityResponse;
+import com.maxmind.geoip2.model.ConnectionTypeResponse;
+import com.maxmind.geoip2.model.CountryResponse;
+import com.maxmind.geoip2.model.DomainResponse;
+import com.maxmind.geoip2.model.EnterpriseResponse;
+import com.maxmind.geoip2.model.IspResponse;
+import com.maxmind.geoip2.record.Continent;
+import com.maxmind.geoip2.record.Location;
+import com.maxmind.geoip2.record.Postal;
+import com.maxmind.geoip2.record.Subdivision;
+
+import org.elasticsearch.common.network.InetAddresses;
+import org.elasticsearch.common.network.NetworkAddress;
+import org.elasticsearch.core.Nullable;
+
+import java.io.IOException;
+import java.net.InetAddress;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * A collection of {@link IpDataLookup} implementations for MaxMind databases
+ */
+final class MaxmindIpDataLookups {
+
+ private MaxmindIpDataLookups() {
+ // utility class
+ }
+
+ static class AnonymousIp extends AbstractBase<AnonymousIpResponse> {
+ AnonymousIp(final Set<Database.Property> properties) {
+ super(
+ properties,
+ AnonymousIpResponse.class,
+ (response, ipAddress, network, locales) -> new AnonymousIpResponse(response, ipAddress, network)
+ );
+ }
+
+ @Override
+ protected Map<String, Object> transform(final AnonymousIpResponse response) {
+ boolean isHostingProvider = response.isHostingProvider();
+ boolean isTorExitNode = response.isTorExitNode();
+ boolean isAnonymousVpn = response.isAnonymousVpn();
+ boolean isAnonymous = response.isAnonymous();
+ boolean isPublicProxy = response.isPublicProxy();
+ boolean isResidentialProxy = response.isResidentialProxy();
+
+ Map<String, Object> data = new HashMap<>();
+ for (Database.Property property : this.properties) {
+ switch (property) {
+ case IP -> data.put("ip", response.getIpAddress());
+ case HOSTING_PROVIDER -> {
+ data.put("hosting_provider", isHostingProvider);
+ }
+ case TOR_EXIT_NODE -> {
+ data.put("tor_exit_node", isTorExitNode);
+ }
+ case ANONYMOUS_VPN -> {
+ data.put("anonymous_vpn", isAnonymousVpn);
+ }
+ case ANONYMOUS -> {
+ data.put("anonymous", isAnonymous);
+ }
+ case PUBLIC_PROXY -> {
+ data.put("public_proxy", isPublicProxy);
+ }
+ case RESIDENTIAL_PROXY -> {
+ data.put("residential_proxy", isResidentialProxy);
+ }
+ }
+ }
+ return data;
+ }
+ }
+
+ static class Asn extends AbstractBase<AsnResponse> {
+ Asn(Set<Database.Property> properties) {
+ super(properties, AsnResponse.class, (response, ipAddress, network, locales) -> new AsnResponse(response, ipAddress, network));
+ }
+
+ @Override
+ protected Map<String, Object> transform(final AsnResponse response) {
+ Long asn = response.getAutonomousSystemNumber();
+ String organizationName = response.getAutonomousSystemOrganization();
+ Network network = response.getNetwork();
+
+ Map<String, Object> data = new HashMap<>();
+ for (Database.Property property : this.properties) {
+ switch (property) {
+ case IP -> data.put("ip", response.getIpAddress());
+ case ASN -> {
+ if (asn != null) {
+ data.put("asn", asn);
+ }
+ }
+ case ORGANIZATION_NAME -> {
+ if (organizationName != null) {
+ data.put("organization_name", organizationName);
+ }
+ }
+ case NETWORK -> {
+ if (network != null) {
+ data.put("network", network.toString());
+ }
+ }
+ }
+ }
+ return data;
+ }
+ }
+
+ static class City extends AbstractBase<CityResponse> {
+ City(final Set<Database.Property> properties) {
+ super(properties, CityResponse.class, CityResponse::new);
+ }
+
+ @Override
+ protected Map<String, Object> transform(final CityResponse response) {
+ com.maxmind.geoip2.record.Country country = response.getCountry();
+ com.maxmind.geoip2.record.City city = response.getCity();
+ Location location = response.getLocation();
+ Continent continent = response.getContinent();
+ Subdivision subdivision = response.getMostSpecificSubdivision();
+ Postal postal = response.getPostal();
+
+ Map<String, Object> data = new HashMap<>();
+ for (Database.Property property : this.properties) {
+ switch (property) {
+ case IP -> data.put("ip", response.getTraits().getIpAddress());
+ case COUNTRY_IN_EUROPEAN_UNION -> {
+ if (country.getIsoCode() != null) {
+ // isInEuropeanUnion is a boolean so it can't be null. But it really only makes sense if we have a country
+ data.put("country_in_european_union", country.isInEuropeanUnion());
+ }
+ }
+ case COUNTRY_ISO_CODE -> {
+ String countryIsoCode = country.getIsoCode();
+ if (countryIsoCode != null) {
+ data.put("country_iso_code", countryIsoCode);
+ }
+ }
+ case COUNTRY_NAME -> {
+ String countryName = country.getName();
+ if (countryName != null) {
+ data.put("country_name", countryName);
+ }
+ }
+ case CONTINENT_CODE -> {
+ String continentCode = continent.getCode();
+ if (continentCode != null) {
+ data.put("continent_code", continentCode);
+ }
+ }
+ case CONTINENT_NAME -> {
+ String continentName = continent.getName();
+ if (continentName != null) {
+ data.put("continent_name", continentName);
+ }
+ }
+ case REGION_ISO_CODE -> {
+ // ISO 3166-2 code for country subdivisions.
+ // See iso.org/iso-3166-country-codes.html
+ String countryIso = country.getIsoCode();
+ String subdivisionIso = subdivision.getIsoCode();
+ if (countryIso != null && subdivisionIso != null) {
+ String regionIsoCode = countryIso + "-" + subdivisionIso;
+ data.put("region_iso_code", regionIsoCode);
+ }
+ }
+ case REGION_NAME -> {
+ String subdivisionName = subdivision.getName();
+ if (subdivisionName != null) {
+ data.put("region_name", subdivisionName);
+ }
+ }
+ case CITY_NAME -> {
+ String cityName = city.getName();
+ if (cityName != null) {
+ data.put("city_name", cityName);
+ }
+ }
+ case TIMEZONE -> {
+ String locationTimeZone = location.getTimeZone();
+ if (locationTimeZone != null) {
+ data.put("timezone", locationTimeZone);
+ }
+ }
+ case LOCATION -> {
+ Double latitude = location.getLatitude();
+ Double longitude = location.getLongitude();
+ if (latitude != null && longitude != null) {
+ Map<String, Object> locationObject = new HashMap<>();
+ locationObject.put("lat", latitude);
+ locationObject.put("lon", longitude);
+ data.put("location", locationObject);
+ }
+ }
+ case ACCURACY_RADIUS -> {
+ Integer accuracyRadius = location.getAccuracyRadius();
+ if (accuracyRadius != null) {
+ data.put("accuracy_radius", accuracyRadius);
+ }
+ }
+ case POSTAL_CODE -> {
+ if (postal != null && postal.getCode() != null) {
+ data.put("postal_code", postal.getCode());
+ }
+ }
+ }
+ }
+ return data;
+ }
+ }
+
+ static class ConnectionType extends AbstractBase<ConnectionTypeResponse> {
+ ConnectionType(final Set<Database.Property> properties) {
+ super(
+ properties,
+ ConnectionTypeResponse.class,
+ (response, ipAddress, network, locales) -> new ConnectionTypeResponse(response, ipAddress, network)
+ );
+ }
+
+ @Override
+ protected Map<String, Object> transform(final ConnectionTypeResponse response) {
+ ConnectionTypeResponse.ConnectionType connectionType = response.getConnectionType();
+
+ Map<String, Object> data = new HashMap<>();
+ for (Database.Property property : this.properties) {
+ switch (property) {
+ case IP -> data.put("ip", response.getIpAddress());
+ case CONNECTION_TYPE -> {
+ if (connectionType != null) {
+ data.put("connection_type", connectionType.toString());
+ }
+ }
+ }
+ }
+ return data;
+ }
+ }
+
+ static class Country extends AbstractBase<CountryResponse> {
+ Country(final Set<Database.Property> properties) {
+ super(properties, CountryResponse.class, CountryResponse::new);
+ }
+
+ @Override
+ protected Map<String, Object> transform(final CountryResponse response) {
+ com.maxmind.geoip2.record.Country country = response.getCountry();
+ Continent continent = response.getContinent();
+
+ Map<String, Object> data = new HashMap<>();
+ for (Database.Property property : this.properties) {
+ switch (property) {
+ case IP -> data.put("ip", response.getTraits().getIpAddress());
+ case COUNTRY_IN_EUROPEAN_UNION -> {
+ if (country.getIsoCode() != null) {
+ // isInEuropeanUnion is a boolean so it can't be null. But it really only makes sense if we have a country
+ data.put("country_in_european_union", country.isInEuropeanUnion());
+ }
+ }
+ case COUNTRY_ISO_CODE -> {
+ String countryIsoCode = country.getIsoCode();
+ if (countryIsoCode != null) {
+ data.put("country_iso_code", countryIsoCode);
+ }
+ }
+ case COUNTRY_NAME -> {
+ String countryName = country.getName();
+ if (countryName != null) {
+ data.put("country_name", countryName);
+ }
+ }
+ case CONTINENT_CODE -> {
+ String continentCode = continent.getCode();
+ if (continentCode != null) {
+ data.put("continent_code", continentCode);
+ }
+ }
+ case CONTINENT_NAME -> {
+ String continentName = continent.getName();
+ if (continentName != null) {
+ data.put("continent_name", continentName);
+ }
+ }
+ }
+ }
+ return data;
+ }
+ }
+
+ static class Domain extends AbstractBase<DomainResponse> {
+ Domain(final Set<Database.Property> properties) {
+ super(
+ properties,
+ DomainResponse.class,
+ (response, ipAddress, network, locales) -> new DomainResponse(response, ipAddress, network)
+ );
+ }
+
+ @Override
+ protected Map<String, Object> transform(final DomainResponse response) {
+ String domain = response.getDomain();
+
+ Map<String, Object> data = new HashMap<>();
+ for (Database.Property property : this.properties) {
+ switch (property) {
+ case IP -> data.put("ip", response.getIpAddress());
+ case DOMAIN -> {
+ if (domain != null) {
+ data.put("domain", domain);
+ }
+ }
+ }
+ }
+ return data;
+ }
+ }
+
+ static class Enterprise extends AbstractBase<EnterpriseResponse> {
+ Enterprise(final Set<Database.Property> properties) {
+ super(properties, EnterpriseResponse.class, EnterpriseResponse::new);
+ }
+
+ @Override
+ protected Map<String, Object> transform(final EnterpriseResponse response) {
+ com.maxmind.geoip2.record.Country country = response.getCountry();
+ com.maxmind.geoip2.record.City city = response.getCity();
+ Location location = response.getLocation();
+ Continent continent = response.getContinent();
+ Subdivision subdivision = response.getMostSpecificSubdivision();
+ Postal postal = response.getPostal();
+
+ Long asn = response.getTraits().getAutonomousSystemNumber();
+ String organizationName = response.getTraits().getAutonomousSystemOrganization();
+ Network network = response.getTraits().getNetwork();
+
+ String isp = response.getTraits().getIsp();
+ String ispOrganization = response.getTraits().getOrganization();
+ String mobileCountryCode = response.getTraits().getMobileCountryCode();
+ String mobileNetworkCode = response.getTraits().getMobileNetworkCode();
+
+ boolean isHostingProvider = response.getTraits().isHostingProvider();
+ boolean isTorExitNode = response.getTraits().isTorExitNode();
+ boolean isAnonymousVpn = response.getTraits().isAnonymousVpn();
+ boolean isAnonymous = response.getTraits().isAnonymous();
+ boolean isPublicProxy = response.getTraits().isPublicProxy();
+ boolean isResidentialProxy = response.getTraits().isResidentialProxy();
+
+ String userType = response.getTraits().getUserType();
+
+ String domain = response.getTraits().getDomain();
+
+ ConnectionTypeResponse.ConnectionType connectionType = response.getTraits().getConnectionType();
+
+ Map<String, Object> data = new HashMap<>();
+ for (Database.Property property : this.properties) {
+ switch (property) {
+ case IP -> data.put("ip", response.getTraits().getIpAddress());
+ case COUNTRY_CONFIDENCE -> {
+ Integer countryConfidence = country.getConfidence();
+ if (countryConfidence != null) {
+ data.put("country_confidence", countryConfidence);
+ }
+ }
+ case COUNTRY_IN_EUROPEAN_UNION -> {
+ if (country.getIsoCode() != null) {
+ // isInEuropeanUnion is a boolean so it can't be null. But it really only makes sense if we have a country
+ data.put("country_in_european_union", country.isInEuropeanUnion());
+ }
+ }
+ case COUNTRY_ISO_CODE -> {
+ String countryIsoCode = country.getIsoCode();
+ if (countryIsoCode != null) {
+ data.put("country_iso_code", countryIsoCode);
+ }
+ }
+ case COUNTRY_NAME -> {
+ String countryName = country.getName();
+ if (countryName != null) {
+ data.put("country_name", countryName);
+ }
+ }
+ case CONTINENT_CODE -> {
+ String continentCode = continent.getCode();
+ if (continentCode != null) {
+ data.put("continent_code", continentCode);
+ }
+ }
+ case CONTINENT_NAME -> {
+ String continentName = continent.getName();
+ if (continentName != null) {
+ data.put("continent_name", continentName);
+ }
+ }
+ case REGION_ISO_CODE -> {
+ // ISO 3166-2 code for country subdivisions.
+ // See iso.org/iso-3166-country-codes.html
+ String countryIso = country.getIsoCode();
+ String subdivisionIso = subdivision.getIsoCode();
+ if (countryIso != null && subdivisionIso != null) {
+ String regionIsoCode = countryIso + "-" + subdivisionIso;
+ data.put("region_iso_code", regionIsoCode);
+ }
+ }
+ case REGION_NAME -> {
+ String subdivisionName = subdivision.getName();
+ if (subdivisionName != null) {
+ data.put("region_name", subdivisionName);
+ }
+ }
+ case CITY_CONFIDENCE -> {
+ Integer cityConfidence = city.getConfidence();
+ if (cityConfidence != null) {
+ data.put("city_confidence", cityConfidence);
+ }
+ }
+ case CITY_NAME -> {
+ String cityName = city.getName();
+ if (cityName != null) {
+ data.put("city_name", cityName);
+ }
+ }
+ case TIMEZONE -> {
+ String locationTimeZone = location.getTimeZone();
+ if (locationTimeZone != null) {
+ data.put("timezone", locationTimeZone);
+ }
+ }
+ case LOCATION -> {
+ Double latitude = location.getLatitude();
+ Double longitude = location.getLongitude();
+ if (latitude != null && longitude != null) {
+ Map<String, Object> locationObject = new HashMap<>();
+ locationObject.put("lat", latitude);
+ locationObject.put("lon", longitude);
+ data.put("location", locationObject);
+ }
+ }
+ case ACCURACY_RADIUS -> {
+ Integer accuracyRadius = location.getAccuracyRadius();
+ if (accuracyRadius != null) {
+ data.put("accuracy_radius", accuracyRadius);
+ }
+ }
+ case POSTAL_CODE -> {
+ if (postal != null && postal.getCode() != null) {
+ data.put("postal_code", postal.getCode());
+ }
+ }
+ case POSTAL_CONFIDENCE -> {
+ Integer postalConfidence = postal.getConfidence();
+ if (postalConfidence != null) {
+ data.put("postal_confidence", postalConfidence);
+ }
+ }
+ case ASN -> {
+ if (asn != null) {
+ data.put("asn", asn);
+ }
+ }
+ case ORGANIZATION_NAME -> {
+ if (organizationName != null) {
+ data.put("organization_name", organizationName);
+ }
+ }
+ case NETWORK -> {
+ if (network != null) {
+ data.put("network", network.toString());
+ }
+ }
+ case HOSTING_PROVIDER -> {
+ data.put("hosting_provider", isHostingProvider);
+ }
+ case TOR_EXIT_NODE -> {
+ data.put("tor_exit_node", isTorExitNode);
+ }
+ case ANONYMOUS_VPN -> {
+ data.put("anonymous_vpn", isAnonymousVpn);
+ }
+ case ANONYMOUS -> {
+ data.put("anonymous", isAnonymous);
+ }
+ case PUBLIC_PROXY -> {
+ data.put("public_proxy", isPublicProxy);
+ }
+ case RESIDENTIAL_PROXY -> {
+ data.put("residential_proxy", isResidentialProxy);
+ }
+ case DOMAIN -> {
+ if (domain != null) {
+ data.put("domain", domain);
+ }
+ }
+ case ISP -> {
+ if (isp != null) {
+ data.put("isp", isp);
+ }
+ }
+ case ISP_ORGANIZATION_NAME -> {
+ if (ispOrganization != null) {
+ data.put("isp_organization_name", ispOrganization);
+ }
+ }
+ case MOBILE_COUNTRY_CODE -> {
+ if (mobileCountryCode != null) {
+ data.put("mobile_country_code", mobileCountryCode);
+ }
+ }
+ case MOBILE_NETWORK_CODE -> {
+ if (mobileNetworkCode != null) {
+ data.put("mobile_network_code", mobileNetworkCode);
+ }
+ }
+ case USER_TYPE -> {
+ if (userType != null) {
+ data.put("user_type", userType);
+ }
+ }
+ case CONNECTION_TYPE -> {
+ if (connectionType != null) {
+ data.put("connection_type", connectionType.toString());
+ }
+ }
+ }
+ }
+ return data;
+ }
+ }
+
+ static class Isp extends AbstractBase<IspResponse> {
+ Isp(final Set<Database.Property> properties) {
+ super(properties, IspResponse.class, (response, ipAddress, network, locales) -> new IspResponse(response, ipAddress, network));
+ }
+
+ @Override
+ protected Map<String, Object> transform(final IspResponse response) {
+ String isp = response.getIsp();
+ String ispOrganization = response.getOrganization();
+ String mobileNetworkCode = response.getMobileNetworkCode();
+ String mobileCountryCode = response.getMobileCountryCode();
+ Long asn = response.getAutonomousSystemNumber();
+ String organizationName = response.getAutonomousSystemOrganization();
+ Network network = response.getNetwork();
+
+ Map<String, Object> data = new HashMap<>();
+ for (Database.Property property : this.properties) {
+ switch (property) {
+ case IP -> data.put("ip", response.getIpAddress());
+ case ASN -> {
+ if (asn != null) {
+ data.put("asn", asn);
+ }
+ }
+ case ORGANIZATION_NAME -> {
+ if (organizationName != null) {
+ data.put("organization_name", organizationName);
+ }
+ }
+ case NETWORK -> {
+ if (network != null) {
+ data.put("network", network.toString());
+ }
+ }
+ case ISP -> {
+ if (isp != null) {
+ data.put("isp", isp);
+ }
+ }
+ case ISP_ORGANIZATION_NAME -> {
+ if (ispOrganization != null) {
+ data.put("isp_organization_name", ispOrganization);
+ }
+ }
+ case MOBILE_COUNTRY_CODE -> {
+ if (mobileCountryCode != null) {
+ data.put("mobile_country_code", mobileCountryCode);
+ }
+ }
+ case MOBILE_NETWORK_CODE -> {
+ if (mobileNetworkCode != null) {
+ data.put("mobile_network_code", mobileNetworkCode);
+ }
+ }
+ }
+ }
+ return data;
+ }
+ }
+
+ /**
+ * As an internal detail, the {@code com.maxmind.geoip2.model} classes that are populated by
+ * {@link Reader#getRecord(InetAddress, Class)} are kinda half-populated and need to go through a second round of construction
+ * with context from the querying caller. This interface gives us a place to do that additional binding. Cleverly, the signature
+ * here matches the constructor for many of these model classes exactly, so an appropriate implementation can 'just' be a method
+ * reference in some cases (in other cases it needs to be a lambda).
+ */
+ @FunctionalInterface
+ private interface ResponseBuilder<RESPONSE extends AbstractResponse> {
+ RESPONSE build(RESPONSE resp, String address, Network network, List<String> locales);
+ }
+
+ /**
+ * The {@link MaxmindIpDataLookups.AbstractBase} is an abstract base implementation of {@link IpDataLookup} that
+ * provides common functionality for getting a specific kind of {@link AbstractResponse} from a {@link IpDatabase}.
+ *
+ * @param <RESPONSE> the intermediate type of {@link AbstractResponse}
+ */
+ private abstract static class AbstractBase<RESPONSE extends AbstractResponse> implements IpDataLookup {
+
+ protected final Set<Database.Property> properties;
+ protected final Class<RESPONSE> clazz;
+ protected final ResponseBuilder<RESPONSE> builder;
+
+ AbstractBase(final Set<Database.Property> properties, final Class<RESPONSE> clazz, final ResponseBuilder<RESPONSE> builder) {
+ this.properties = Set.copyOf(properties);
+ this.clazz = clazz;
+ this.builder = builder;
+ }
+
+ @Override
+ public Set<Database.Property> getProperties() {
+ return this.properties;
+ }
+
+ @Override
+ public final Map<String, Object> getData(final IpDatabase ipDatabase, final String ipAddress) {
+ final RESPONSE response = ipDatabase.getResponse(ipAddress, this::lookup);
+ return (response == null) ? Map.of() : transform(response);
+ }
+
+ @Nullable
+ private RESPONSE lookup(final Reader reader, final String ipAddress) throws IOException {
+ final InetAddress ip = InetAddresses.forString(ipAddress);
+ final DatabaseRecord<RESPONSE> record = reader.getRecord(ip, clazz);
+ final RESPONSE data = record.getData();
+ return (data == null) ? null : builder.build(data, NetworkAddress.format(ip), record.getNetwork(), List.of("en"));
+ }
+
+ /**
+ * Extract the configured properties from the retrieved response
+ * @param response the non-null response that was retrieved
+ * @return a mapping of properties for the ip from the response
+ */
+ protected abstract Map<String, Object> transform(RESPONSE response);
+ }
+}
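
To make the ResponseBuilder shape concrete: whether a subclass can bind with a method reference or needs a lambda depends only on whether the model class's re-wrapping constructor happens to match build's four-argument signature. Both spellings below restate bindings already used above:

    // CityResponse(CityResponse, String, Network, List<String>) matches build(...) exactly
    ResponseBuilder<CityResponse> cityBuilder = CityResponse::new;
    // AsnResponse's re-wrapping constructor takes no locales, so a lambda drops that argument
    ResponseBuilder<AsnResponse> asnBuilder =
        (response, ipAddress, network, locales) -> new AsnResponse(response, ipAddress, network);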
diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/ConfigDatabasesTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/ConfigDatabasesTests.java
index 83b3d2cfbbc27..7f38a37b43edf 100644
--- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/ConfigDatabasesTests.java
+++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/ConfigDatabasesTests.java
@@ -126,7 +126,7 @@ public void testDatabasesUpdateExistingConfDatabase() throws Exception {
DatabaseReaderLazyLoader loader = configDatabases.getDatabase("GeoLite2-City.mmdb");
assertThat(loader.getDatabaseType(), equalTo("GeoLite2-City"));
- CityResponse cityResponse = loader.getCity("89.160.20.128");
+ CityResponse cityResponse = loader.getResponse("89.160.20.128", GeoIpTestUtils::getCity);
assertThat(cityResponse.getCity().getName(), equalTo("Tumba"));
assertThat(cache.count(), equalTo(1));
}
@@ -138,7 +138,7 @@ public void testDatabasesUpdateExistingConfDatabase() throws Exception {
DatabaseReaderLazyLoader loader = configDatabases.getDatabase("GeoLite2-City.mmdb");
assertThat(loader.getDatabaseType(), equalTo("GeoLite2-City"));
- CityResponse cityResponse = loader.getCity("89.160.20.128");
+ CityResponse cityResponse = loader.getResponse("89.160.20.128", GeoIpTestUtils::getCity);
assertThat(cityResponse.getCity().getName(), equalTo("Linköping"));
assertThat(cache.count(), equalTo(1));
});
diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorFactoryTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorFactoryTests.java
index 9972db26b3642..cfea54d2520bd 100644
--- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorFactoryTests.java
+++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorFactoryTests.java
@@ -195,7 +195,7 @@ public void testBuildWithCountryDbAndAsnFields() {
equalTo(
"[properties] illegal property value ["
+ asnProperty
- + "]. valid values are [IP, COUNTRY_ISO_CODE, COUNTRY_NAME, CONTINENT_CODE, CONTINENT_NAME]"
+ + "]. valid values are [IP, COUNTRY_IN_EUROPEAN_UNION, COUNTRY_ISO_CODE, COUNTRY_NAME, CONTINENT_CODE, CONTINENT_NAME]"
)
);
}
@@ -273,8 +273,9 @@ public void testBuildIllegalFieldOption() {
assertThat(
e.getMessage(),
equalTo(
- "[properties] illegal property value [invalid]. valid values are [IP, COUNTRY_ISO_CODE, "
- + "COUNTRY_NAME, CONTINENT_CODE, CONTINENT_NAME, REGION_ISO_CODE, REGION_NAME, CITY_NAME, TIMEZONE, LOCATION]"
+ "[properties] illegal property value [invalid]. valid values are [IP, COUNTRY_IN_EUROPEAN_UNION, COUNTRY_ISO_CODE, "
+ + "COUNTRY_NAME, CONTINENT_CODE, CONTINENT_NAME, REGION_ISO_CODE, REGION_NAME, CITY_NAME, TIMEZONE, "
+ + "LOCATION, POSTAL_CODE, ACCURACY_RADIUS]"
)
);
diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java
index f5c3c08579855..ffc40324bd886 100644
--- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java
+++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java
@@ -14,7 +14,6 @@
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.ingest.RandomDocumentPicks;
-import org.elasticsearch.ingest.geoip.Database.Property;
import org.elasticsearch.test.ESTestCase;
import org.junit.After;
import org.junit.Before;
@@ -40,7 +39,9 @@
public class GeoIpProcessorTests extends ESTestCase {
- private static final Set<Property> ALL_PROPERTIES = Set.of(Property.values());
+ private static IpDataLookup ipDataLookupAll(final Database database) {
+ return IpDataLookupFactories.getMaxmindLookup(database).apply(database.properties());
+ }
// a temporary directory that mmdb files can be copied to and read from
private Path tmpDir;
@@ -64,8 +65,16 @@ public void testDatabasePropertyInvariants() {
assertThat(Sets.difference(Database.Asn.properties(), Database.Isp.properties()), is(empty()));
assertThat(Sets.difference(Database.Asn.defaultProperties(), Database.Isp.defaultProperties()), is(empty()));
- // the enterprise database is like everything joined together
- for (Database type : Database.values()) {
+ // the enterprise database is like these other databases joined together
+ for (Database type : Set.of(
+ Database.City,
+ Database.Country,
+ Database.Asn,
+ Database.AnonymousIp,
+ Database.ConnectionType,
+ Database.Domain,
+ Database.Isp
+ )) {
assertThat(Sets.difference(type.properties(), Database.Enterprise.properties()), is(empty()));
}
// but in terms of the default fields, it's like a drop-in replacement for the city database
@@ -82,7 +91,7 @@ public void testCity() throws Exception {
loader("GeoLite2-City.mmdb"),
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.City),
false,
false,
"filename"
@@ -97,8 +106,9 @@ public void testCity() throws Exception {
@SuppressWarnings("unchecked")
Map<String, Object> geoData = (Map<String, Object>) ingestDocument.getSourceAndMetadata().get("target_field");
assertThat(geoData, notNullValue());
- assertThat(geoData.size(), equalTo(7));
+ assertThat(geoData.size(), equalTo(9));
assertThat(geoData.get("ip"), equalTo(ip));
+ assertThat(geoData.get("country_in_european_union"), equalTo(false));
assertThat(geoData.get("country_iso_code"), equalTo("US"));
assertThat(geoData.get("country_name"), equalTo("United States"));
assertThat(geoData.get("continent_code"), equalTo("NA"));
@@ -115,7 +125,7 @@ public void testNullValueWithIgnoreMissing() throws Exception {
loader("GeoLite2-City.mmdb"),
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.City),
true,
false,
"filename"
@@ -137,7 +147,7 @@ public void testNonExistentWithIgnoreMissing() throws Exception {
loader("GeoLite2-City.mmdb"),
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.City),
true,
false,
"filename"
@@ -156,7 +166,7 @@ public void testNullWithoutIgnoreMissing() {
loader("GeoLite2-City.mmdb"),
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.City),
false,
false,
"filename"
@@ -178,7 +188,7 @@ public void testNonExistentWithoutIgnoreMissing() {
loader("GeoLite2-City.mmdb"),
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.City),
false,
false,
"filename"
@@ -198,7 +208,7 @@ public void testCity_withIpV6() throws Exception {
loader("GeoLite2-City.mmdb"),
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.City),
false,
false,
"filename"
@@ -213,8 +223,9 @@ public void testCity_withIpV6() throws Exception {
@SuppressWarnings("unchecked")
Map<String, Object> geoData = (Map<String, Object>) ingestDocument.getSourceAndMetadata().get("target_field");
assertThat(geoData, notNullValue());
- assertThat(geoData.size(), equalTo(10));
+ assertThat(geoData.size(), equalTo(13));
assertThat(geoData.get("ip"), equalTo(ip));
+ assertThat(geoData.get("country_in_european_union"), equalTo(false));
assertThat(geoData.get("country_iso_code"), equalTo("US"));
assertThat(geoData.get("country_name"), equalTo("United States"));
assertThat(geoData.get("continent_code"), equalTo("NA"));
@@ -224,6 +235,8 @@ public void testCity_withIpV6() throws Exception {
assertThat(geoData.get("city_name"), equalTo("Homestead"));
assertThat(geoData.get("timezone"), equalTo("America/New_York"));
assertThat(geoData.get("location"), equalTo(Map.of("lat", 25.4573d, "lon", -80.4572d)));
+ assertThat(geoData.get("accuracy_radius"), equalTo(50));
+ assertThat(geoData.get("postal_code"), equalTo("33035"));
}
public void testCityWithMissingLocation() throws Exception {
@@ -235,7 +248,7 @@ public void testCityWithMissingLocation() throws Exception {
loader("GeoLite2-City.mmdb"),
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.City),
false,
false,
"filename"
@@ -263,7 +276,7 @@ public void testCountry() throws Exception {
loader("GeoLite2-Country.mmdb"),
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.Country),
false,
false,
"filename"
@@ -278,8 +291,9 @@ public void testCountry() throws Exception {
@SuppressWarnings("unchecked")
Map<String, Object> geoData = (Map<String, Object>) ingestDocument.getSourceAndMetadata().get("target_field");
assertThat(geoData, notNullValue());
- assertThat(geoData.size(), equalTo(5));
+ assertThat(geoData.size(), equalTo(6));
assertThat(geoData.get("ip"), equalTo(ip));
+ assertThat(geoData.get("country_in_european_union"), equalTo(true));
assertThat(geoData.get("country_iso_code"), equalTo("NL"));
assertThat(geoData.get("country_name"), equalTo("Netherlands"));
assertThat(geoData.get("continent_code"), equalTo("EU"));
@@ -295,7 +309,7 @@ public void testCountryWithMissingLocation() throws Exception {
loader("GeoLite2-Country.mmdb"),
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.Country),
false,
false,
"filename"
@@ -323,7 +337,7 @@ public void testAsn() throws Exception {
loader("GeoLite2-ASN.mmdb"),
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.Asn),
false,
false,
"filename"
@@ -354,7 +368,7 @@ public void testAnonymmousIp() throws Exception {
loader("GeoIP2-Anonymous-IP-Test.mmdb"),
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.AnonymousIp),
false,
false,
"filename"
@@ -388,7 +402,7 @@ public void testConnectionType() throws Exception {
loader("GeoIP2-Connection-Type-Test.mmdb"),
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.ConnectionType),
false,
false,
"filename"
@@ -417,7 +431,7 @@ public void testDomain() throws Exception {
loader("GeoIP2-Domain-Test.mmdb"),
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.Domain),
false,
false,
"filename"
@@ -446,7 +460,7 @@ public void testEnterprise() throws Exception {
loader("GeoIP2-Enterprise-Test.mmdb"),
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.Enterprise),
false,
false,
"filename"
@@ -461,17 +475,23 @@ public void testEnterprise() throws Exception {
@SuppressWarnings("unchecked")
Map<String, Object> geoData = (Map<String, Object>) ingestDocument.getSourceAndMetadata().get("target_field");
assertThat(geoData, notNullValue());
- assertThat(geoData.size(), equalTo(24));
+ assertThat(geoData.size(), equalTo(30));
assertThat(geoData.get("ip"), equalTo(ip));
+ assertThat(geoData.get("country_confidence"), equalTo(99));
+ assertThat(geoData.get("country_in_european_union"), equalTo(false));
assertThat(geoData.get("country_iso_code"), equalTo("US"));
assertThat(geoData.get("country_name"), equalTo("United States"));
assertThat(geoData.get("continent_code"), equalTo("NA"));
assertThat(geoData.get("continent_name"), equalTo("North America"));
assertThat(geoData.get("region_iso_code"), equalTo("US-NY"));
assertThat(geoData.get("region_name"), equalTo("New York"));
+ assertThat(geoData.get("city_confidence"), equalTo(11));
assertThat(geoData.get("city_name"), equalTo("Chatham"));
assertThat(geoData.get("timezone"), equalTo("America/New_York"));
assertThat(geoData.get("location"), equalTo(Map.of("lat", 42.3478, "lon", -73.5549)));
+ assertThat(geoData.get("accuracy_radius"), equalTo(27));
+ assertThat(geoData.get("postal_code"), equalTo("12037"));
+ assertThat(geoData.get("city_confidence"), equalTo(11));
assertThat(geoData.get("asn"), equalTo(14671L));
assertThat(geoData.get("organization_name"), equalTo("FairPoint Communications"));
assertThat(geoData.get("network"), equalTo("74.209.16.0/20"));
@@ -497,7 +517,7 @@ public void testIsp() throws Exception {
loader("GeoIP2-ISP-Test.mmdb"),
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.Isp),
false,
false,
"filename"
@@ -531,7 +551,7 @@ public void testAddressIsNotInTheDatabase() throws Exception {
loader("GeoLite2-City.mmdb"),
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.City),
false,
false,
"filename"
@@ -555,7 +575,7 @@ public void testInvalid() {
loader("GeoLite2-City.mmdb"),
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.City),
false,
false,
"filename"
@@ -576,7 +596,7 @@ public void testListAllValid() throws Exception {
loader("GeoLite2-City.mmdb"),
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.City),
false,
false,
"filename"
@@ -603,7 +623,7 @@ public void testListPartiallyValid() throws Exception {
loader("GeoLite2-City.mmdb"),
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.City),
false,
false,
"filename"
@@ -630,7 +650,7 @@ public void testListNoMatches() throws Exception {
loader("GeoLite2-City.mmdb"),
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.City),
false,
false,
"filename"
@@ -650,7 +670,7 @@ public void testListDatabaseReferenceCounting() throws Exception {
GeoIpProcessor processor = new GeoIpProcessor(randomAlphaOfLength(10), null, "source_field", () -> {
loader.preLookup();
return loader;
- }, () -> true, "target_field", ALL_PROPERTIES, false, false, "filename");
+ }, () -> true, "target_field", ipDataLookupAll(Database.City), false, false, "filename");
Map<String, Object> document = new HashMap<>();
document.put("source_field", List.of("8.8.8.8", "82.171.64.0"));
@@ -678,7 +698,7 @@ public void testListFirstOnly() throws Exception {
loader("GeoLite2-City.mmdb"),
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.City),
false,
true,
"filename"
@@ -703,7 +723,7 @@ public void testListFirstOnlyNoMatches() throws Exception {
loader("GeoLite2-City.mmdb"),
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.City),
false,
true,
"filename"
@@ -725,7 +745,7 @@ public void testInvalidDatabase() throws Exception {
loader("GeoLite2-City.mmdb"),
() -> false,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.City),
false,
true,
"filename"
@@ -748,7 +768,7 @@ public void testNoDatabase() throws Exception {
() -> null,
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.City),
false,
false,
"GeoLite2-City"
@@ -771,7 +791,7 @@ public void testNoDatabase_ignoreMissing() throws Exception {
() -> null,
() -> true,
"target_field",
- ALL_PROPERTIES,
+ ipDataLookupAll(Database.City),
true,
false,
"GeoLite2-City"
diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpTestUtils.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpTestUtils.java
index 461983bb24488..160671fd39001 100644
--- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpTestUtils.java
+++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpTestUtils.java
@@ -9,6 +9,13 @@
package org.elasticsearch.ingest.geoip;
+import com.maxmind.db.DatabaseRecord;
+import com.maxmind.db.Reader;
+import com.maxmind.geoip2.model.CityResponse;
+import com.maxmind.geoip2.model.CountryResponse;
+
+import org.elasticsearch.common.CheckedBiFunction;
+import org.elasticsearch.common.network.InetAddresses;
import org.elasticsearch.core.SuppressForbidden;
import java.io.FileNotFoundException;
@@ -17,6 +24,7 @@
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
+import java.util.List;
import java.util.Set;
import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
@@ -58,4 +66,28 @@ public static void copyDefaultDatabases(final Path directory, ConfigDatabases co
configDatabases.updateDatabase(directory.resolve(database), true);
}
}
+
+ /**
+ * A static city-specific responseProvider for use with {@link IpDatabase#getResponse(String, CheckedBiFunction)} in
+ * tests.
+ *
+ * Like this: {@code CityResponse city = loader.getResponse("some.ip.address", GeoIpTestUtils::getCity);}
+ */
+ public static CityResponse getCity(Reader reader, String ip) throws IOException {
+ DatabaseRecord<CityResponse> record = reader.getRecord(InetAddresses.forString(ip), CityResponse.class);
+ CityResponse data = record.getData();
+ return data == null ? null : new CityResponse(data, ip, record.getNetwork(), List.of("en"));
+ }
+
+ /**
+ * A static country-specific responseProvider for use with {@link IpDatabase#getResponse(String, CheckedBiFunction)} in
+ * tests.
+ *
+ * Like this: {@code CountryResponse country = loader.getResponse("some.ip.address", GeoIpTestUtils::getCountry);}
+ */
+ public static CountryResponse getCountry(Reader reader, String ip) throws IOException {
+ DatabaseRecord<CountryResponse> record = reader.getRecord(InetAddresses.forString(ip), CountryResponse.class);
+ CountryResponse data = record.getData();
+ return data == null ? null : new CountryResponse(data, ip, record.getNetwork(), List.of("en"));
+ }
}
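For illustration, a hedged usage sketch of these two helpers; it assumes a `DatabaseReaderLazyLoader` obtained from a `ConfigDatabases` instance, as in the lookup tests later in this change, and an IP address taken from MaxMind's GeoLite2 test data. None of it is part of the patch itself.

```java
// Hypothetical usage, mirroring the javadoc examples above; the loader and
// the test IP are assumptions, not part of this change.
DatabaseReaderLazyLoader loader = configDatabases.getDatabase("GeoLite2-City.mmdb");
CityResponse city = loader.getResponse("89.160.20.128", GeoIpTestUtils::getCity);
assertThat(city, notNullValue()); // getCity returns null on a database miss
```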
diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/IpinfoIpDataLookupsTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/IpinfoIpDataLookupsTests.java
new file mode 100644
index 0000000000000..905eb027626a1
--- /dev/null
+++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/IpinfoIpDataLookupsTests.java
@@ -0,0 +1,223 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.ingest.geoip;
+
+import com.maxmind.db.DatabaseRecord;
+import com.maxmind.db.Networks;
+import com.maxmind.db.Reader;
+
+import org.elasticsearch.common.network.NetworkAddress;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.util.set.Sets;
+import org.elasticsearch.core.SuppressForbidden;
+import org.elasticsearch.core.TimeValue;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.threadpool.TestThreadPool;
+import org.elasticsearch.threadpool.ThreadPool;
+import org.elasticsearch.watcher.ResourceWatcherService;
+import org.junit.After;
+import org.junit.Before;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.nio.file.Path;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.BiConsumer;
+
+import static java.util.Map.entry;
+import static org.elasticsearch.ingest.geoip.GeoIpTestUtils.copyDatabase;
+import static org.elasticsearch.ingest.geoip.IpinfoIpDataLookups.parseAsn;
+import static org.hamcrest.Matchers.empty;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.notNullValue;
+import static org.hamcrest.Matchers.nullValue;
+import static org.hamcrest.Matchers.startsWith;
+
+public class IpinfoIpDataLookupsTests extends ESTestCase {
+
+ private ThreadPool threadPool;
+ private ResourceWatcherService resourceWatcherService;
+
+ @Before
+ public void setup() {
+ threadPool = new TestThreadPool(ConfigDatabases.class.getSimpleName());
+ Settings settings = Settings.builder().put("resource.reload.interval.high", TimeValue.timeValueMillis(100)).build();
+ resourceWatcherService = new ResourceWatcherService(settings, threadPool);
+ }
+
+ @After
+ public void cleanup() {
+ resourceWatcherService.close();
+ threadPool.shutdownNow();
+ }
+
+ public void testDatabasePropertyInvariants() {
+ // the second ASN variant database is like a specialization of the ASN database
+ assertThat(Sets.difference(Database.Asn.properties(), Database.AsnV2.properties()), is(empty()));
+ assertThat(Database.Asn.defaultProperties(), equalTo(Database.AsnV2.defaultProperties()));
+ }
+
+ public void testParseAsn() {
+ // expected case: "AS123" is 123
+ assertThat(parseAsn("AS123"), equalTo(123L));
+ // defensive cases: null and empty become null; this is not expected in practice
+ assertThat(parseAsn(null), nullValue());
+ assertThat(parseAsn(""), nullValue());
+ // defensive cases: we strip whitespace and ignore case
+ assertThat(parseAsn(" as 456 "), equalTo(456L));
+ // defensive cases: we accept input without the 'AS' prefix
+ assertThat(parseAsn("123"), equalTo(123L));
+ // bottom case: a non-parsable string is null
+ assertThat(parseAsn("anythingelse"), nullValue());
+ }
+
+ public void testAsn() throws IOException {
+ Path configDir = createTempDir();
+ copyDatabase("ipinfo/ip_asn_sample.mmdb", configDir.resolve("ip_asn_sample.mmdb"));
+ copyDatabase("ipinfo/asn_sample.mmdb", configDir.resolve("asn_sample.mmdb"));
+
+ GeoIpCache cache = new GeoIpCache(1000); // real cache to test purging of entries upon a reload
+ ConfigDatabases configDatabases = new ConfigDatabases(configDir, cache);
+ configDatabases.initialize(resourceWatcherService);
+
+ // this is the 'free' ASN database (sample)
+ {
+ DatabaseReaderLazyLoader loader = configDatabases.getDatabase("ip_asn_sample.mmdb");
+ IpDataLookup lookup = new IpinfoIpDataLookups.Asn(Set.of(Database.Property.values()));
+ Map data = lookup.getData(loader, "5.182.109.0");
+ assertThat(
+ data,
+ equalTo(
+ Map.ofEntries(
+ entry("ip", "5.182.109.0"),
+ entry("organization_name", "M247 Europe SRL"),
+ entry("asn", 9009L),
+ entry("network", "5.182.109.0/24"),
+ entry("domain", "m247.com")
+ )
+ )
+ );
+ }
+
+ // this is the non-free or 'standard' ASN database (sample)
+ {
+ DatabaseReaderLazyLoader loader = configDatabases.getDatabase("asn_sample.mmdb");
+ IpDataLookup lookup = new IpinfoIpDataLookups.Asn(Set.of(Database.Property.values()));
+ Map data = lookup.getData(loader, "23.53.116.0");
+ assertThat(
+ data,
+ equalTo(
+ Map.ofEntries(
+ entry("ip", "23.53.116.0"),
+ entry("organization_name", "Akamai Technologies, Inc."),
+ entry("asn", 32787L),
+ entry("network", "23.53.116.0/24"),
+ entry("domain", "akamai.com"),
+ entry("type", "hosting"),
+ entry("country_iso_code", "US")
+ )
+ )
+ );
+ }
+ }
+
+ public void testAsnInvariants() {
+ Path configDir = createTempDir();
+ copyDatabase("ipinfo/ip_asn_sample.mmdb", configDir.resolve("ip_asn_sample.mmdb"));
+ copyDatabase("ipinfo/asn_sample.mmdb", configDir.resolve("asn_sample.mmdb"));
+
+ {
+ final Set expectedColumns = Set.of("network", "asn", "name", "domain");
+
+ Path databasePath = configDir.resolve("ip_asn_sample.mmdb");
+ assertDatabaseInvariants(databasePath, (ip, row) -> {
+ assertThat(row.keySet(), equalTo(expectedColumns));
+ String asn = (String) row.get("asn");
+ assertThat(asn, startsWith("AS"));
+ assertThat(asn, equalTo(asn.trim()));
+ Long parsed = parseAsn(asn);
+ assertThat(parsed, notNullValue());
+ assertThat(asn, equalTo("AS" + parsed)); // reverse it
+ });
+ }
+
+ {
+ final Set expectedColumns = Set.of("network", "asn", "name", "domain", "country", "type");
+
+ Path databasePath = configDir.resolve("asn_sample.mmdb");
+ assertDatabaseInvariants(databasePath, (ip, row) -> {
+ assertThat(row.keySet(), equalTo(expectedColumns));
+ String asn = (String) row.get("asn");
+ assertThat(asn, startsWith("AS"));
+ assertThat(asn, equalTo(asn.trim()));
+ Long parsed = parseAsn(asn);
+ assertThat(parsed, notNullValue());
+ assertThat(asn, equalTo("AS" + parsed)); // reverse it
+ });
+ }
+ }
+
+ public void testCountry() throws IOException {
+ Path configDir = createTempDir();
+ copyDatabase("ipinfo/ip_country_sample.mmdb", configDir.resolve("ip_country_sample.mmdb"));
+
+ GeoIpCache cache = new GeoIpCache(1000); // real cache to test purging of entries upon a reload
+ ConfigDatabases configDatabases = new ConfigDatabases(configDir, cache);
+ configDatabases.initialize(resourceWatcherService);
+
+ // this is the 'free' Country database (sample)
+ {
+ DatabaseReaderLazyLoader loader = configDatabases.getDatabase("ip_country_sample.mmdb");
+ IpDataLookup lookup = new IpinfoIpDataLookups.Country(Set.of(Database.Property.values()));
+ Map data = lookup.getData(loader, "4.221.143.168");
+ assertThat(
+ data,
+ equalTo(
+ Map.ofEntries(
+ entry("ip", "4.221.143.168"),
+ entry("country_name", "South Africa"),
+ entry("country_iso_code", "ZA"),
+ entry("continent_name", "Africa"),
+ entry("continent_code", "AF")
+ )
+ )
+ );
+ }
+ }
+
+ private static void assertDatabaseInvariants(final Path databasePath, final BiConsumer<InetAddress, Map<String, Object>> rowConsumer) {
+ try (Reader reader = new Reader(pathToFile(databasePath))) {
+ Networks<?> networks = reader.networks(Map.class);
+ while (networks.hasNext()) {
+ DatabaseRecord<?> dbr = networks.next();
+ InetAddress address = dbr.getNetwork().getNetworkAddress();
+ @SuppressWarnings("unchecked")
+ Map<String, Object> result = reader.get(address, Map.class);
+ try {
+ rowConsumer.accept(address, result);
+ } catch (AssertionError e) {
+ fail(e, "Assert failed for address [%s]", NetworkAddress.format(address));
+ } catch (Exception e) {
+ fail(e, "Exception handling address [%s]", NetworkAddress.format(address));
+ }
+ }
+ } catch (Exception e) {
+ fail(e);
+ }
+ }
+
+ @SuppressForbidden(reason = "Maxmind API requires java.io.File")
+ private static File pathToFile(Path databasePath) {
+ return databasePath.toFile();
+ }
+}
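The `testParseAsn` cases above pin down the parser's contract. The following is a minimal sketch that satisfies those assertions; it assumes nothing about the real `IpinfoIpDataLookups.parseAsn` beyond the tested behavior and may differ from the production code.

```java
// Minimal sketch consistent with testParseAsn; not the production parser.
static Long parseAsnSketch(String asn) {
    if (asn == null || asn.isEmpty()) {
        return null; // defensive: null and empty input yield null
    }
    String s = asn.replaceAll("\\s", ""); // strip whitespace, " as 456 " -> "as456"
    if (s.regionMatches(true, 0, "AS", 0, 2)) {
        s = s.substring(2); // the 'AS' prefix is optional and case-insensitive
    }
    try {
        return Long.parseLong(s);
    } catch (NumberFormatException e) {
        return null; // non-parsable input maps to null
    }
}
```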
diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/MMDBUtilTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/MMDBUtilTests.java
index f1c7d809b98fe..46a34c2cdad56 100644
--- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/MMDBUtilTests.java
+++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/MMDBUtilTests.java
@@ -116,6 +116,6 @@ public void testDatabaseTypeParsing() throws IOException {
}
private Database parseDatabaseFromType(String databaseFile) throws IOException {
- return Database.getDatabase(MMDBUtil.getDatabaseType(tmpDir.resolve(databaseFile)), null);
+ return IpDataLookupFactories.getDatabase(MMDBUtil.getDatabaseType(tmpDir.resolve(databaseFile)));
}
}
diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/MaxMindSupportTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/MaxMindSupportTests.java
index ec05054615bd8..1e05cf2b3ba33 100644
--- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/MaxMindSupportTests.java
+++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/MaxMindSupportTests.java
@@ -78,13 +78,16 @@ public class MaxMindSupportTests extends ESTestCase {
"city.name",
"continent.code",
"continent.name",
+ "country.inEuropeanUnion",
"country.isoCode",
"country.name",
+ "location.accuracyRadius",
"location.latitude",
"location.longitude",
"location.timeZone",
"mostSpecificSubdivision.isoCode",
- "mostSpecificSubdivision.name"
+ "mostSpecificSubdivision.name",
+ "postal.code"
);
private static final Set<String> CITY_UNSUPPORTED_FIELDS = Set.of(
"city.confidence",
@@ -94,14 +97,12 @@ public class MaxMindSupportTests extends ESTestCase {
"continent.names",
"country.confidence",
"country.geoNameId",
- "country.inEuropeanUnion",
"country.names",
"leastSpecificSubdivision.confidence",
"leastSpecificSubdivision.geoNameId",
"leastSpecificSubdivision.isoCode",
"leastSpecificSubdivision.name",
"leastSpecificSubdivision.names",
- "location.accuracyRadius",
"location.averageIncome",
"location.metroCode",
"location.populationDensity",
@@ -109,7 +110,6 @@ public class MaxMindSupportTests extends ESTestCase {
"mostSpecificSubdivision.confidence",
"mostSpecificSubdivision.geoNameId",
"mostSpecificSubdivision.names",
- "postal.code",
"postal.confidence",
"registeredCountry.confidence",
"registeredCountry.geoNameId",
@@ -159,6 +159,7 @@ public class MaxMindSupportTests extends ESTestCase {
private static final Set<String> COUNTRY_SUPPORTED_FIELDS = Set.of(
"continent.name",
+ "country.inEuropeanUnion",
"country.isoCode",
"continent.code",
"country.name"
@@ -168,7 +169,6 @@ public class MaxMindSupportTests extends ESTestCase {
"continent.names",
"country.confidence",
"country.geoNameId",
- "country.inEuropeanUnion",
"country.names",
"maxMind",
"registeredCountry.confidence",
@@ -213,16 +213,22 @@ public class MaxMindSupportTests extends ESTestCase {
private static final Set<String> DOMAIN_UNSUPPORTED_FIELDS = Set.of("ipAddress", "network");
private static final Set<String> ENTERPRISE_SUPPORTED_FIELDS = Set.of(
+ "city.confidence",
"city.name",
"continent.code",
"continent.name",
+ "country.confidence",
+ "country.inEuropeanUnion",
"country.isoCode",
"country.name",
+ "location.accuracyRadius",
"location.latitude",
"location.longitude",
"location.timeZone",
"mostSpecificSubdivision.isoCode",
"mostSpecificSubdivision.name",
+ "postal.code",
+ "postal.confidence",
"traits.anonymous",
"traits.anonymousVpn",
"traits.autonomousSystemNumber",
@@ -241,21 +247,17 @@ public class MaxMindSupportTests extends ESTestCase {
"traits.userType"
);
private static final Set<String> ENTERPRISE_UNSUPPORTED_FIELDS = Set.of(
- "city.confidence",
"city.geoNameId",
"city.names",
"continent.geoNameId",
"continent.names",
- "country.confidence",
"country.geoNameId",
- "country.inEuropeanUnion",
"country.names",
"leastSpecificSubdivision.confidence",
"leastSpecificSubdivision.geoNameId",
"leastSpecificSubdivision.isoCode",
"leastSpecificSubdivision.name",
"leastSpecificSubdivision.names",
- "location.accuracyRadius",
"location.averageIncome",
"location.metroCode",
"location.populationDensity",
@@ -263,8 +265,6 @@ public class MaxMindSupportTests extends ESTestCase {
"mostSpecificSubdivision.confidence",
"mostSpecificSubdivision.geoNameId",
"mostSpecificSubdivision.names",
- "postal.code",
- "postal.confidence",
"registeredCountry.confidence",
"registeredCountry.geoNameId",
"registeredCountry.inEuropeanUnion",
@@ -361,8 +361,14 @@ public class MaxMindSupportTests extends ESTestCase {
private static final Set<Class<? extends AbstractResponse>> KNOWN_UNSUPPORTED_RESPONSE_CLASSES = Set.of(IpRiskResponse.class);
+ private static final Set<Database> KNOWN_UNSUPPORTED_DATABASE_VARIANTS = Set.of(Database.AsnV2);
+
public void testMaxMindSupport() {
for (Database databaseType : Database.values()) {
+ if (KNOWN_UNSUPPORTED_DATABASE_VARIANTS.contains(databaseType)) {
+ continue;
+ }
+
Class<? extends AbstractResponse> maxMindClass = TYPE_TO_MAX_MIND_CLASS.get(databaseType);
Set<String> supportedFields = TYPE_TO_SUPPORTED_FIELDS_MAP.get(databaseType);
Set<String> unsupportedFields = TYPE_TO_UNSUPPORTED_FIELDS_MAP.get(databaseType);
@@ -468,36 +474,6 @@ public void testUnknownMaxMindResponseClassess() {
);
}
- /*
- * This tests that this test has a mapping in TYPE_TO_MAX_MIND_CLASS for all MaxMind classes exposed through IpDatabase.
- */
- public void testUsedMaxMindResponseClassesAreAccountedFor() {
- Set<Class<? extends AbstractResponse>> usedMaxMindResponseClasses = getUsedMaxMindResponseClasses();
- Set<Class<? extends AbstractResponse>> supportedMaxMindClasses = new HashSet<>(TYPE_TO_MAX_MIND_CLASS.values());
- Set<Class<? extends AbstractResponse>> usedButNotSupportedMaxMindResponseClasses = Sets.difference(
- usedMaxMindResponseClasses,
- supportedMaxMindClasses
- );
- assertThat(
- "IpDatabase exposes MaxMind response classes that this test does not know what to do with. Add mappings to "
- + "TYPE_TO_MAX_MIND_CLASS for the following: "
- + usedButNotSupportedMaxMindResponseClasses,
- usedButNotSupportedMaxMindResponseClasses,
- empty()
- );
- Set<Class<? extends AbstractResponse>> supportedButNotUsedMaxMindClasses = Sets.difference(
- supportedMaxMindClasses,
- usedMaxMindResponseClasses
- );
- assertThat(
- "This test claims to support MaxMind response classes that are not exposed in IpDatabase. Remove the following from "
- + "TYPE_TO_MAX_MIND_CLASS: "
- + supportedButNotUsedMaxMindClasses,
- supportedButNotUsedMaxMindClasses,
- empty()
- );
- }
-
/*
* This is the list of field types that causes us to stop recursing. That is, fields of these types are the lowest-level fields that
* we care about.
@@ -616,23 +592,4 @@ private static String getFormattedList(Set<String> fields) {
}
return result.toString();
}
-
- /*
- * This returns all AbstractResponse classes that are returned from getter methods on IpDatabase.
- */
- private static Set<Class<? extends AbstractResponse>> getUsedMaxMindResponseClasses() {
- Set<Class<? extends AbstractResponse>> result = new HashSet<>();
- Method[] methods = IpDatabase.class.getMethods();
- for (Method method : methods) {
- if (method.getName().startsWith("get")) {
- Class<?> returnType = method.getReturnType();
- try {
- result.add(returnType.asSubclass(AbstractResponse.class));
- } catch (ClassCastException ignore) {
- // This is not what we were looking for, move on
- }
- }
- }
- return result;
- }
}
diff --git a/modules/ingest-geoip/src/test/resources/ipinfo/asn_sample.mmdb b/modules/ingest-geoip/src/test/resources/ipinfo/asn_sample.mmdb
new file mode 100644
index 0000000000000..916a8252a5df1
Binary files /dev/null and b/modules/ingest-geoip/src/test/resources/ipinfo/asn_sample.mmdb differ
diff --git a/modules/ingest-geoip/src/test/resources/ipinfo/ip_asn_sample.mmdb b/modules/ingest-geoip/src/test/resources/ipinfo/ip_asn_sample.mmdb
new file mode 100644
index 0000000000000..3e1fc49ba48a5
Binary files /dev/null and b/modules/ingest-geoip/src/test/resources/ipinfo/ip_asn_sample.mmdb differ
diff --git a/modules/ingest-geoip/src/test/resources/ipinfo/ip_country_sample.mmdb b/modules/ingest-geoip/src/test/resources/ipinfo/ip_country_sample.mmdb
new file mode 100644
index 0000000000000..88428315ee8d6
Binary files /dev/null and b/modules/ingest-geoip/src/test/resources/ipinfo/ip_country_sample.mmdb differ
diff --git a/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryMetricsTests.java b/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryMetricsTests.java
new file mode 100644
index 0000000000000..a9bf0afa37e18
--- /dev/null
+++ b/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryMetricsTests.java
@@ -0,0 +1,468 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.repositories.azure;
+
+import com.sun.net.httpserver.HttpExchange;
+import com.sun.net.httpserver.HttpHandler;
+
+import org.elasticsearch.cluster.node.DiscoveryNode;
+import org.elasticsearch.common.blobstore.BlobContainer;
+import org.elasticsearch.common.blobstore.BlobPath;
+import org.elasticsearch.common.blobstore.OperationPurpose;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.core.SuppressForbidden;
+import org.elasticsearch.plugins.PluginsService;
+import org.elasticsearch.repositories.RepositoriesMetrics;
+import org.elasticsearch.repositories.RepositoriesService;
+import org.elasticsearch.repositories.blobstore.BlobStoreRepository;
+import org.elasticsearch.repositories.blobstore.RequestedRangeNotSatisfiedException;
+import org.elasticsearch.rest.RestStatus;
+import org.elasticsearch.telemetry.Measurement;
+import org.elasticsearch.telemetry.TestTelemetryPlugin;
+import org.junit.After;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.function.Consumer;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static org.elasticsearch.repositories.azure.AbstractAzureServerTestCase.randomBlobContent;
+import static org.hamcrest.Matchers.greaterThanOrEqualTo;
+import static org.hamcrest.Matchers.hasSize;
+import static org.hamcrest.Matchers.lessThanOrEqualTo;
+
+@SuppressForbidden(reason = "we use a HttpServer to emulate Azure")
+public class AzureBlobStoreRepositoryMetricsTests extends AzureBlobStoreRepositoryTests {
+
+ private static final Predicate<HttpExchange> GET_BLOB_REQUEST_PREDICATE = request -> GET_BLOB_PATTERN.test(
+ request.getRequestMethod() + " " + request.getRequestURI()
+ );
+ private static final int MAX_RETRIES = 3;
+
+ private final Queue<RequestHandler> requestHandlers = new ConcurrentLinkedQueue<>();
+
+ @Override
+ protected Map<String, HttpHandler> createHttpHandlers() {
+ Map<String, HttpHandler> httpHandlers = super.createHttpHandlers();
+ assert httpHandlers.size() == 1 : "This assumes there's a single handler";
+ return httpHandlers.entrySet()
+ .stream()
+ .collect(Collectors.toMap(Map.Entry::getKey, e -> new ResponseInjectingAzureHttpHandler(requestHandlers, e.getValue())));
+ }
+
+ /**
+ * We inject errors explicitly in this test, so don't wrap the delegate in the random error handler.
+ */
+ @Override
+ protected HttpHandler createErroneousHttpHandler(HttpHandler delegate) {
+ return delegate;
+ }
+
+ @After
+ public void checkRequestHandlerQueue() {
+ if (requestHandlers.isEmpty() == false) {
+ fail("There were unused request handlers left in the queue, this is probably a broken test");
+ }
+ }
+
+ private static BlobContainer getBlobContainer(String dataNodeName, String repository) {
+ final var blobStoreRepository = (BlobStoreRepository) internalCluster().getInstance(RepositoriesService.class, dataNodeName)
+ .repository(repository);
+ return blobStoreRepository.blobStore().blobContainer(BlobPath.EMPTY.add(randomIdentifier()));
+ }
+
+ public void testThrottleResponsesAreCountedInMetrics() throws IOException {
+ final String repository = createRepository(randomRepositoryName());
+ final String dataNodeName = internalCluster().getNodeNameThat(DiscoveryNode::canContainData);
+ final BlobContainer blobContainer = getBlobContainer(dataNodeName, repository);
+
+ // Create a blob
+ final String blobName = "index-" + randomIdentifier();
+ final OperationPurpose purpose = randomFrom(OperationPurpose.values());
+ blobContainer.writeBlob(purpose, blobName, BytesReference.fromByteBuffer(ByteBuffer.wrap(randomBlobContent())), false);
+ clearMetrics(dataNodeName);
+
+ // Queue up some throttle responses
+ final int numThrottles = randomIntBetween(1, MAX_RETRIES);
+ IntStream.range(0, numThrottles).forEach(i -> requestHandlers.offer(new FixedRequestHandler(RestStatus.TOO_MANY_REQUESTS)));
+
+ // Check that the blob exists
+ blobContainer.blobExists(purpose, blobName);
+
+ // Correct metrics are recorded
+ metricsAsserter(dataNodeName, purpose, AzureBlobStore.Operation.GET_BLOB_PROPERTIES, repository).expectMetrics()
+ .withRequests(numThrottles + 1)
+ .withThrottles(numThrottles)
+ .withExceptions(numThrottles)
+ .forResult(MetricsAsserter.Result.Success);
+ }
+
+ public void testRangeNotSatisfiedAreCountedInMetrics() throws IOException {
+ final String repository = createRepository(randomRepositoryName());
+ final String dataNodeName = internalCluster().getNodeNameThat(DiscoveryNode::canContainData);
+ final BlobContainer blobContainer = getBlobContainer(dataNodeName, repository);
+
+ // Create a blob
+ final String blobName = "index-" + randomIdentifier();
+ final OperationPurpose purpose = randomFrom(OperationPurpose.values());
+ blobContainer.writeBlob(purpose, blobName, BytesReference.fromByteBuffer(ByteBuffer.wrap(randomBlobContent())), false);
+ clearMetrics(dataNodeName);
+
+ // Queue up a range-not-satisfied error
+ requestHandlers.offer(new FixedRequestHandler(RestStatus.REQUESTED_RANGE_NOT_SATISFIED, null, GET_BLOB_REQUEST_PREDICATE));
+
+ // Attempt to read the blob
+ assertThrows(RequestedRangeNotSatisfiedException.class, () -> blobContainer.readBlob(purpose, blobName));
+
+ // Correct metrics are recorded
+ metricsAsserter(dataNodeName, purpose, AzureBlobStore.Operation.GET_BLOB, repository).expectMetrics()
+ .withRequests(1)
+ .withThrottles(0)
+ .withExceptions(1)
+ .forResult(MetricsAsserter.Result.RangeNotSatisfied);
+ }
+
+ public void testErrorResponsesAreCountedInMetrics() throws IOException {
+ final String repository = createRepository(randomRepositoryName());
+ final String dataNodeName = internalCluster().getNodeNameThat(DiscoveryNode::canContainData);
+ final BlobContainer blobContainer = getBlobContainer(dataNodeName, repository);
+
+ // Create a blob
+ final String blobName = "index-" + randomIdentifier();
+ final OperationPurpose purpose = randomFrom(OperationPurpose.values());
+ blobContainer.writeBlob(purpose, blobName, BytesReference.fromByteBuffer(ByteBuffer.wrap(randomBlobContent())), false);
+ clearMetrics(dataNodeName);
+
+ // Queue some retry-able error responses
+ final int numErrors = randomIntBetween(1, MAX_RETRIES);
+ final AtomicInteger throttles = new AtomicInteger();
+ IntStream.range(0, numErrors).forEach(i -> {
+ RestStatus status = randomFrom(RestStatus.INTERNAL_SERVER_ERROR, RestStatus.TOO_MANY_REQUESTS, RestStatus.SERVICE_UNAVAILABLE);
+ if (status == RestStatus.TOO_MANY_REQUESTS) {
+ throttles.incrementAndGet();
+ }
+ requestHandlers.offer(new FixedRequestHandler(status));
+ });
+
+ // Check that the blob exists
+ blobContainer.blobExists(purpose, blobName);
+
+ // Correct metrics are recorded
+ metricsAsserter(dataNodeName, purpose, AzureBlobStore.Operation.GET_BLOB_PROPERTIES, repository).expectMetrics()
+ .withRequests(numErrors + 1)
+ .withThrottles(throttles.get())
+ .withExceptions(numErrors)
+ .forResult(MetricsAsserter.Result.Success);
+ }
+
+ public void testRequestFailuresAreCountedInMetrics() {
+ final String repository = createRepository(randomRepositoryName());
+ final String dataNodeName = internalCluster().getNodeNameThat(DiscoveryNode::canContainData);
+ final BlobContainer blobContainer = getBlobContainer(dataNodeName, repository);
+ clearMetrics(dataNodeName);
+
+ // Repeatedly cause a connection error to exhaust retries
+ IntStream.range(0, MAX_RETRIES + 1).forEach(i -> requestHandlers.offer((exchange, delegate) -> exchange.close()));
+
+ // Hit the API
+ OperationPurpose purpose = randomFrom(OperationPurpose.values());
+ assertThrows(IOException.class, () -> blobContainer.listBlobs(purpose));
+
+ // Correct metrics are recorded
+ metricsAsserter(dataNodeName, purpose, AzureBlobStore.Operation.LIST_BLOBS, repository).expectMetrics()
+ .withRequests(4)
+ .withThrottles(0)
+ .withExceptions(4)
+ .forResult(MetricsAsserter.Result.Exception);
+ }
+
+ public void testRequestTimeIsAccurate() throws IOException {
+ final String repository = createRepository(randomRepositoryName());
+ final String dataNodeName = internalCluster().getNodeNameThat(DiscoveryNode::canContainData);
+ final BlobContainer blobContainer = getBlobContainer(dataNodeName, repository);
+ clearMetrics(dataNodeName);
+
+ AtomicLong totalDelayMillis = new AtomicLong(0);
+ // Add some artificial delays
+ IntStream.range(0, randomIntBetween(1, MAX_RETRIES)).forEach(i -> {
+ long thisDelay = randomLongBetween(10, 100);
+ totalDelayMillis.addAndGet(thisDelay);
+ requestHandlers.offer((exchange, delegate) -> {
+ safeSleep(thisDelay);
+ // return a retry-able error
+ exchange.sendResponseHeaders(RestStatus.INTERNAL_SERVER_ERROR.getStatus(), -1);
+ });
+ });
+
+ // Hit the API
+ final long startTimeMillis = System.currentTimeMillis();
+ blobContainer.listBlobs(randomFrom(OperationPurpose.values()));
+ final long elapsedTimeMillis = System.currentTimeMillis() - startTimeMillis;
+
+ List<Measurement> longHistogramMeasurement = getTelemetryPlugin(dataNodeName).getLongHistogramMeasurement(
+ RepositoriesMetrics.HTTP_REQUEST_TIME_IN_MILLIS_HISTOGRAM
+ );
+ long recordedRequestTime = longHistogramMeasurement.get(0).getLong();
+ // Request time should be >= the delays we simulated
+ assertThat(recordedRequestTime, greaterThanOrEqualTo(totalDelayMillis.get()));
+ // And <= the elapsed time for the request
+ assertThat(recordedRequestTime, lessThanOrEqualTo(elapsedTimeMillis));
+ }
+
+ private void clearMetrics(String discoveryNode) {
+ internalCluster().getInstance(PluginsService.class, discoveryNode)
+ .filterPlugins(TestTelemetryPlugin.class)
+ .forEach(TestTelemetryPlugin::resetMeter);
+ }
+
+ private MetricsAsserter metricsAsserter(
+ String dataNodeName,
+ OperationPurpose operationPurpose,
+ AzureBlobStore.Operation operation,
+ String repository
+ ) {
+ return new MetricsAsserter(dataNodeName, operationPurpose, operation, repository);
+ }
+
+ private class MetricsAsserter {
+ private final String dataNodeName;
+ private final OperationPurpose purpose;
+ private final AzureBlobStore.Operation operation;
+ private final String repository;
+
+ enum Result {
+ Success,
+ Failure,
+ RangeNotSatisfied,
+ Exception
+ }
+
+ enum MetricType {
+ LongHistogram {
+ @Override
+ List<Measurement> getMeasurements(TestTelemetryPlugin testTelemetryPlugin, String name) {
+ return testTelemetryPlugin.getLongHistogramMeasurement(name);
+ }
+ },
+ LongCounter {
+ @Override
+ List<Measurement> getMeasurements(TestTelemetryPlugin testTelemetryPlugin, String name) {
+ return testTelemetryPlugin.getLongCounterMeasurement(name);
+ }
+ };
+
+ abstract List<Measurement> getMeasurements(TestTelemetryPlugin testTelemetryPlugin, String name);
+ }
+
+ private MetricsAsserter(String dataNodeName, OperationPurpose purpose, AzureBlobStore.Operation operation, String repository) {
+ this.dataNodeName = dataNodeName;
+ this.purpose = purpose;
+ this.operation = operation;
+ this.repository = repository;
+ }
+
+ private class Expectations {
+ private int expectedRequests;
+ private int expectedThrottles;
+ private int expectedExceptions;
+
+ public Expectations withRequests(int expectedRequests) {
+ this.expectedRequests = expectedRequests;
+ return this;
+ }
+
+ public Expectations withThrottles(int expectedThrottles) {
+ this.expectedThrottles = expectedThrottles;
+ return this;
+ }
+
+ public Expectations withExceptions(int expectedExceptions) {
+ this.expectedExceptions = expectedExceptions;
+ return this;
+ }
+
+ public void forResult(Result result) {
+ assertMetricsRecorded(expectedRequests, expectedThrottles, expectedExceptions, result);
+ }
+ }
+
+ Expectations expectMetrics() {
+ return new Expectations();
+ }
+
+ private void assertMetricsRecorded(int expectedRequests, int expectedThrottles, int expectedExceptions, Result result) {
+ assertIntMetricRecorded(MetricType.LongCounter, RepositoriesMetrics.METRIC_OPERATIONS_TOTAL, 1);
+ assertIntMetricRecorded(MetricType.LongCounter, RepositoriesMetrics.METRIC_REQUESTS_TOTAL, expectedRequests);
+
+ if (expectedThrottles > 0) {
+ assertIntMetricRecorded(MetricType.LongCounter, RepositoriesMetrics.METRIC_THROTTLES_TOTAL, expectedThrottles);
+ assertIntMetricRecorded(MetricType.LongHistogram, RepositoriesMetrics.METRIC_THROTTLES_HISTOGRAM, expectedThrottles);
+ } else {
+ assertNoMetricRecorded(MetricType.LongCounter, RepositoriesMetrics.METRIC_THROTTLES_TOTAL);
+ assertNoMetricRecorded(MetricType.LongHistogram, RepositoriesMetrics.METRIC_THROTTLES_HISTOGRAM);
+ }
+
+ if (expectedExceptions > 0) {
+ assertIntMetricRecorded(MetricType.LongCounter, RepositoriesMetrics.METRIC_EXCEPTIONS_TOTAL, expectedExceptions);
+ assertIntMetricRecorded(MetricType.LongHistogram, RepositoriesMetrics.METRIC_EXCEPTIONS_HISTOGRAM, expectedExceptions);
+ } else {
+ assertNoMetricRecorded(MetricType.LongCounter, RepositoriesMetrics.METRIC_EXCEPTIONS_TOTAL);
+ assertNoMetricRecorded(MetricType.LongHistogram, RepositoriesMetrics.METRIC_EXCEPTIONS_HISTOGRAM);
+ }
+
+ if (result == Result.RangeNotSatisfied || result == Result.Failure || result == Result.Exception) {
+ assertIntMetricRecorded(MetricType.LongCounter, RepositoriesMetrics.METRIC_UNSUCCESSFUL_OPERATIONS_TOTAL, 1);
+ } else {
+ assertNoMetricRecorded(MetricType.LongCounter, RepositoriesMetrics.METRIC_UNSUCCESSFUL_OPERATIONS_TOTAL);
+ }
+
+ if (result == Result.RangeNotSatisfied) {
+ assertIntMetricRecorded(MetricType.LongCounter, RepositoriesMetrics.METRIC_EXCEPTIONS_REQUEST_RANGE_NOT_SATISFIED_TOTAL, 1);
+ } else {
+ assertNoMetricRecorded(MetricType.LongCounter, RepositoriesMetrics.METRIC_EXCEPTIONS_REQUEST_RANGE_NOT_SATISFIED_TOTAL);
+ }
+
+ assertMatchingMetricRecorded(
+ MetricType.LongHistogram,
+ RepositoriesMetrics.HTTP_REQUEST_TIME_IN_MILLIS_HISTOGRAM,
+ m -> assertThat("No request time metric found", m.getLong(), greaterThanOrEqualTo(0L))
+ );
+ }
+
+ private void assertIntMetricRecorded(MetricType metricType, String metricName, int expectedValue) {
+ assertMatchingMetricRecorded(
+ metricType,
+ metricName,
+ measurement -> assertEquals("Unexpected value for " + metricType + " " + metricName, expectedValue, measurement.getLong())
+ );
+ }
+
+ private void assertNoMetricRecorded(MetricType metricType, String metricName) {
+ assertThat(
+ "Expected no values for " + metricType + " " + metricName,
+ metricType.getMeasurements(getTelemetryPlugin(dataNodeName), metricName),
+ hasSize(0)
+ );
+ }
+
+ private void assertMatchingMetricRecorded(MetricType metricType, String metricName, Consumer<Measurement> assertion) {
+ List<Measurement> measurements = metricType.getMeasurements(getTelemetryPlugin(dataNodeName), metricName);
+ Measurement measurement = measurements.stream()
+ .filter(
+ m -> m.attributes().get("operation").equals(operation.getKey())
+ && m.attributes().get("purpose").equals(purpose.getKey())
+ && m.attributes().get("repo_name").equals(repository)
+ && m.attributes().get("repo_type").equals("azure")
+ )
+ .findFirst()
+ .orElseThrow(
+ () -> new IllegalStateException(
+ "No metric found with name="
+ + metricName
+ + " and operation="
+ + operation.getKey()
+ + " and purpose="
+ + purpose.getKey()
+ + " and repo_name="
+ + repository
+ + " in "
+ + measurements
+ )
+ );
+
+ assertion.accept(measurement);
+ }
+ }
+
+ @SuppressForbidden(reason = "we use a HttpServer to emulate Azure")
+ private static class ResponseInjectingAzureHttpHandler implements DelegatingHttpHandler {
+
+ private final HttpHandler delegate;
+ private final Queue<RequestHandler> requestHandlerQueue;
+
+ ResponseInjectingAzureHttpHandler(Queue<RequestHandler> requestHandlerQueue, HttpHandler delegate) {
+ this.delegate = delegate;
+ this.requestHandlerQueue = requestHandlerQueue;
+ }
+
+ @Override
+ public void handle(HttpExchange exchange) throws IOException {
+ RequestHandler nextHandler = requestHandlerQueue.peek();
+ if (nextHandler != null && nextHandler.matchesRequest(exchange)) {
+ requestHandlerQueue.poll().writeResponse(exchange, delegate);
+ } else {
+ delegate.handle(exchange);
+ }
+ }
+
+ @Override
+ public HttpHandler getDelegate() {
+ return delegate;
+ }
+ }
+
+ @SuppressForbidden(reason = "we use a HttpServer to emulate Azure")
+ @FunctionalInterface
+ private interface RequestHandler {
+ void writeResponse(HttpExchange exchange, HttpHandler delegate) throws IOException;
+
+ default boolean matchesRequest(HttpExchange exchange) {
+ return true;
+ }
+ }
+
+ @SuppressForbidden(reason = "we use a HttpServer to emulate Azure")
+ private static class FixedRequestHandler implements RequestHandler {
+
+ private final RestStatus status;
+ private final String responseBody;
+ private final Predicate<HttpExchange> requestMatcher;
+
+ FixedRequestHandler(RestStatus status) {
+ this(status, null, req -> true);
+ }
+
+ /**
+ * Create a handler that only gets executed for requests that match the supplied predicate. Note
+ * that because the handlers sit in a queue, a predicated handler at the head blocks any
+ * subsequently queued errors from being returned until a matching request has consumed it.
+ */
+ FixedRequestHandler(RestStatus status, String responseBody, Predicate<HttpExchange> requestMatcher) {
+ this.status = status;
+ this.responseBody = responseBody;
+ this.requestMatcher = requestMatcher;
+ }
+
+ @Override
+ public boolean matchesRequest(HttpExchange exchange) {
+ return requestMatcher.test(exchange);
+ }
+
+ @Override
+ public void writeResponse(HttpExchange exchange, HttpHandler delegateHandler) throws IOException {
+ if (responseBody != null) {
+ byte[] responseBytes = responseBody.getBytes(StandardCharsets.UTF_8);
+ exchange.sendResponseHeaders(status.getStatus(), responseBytes.length);
+ exchange.getResponseBody().write(responseBytes);
+ } else {
+ exchange.sendResponseHeaders(status.getStatus(), -1);
+ }
+ }
+ }
+}
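To make those queue semantics concrete, here is a hedged sketch of how a predicated handler gates later entries; it is illustrative only and reuses names from the test class above.

```java
// Illustrative only: the handler at the head of the queue is peeked, never
// skipped, so the throttle below cannot be served until a GET-blob request
// consumes the range-not-satisfied response ahead of it.
requestHandlers.offer(new FixedRequestHandler(RestStatus.REQUESTED_RANGE_NOT_SATISFIED, null, GET_BLOB_REQUEST_PREDICATE));
requestHandlers.offer(new FixedRequestHandler(RestStatus.TOO_MANY_REQUESTS));
// A listBlobs call now passes straight through to the delegate, while the
// next blob read receives the 416 and the request after that the 429.
```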
diff --git a/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryTests.java b/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryTests.java
index 1b7628cc0ad8e..473d91da6e34c 100644
--- a/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryTests.java
+++ b/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryTests.java
@@ -16,11 +16,13 @@
import com.sun.net.httpserver.HttpExchange;
import com.sun.net.httpserver.HttpHandler;
+import org.elasticsearch.action.support.broadcast.BroadcastResponse;
import org.elasticsearch.common.Randomness;
import org.elasticsearch.common.UUIDs;
import org.elasticsearch.common.blobstore.BlobContainer;
import org.elasticsearch.common.blobstore.BlobPath;
import org.elasticsearch.common.blobstore.BlobStore;
+import org.elasticsearch.common.blobstore.OperationPurpose;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.settings.MockSecureSettings;
@@ -30,8 +32,15 @@
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.core.SuppressForbidden;
import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.plugins.PluginsService;
+import org.elasticsearch.repositories.RepositoriesService;
+import org.elasticsearch.repositories.RepositoryMissingException;
+import org.elasticsearch.repositories.blobstore.BlobStoreRepository;
import org.elasticsearch.repositories.blobstore.ESMockAPIBasedRepositoryIntegTestCase;
import org.elasticsearch.rest.RestStatus;
+import org.elasticsearch.telemetry.Measurement;
+import org.elasticsearch.telemetry.TestTelemetryPlugin;
+import org.elasticsearch.test.BackgroundIndexer;
import java.io.ByteArrayInputStream;
import java.io.IOException;
@@ -41,22 +50,33 @@
import java.util.Base64;
import java.util.Collection;
import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.concurrent.atomic.LongAdder;
import java.util.function.Predicate;
import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+import static org.elasticsearch.repositories.RepositoriesMetrics.METRIC_OPERATIONS_TOTAL;
import static org.elasticsearch.repositories.blobstore.BlobStoreTestUtil.randomPurpose;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
+import static org.hamcrest.Matchers.allOf;
import static org.hamcrest.Matchers.anEmptyMap;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.hasEntry;
+import static org.hamcrest.Matchers.hasKey;
import static org.hamcrest.Matchers.is;
@SuppressForbidden(reason = "this test uses a HttpServer to emulate an Azure endpoint")
public class AzureBlobStoreRepositoryTests extends ESMockAPIBasedRepositoryIntegTestCase {
- private static final String DEFAULT_ACCOUNT_NAME = "account";
+ protected static final String DEFAULT_ACCOUNT_NAME = "account";
+ protected static final Predicate<String> LIST_PATTERN = Pattern.compile("GET /[a-zA-Z0-9]+/[a-zA-Z0-9]+\\?.+").asMatchPredicate();
+ protected static final Predicate<String> GET_BLOB_PATTERN = Pattern.compile("GET /[a-zA-Z0-9]+/[a-zA-Z0-9]+/.+").asMatchPredicate();
@Override
protected String repositoryType() {
@@ -78,7 +98,7 @@ protected Settings repositorySettings(String repoName) {
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
- return Collections.singletonList(TestAzureRepositoryPlugin.class);
+ return List.of(TestAzureRepositoryPlugin.class, TestTelemetryPlugin.class);
}
@Override
@@ -91,7 +111,7 @@ protected Map<String, HttpHandler> createHttpHandlers() {
@Override
protected HttpHandler createErroneousHttpHandler(final HttpHandler delegate) {
- return new AzureErroneousHttpHandler(delegate, AzureStorageSettings.DEFAULT_MAX_RETRIES);
+ return new AzureHTTPStatsCollectorHandler(new AzureErroneousHttpHandler(delegate, AzureStorageSettings.DEFAULT_MAX_RETRIES));
}
@Override
@@ -119,6 +139,13 @@ protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
.build();
}
+ protected TestTelemetryPlugin getTelemetryPlugin(String dataNodeName) {
+ return internalCluster().getInstance(PluginsService.class, dataNodeName)
+ .filterPlugins(TestTelemetryPlugin.class)
+ .findFirst()
+ .orElseThrow();
+ }
+
/**
* AzureRepositoryPlugin that allows to set low values for the Azure's client retry policy
* and for BlobRequestOptions#getSingleBlobPutThresholdInBytes().
@@ -195,9 +222,6 @@ protected String requestUniqueId(final HttpExchange exchange) {
*/
@SuppressForbidden(reason = "this test uses a HttpServer to emulate an Azure endpoint")
private static class AzureHTTPStatsCollectorHandler extends HttpStatsCollectorHandler {
- private static final Predicate<String> LIST_PATTERN = Pattern.compile("GET /[a-zA-Z0-9]+/[a-zA-Z0-9]+\\?.+").asMatchPredicate();
- private static final Predicate<String> GET_BLOB_PATTERN = Pattern.compile("GET /[a-zA-Z0-9]+/[a-zA-Z0-9]+/.+").asMatchPredicate();
-
private final Set seenRequestIds = ConcurrentCollections.newConcurrentSet();
private AzureHTTPStatsCollectorHandler(HttpHandler delegate) {
@@ -303,4 +327,87 @@ public void testReadByteByByte() throws Exception {
container.delete(randomPurpose());
}
}
+
+ public void testMetrics() throws Exception {
+ // Reset all the metrics so none linger from previous tests
+ internalCluster().getInstances(PluginsService.class)
+ .forEach(ps -> ps.filterPlugins(TestTelemetryPlugin.class).forEach(TestTelemetryPlugin::resetMeter));
+
+ // Create the repository and perform some activities
+ final String repository = createRepository(randomRepositoryName(), false);
+ final String index = "index-no-merges";
+ createIndex(index, 1, 0);
+
+ final long nbDocs = randomLongBetween(10_000L, 20_000L);
+ try (BackgroundIndexer indexer = new BackgroundIndexer(index, client(), (int) nbDocs)) {
+ waitForDocs(nbDocs, indexer);
+ }
+ flushAndRefresh(index);
+ BroadcastResponse forceMerge = client().admin().indices().prepareForceMerge(index).setFlush(true).setMaxNumSegments(1).get();
+ assertThat(forceMerge.getSuccessfulShards(), equalTo(1));
+ assertHitCount(prepareSearch(index).setSize(0).setTrackTotalHits(true), nbDocs);
+
+ final String snapshot = "snapshot";
+ assertSuccessfulSnapshot(
+ clusterAdmin().prepareCreateSnapshot(TEST_REQUEST_TIMEOUT, repository, snapshot).setWaitForCompletion(true).setIndices(index)
+ );
+ assertAcked(client().admin().indices().prepareDelete(index));
+ assertSuccessfulRestore(
+ clusterAdmin().prepareRestoreSnapshot(TEST_REQUEST_TIMEOUT, repository, snapshot).setWaitForCompletion(true)
+ );
+ ensureGreen(index);
+ assertHitCount(prepareSearch(index).setSize(0).setTrackTotalHits(true), nbDocs);
+ assertAcked(clusterAdmin().prepareDeleteSnapshot(TEST_REQUEST_TIMEOUT, repository, snapshot).get());
+
+ final Map<AzureBlobStore.Operation, Long> aggregatedMetrics = new HashMap<>();
+ // Compare the collected stats and metrics for each node; they should match
+ for (var nodeName : internalCluster().getNodeNames()) {
+ final BlobStoreRepository blobStoreRepository;
+ try {
+ blobStoreRepository = (BlobStoreRepository) internalCluster().getInstance(RepositoriesService.class, nodeName)
+ .repository(repository);
+ } catch (RepositoryMissingException e) {
+ continue;
+ }
+
+ final AzureBlobStore blobStore = (AzureBlobStore) blobStoreRepository.blobStore();
+ final Map<AzureBlobStore.StatsKey, LongAdder> statsCollectors = blobStore.getMetricsRecorder().opsCounters;
+
+ final List<Measurement> metrics = Measurement.combine(
+ getTelemetryPlugin(nodeName).getLongCounterMeasurement(METRIC_OPERATIONS_TOTAL)
+ );
+
+ assertThat(
+ statsCollectors.keySet().stream().map(AzureBlobStore.StatsKey::operation).collect(Collectors.toSet()),
+ equalTo(
+ metrics.stream()
+ .map(m -> AzureBlobStore.Operation.fromKey((String) m.attributes().get("operation")))
+ .collect(Collectors.toSet())
+ )
+ );
+ metrics.forEach(metric -> {
+ assertThat(
+ metric.attributes(),
+ allOf(hasEntry("repo_type", AzureRepository.TYPE), hasKey("repo_name"), hasKey("operation"), hasKey("purpose"))
+ );
+ final AzureBlobStore.Operation operation = AzureBlobStore.Operation.fromKey((String) metric.attributes().get("operation"));
+ final AzureBlobStore.StatsKey statsKey = new AzureBlobStore.StatsKey(
+ operation,
+ OperationPurpose.parse((String) metric.attributes().get("purpose"))
+ );
+ assertThat(nodeName + "/" + statsKey + " exists", statsCollectors, hasKey(statsKey));
+ assertThat(nodeName + "/" + statsKey + " has correct sum", metric.getLong(), equalTo(statsCollectors.get(statsKey).sum()));
+ aggregatedMetrics.compute(statsKey.operation(), (k, v) -> v == null ? metric.getLong() : v + metric.getLong());
+ });
+ }
+
+ // The metrics should also be consistent with the server-side request counts.
+ assertThat(aggregatedMetrics, equalTo(getServerMetrics()));
+ }
+
+ private Map<AzureBlobStore.Operation, Long> getServerMetrics() {
+ return getMockRequestCounts().entrySet()
+ .stream()
+ .collect(Collectors.toMap(e -> AzureBlobStore.Operation.fromKey(e.getKey()), Map.Entry::getValue));
+ }
}
diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java
index 5466989082129..d520d30f2bac6 100644
--- a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java
+++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java
@@ -60,6 +60,7 @@
import org.elasticsearch.core.CheckedConsumer;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Tuple;
+import org.elasticsearch.repositories.RepositoriesMetrics;
import org.elasticsearch.repositories.azure.AzureRepository.Repository;
import org.elasticsearch.repositories.blobstore.ChunkedBlobOutputStream;
import org.elasticsearch.rest.RestStatus;
@@ -86,11 +87,11 @@
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.LongAdder;
import java.util.function.BiPredicate;
-import java.util.function.Consumer;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
@@ -102,59 +103,54 @@ public class AzureBlobStore implements BlobStore {
private static final int DEFAULT_UPLOAD_BUFFERS_SIZE = (int) new ByteSizeValue(64, ByteSizeUnit.KB).getBytes();
private final AzureStorageService service;
-
private final BigArrays bigArrays;
+ private final RepositoryMetadata repositoryMetadata;
private final String clientName;
private final String container;
private final LocationMode locationMode;
private final ByteSizeValue maxSinglePartUploadSize;
- private final StatsCollectors statsCollectors = new StatsCollectors();
- private final AzureClientProvider.SuccessfulRequestHandler statsConsumer;
+ private final RequestMetricsRecorder requestMetricsRecorder;
+ private final AzureClientProvider.RequestMetricsHandler requestMetricsHandler;
- public AzureBlobStore(RepositoryMetadata metadata, AzureStorageService service, BigArrays bigArrays) {
+ public AzureBlobStore(
+ RepositoryMetadata metadata,
+ AzureStorageService service,
+ BigArrays bigArrays,
+ RepositoriesMetrics repositoriesMetrics
+ ) {
this.container = Repository.CONTAINER_SETTING.get(metadata.settings());
this.clientName = Repository.CLIENT_NAME.get(metadata.settings());
this.service = service;
this.bigArrays = bigArrays;
+ this.requestMetricsRecorder = new RequestMetricsRecorder(repositoriesMetrics);
+ this.repositoryMetadata = metadata;
// locationMode is set per repository, not per client
this.locationMode = Repository.LOCATION_MODE_SETTING.get(metadata.settings());
this.maxSinglePartUploadSize = Repository.MAX_SINGLE_PART_UPLOAD_SIZE_SETTING.get(metadata.settings());
- List<RequestStatsCollector> requestStatsCollectors = List.of(
- RequestStatsCollector.create(
- (httpMethod, url) -> httpMethod == HttpMethod.HEAD,
- purpose -> statsCollectors.onSuccessfulRequest(Operation.GET_BLOB_PROPERTIES, purpose)
- ),
- RequestStatsCollector.create(
+ List<RequestMatcher> requestMatchers = List.of(
+ new RequestMatcher((httpMethod, url) -> httpMethod == HttpMethod.HEAD, Operation.GET_BLOB_PROPERTIES),
+ new RequestMatcher(
(httpMethod, url) -> httpMethod == HttpMethod.GET && isListRequest(httpMethod, url) == false,
- purpose -> statsCollectors.onSuccessfulRequest(Operation.GET_BLOB, purpose)
- ),
- RequestStatsCollector.create(
- AzureBlobStore::isListRequest,
- purpose -> statsCollectors.onSuccessfulRequest(Operation.LIST_BLOBS, purpose)
- ),
- RequestStatsCollector.create(
- AzureBlobStore::isPutBlockRequest,
- purpose -> statsCollectors.onSuccessfulRequest(Operation.PUT_BLOCK, purpose)
+ Operation.GET_BLOB
),
- RequestStatsCollector.create(
- AzureBlobStore::isPutBlockListRequest,
- purpose -> statsCollectors.onSuccessfulRequest(Operation.PUT_BLOCK_LIST, purpose)
- ),
- RequestStatsCollector.create(
+ new RequestMatcher(AzureBlobStore::isListRequest, Operation.LIST_BLOBS),
+ new RequestMatcher(AzureBlobStore::isPutBlockRequest, Operation.PUT_BLOCK),
+ new RequestMatcher(AzureBlobStore::isPutBlockListRequest, Operation.PUT_BLOCK_LIST),
+ new RequestMatcher(
// https://docs.microsoft.com/en-us/rest/api/storageservices/put-blob#uri-parameters
// The only URI parameter allowed for put-blob operation is "timeout", but if a sas token is used,
// it's possible that the URI parameters contain additional parameters unrelated to the upload type.
(httpMethod, url) -> httpMethod == HttpMethod.PUT
&& isPutBlockRequest(httpMethod, url) == false
&& isPutBlockListRequest(httpMethod, url) == false,
- purpose -> statsCollectors.onSuccessfulRequest(Operation.PUT_BLOB, purpose)
+ Operation.PUT_BLOB
)
);
- this.statsConsumer = (purpose, httpMethod, url) -> {
+ this.requestMetricsHandler = (purpose, method, url, metrics) -> {
try {
URI uri = url.toURI();
String path = uri.getPath() == null ? "" : uri.getPath();
@@ -167,9 +163,9 @@ && isPutBlockListRequest(httpMethod, url) == false,
return;
}
- for (RequestStatsCollector requestStatsCollector : requestStatsCollectors) {
- if (requestStatsCollector.shouldConsumeRequestInfo(httpMethod, url)) {
- requestStatsCollector.consumeHttpRequestInfo(purpose);
+ for (RequestMatcher requestMatcher : requestMatchers) {
+ if (requestMatcher.matches(method, url)) {
+ requestMetricsRecorder.onRequestComplete(requestMatcher.operation, purpose, metrics);
return;
}
}
@@ -665,12 +661,12 @@ private BlobServiceAsyncClient asyncClient(OperationPurpose purpose) {
}
private AzureBlobServiceClient getAzureBlobServiceClientClient(OperationPurpose purpose) {
- return service.client(clientName, locationMode, purpose, statsConsumer);
+ return service.client(clientName, locationMode, purpose, requestMetricsHandler);
}
@Override
public Map stats() {
- return statsCollectors.statsMap(service.isStateless());
+ return requestMetricsRecorder.statsMap(service.isStateless());
}
// visible for testing
@@ -691,26 +687,43 @@ public String getKey() {
Operation(String key) {
this.key = key;
}
+
+ public static Operation fromKey(String key) {
+ for (Operation operation : Operation.values()) {
+ if (operation.key.equals(key)) {
+ return operation;
+ }
+ }
+ throw new IllegalArgumentException("No matching key: " + key);
+ }
}
- private record StatsKey(Operation operation, OperationPurpose purpose) {
+ // visible for testing
+ record StatsKey(Operation operation, OperationPurpose purpose) {
@Override
public String toString() {
return purpose.getKey() + "_" + operation.getKey();
}
}
- private static class StatsCollectors {
- final Map<StatsKey, LongAdder> collectors = new ConcurrentHashMap<>();
+ // visible for testing
+ class RequestMetricsRecorder {
+ private final RepositoriesMetrics repositoriesMetrics;
+ final Map<StatsKey, LongAdder> opsCounters = new ConcurrentHashMap<>();
+ final Map<StatsKey, Map<String, Object>> opsAttributes = new ConcurrentHashMap<>();
+
+ RequestMetricsRecorder(RepositoriesMetrics repositoriesMetrics) {
+ this.repositoriesMetrics = repositoriesMetrics;
+ }
Map<String, Long> statsMap(boolean stateless) {
if (stateless) {
- return collectors.entrySet()
+ return opsCounters.entrySet()
.stream()
.collect(Collectors.toUnmodifiableMap(e -> e.getKey().toString(), e -> e.getValue().sum()));
} else {
Map<String, Long> normalisedStats = Arrays.stream(Operation.values()).collect(Collectors.toMap(Operation::getKey, o -> 0L));
- collectors.forEach(
+ opsCounters.forEach(
(key, value) -> normalisedStats.compute(
key.operation.getKey(),
(k, current) -> Objects.requireNonNull(current) + value.sum()
@@ -720,11 +733,50 @@ Map<String, Long> statsMap(boolean stateless) {
}
}
- public void onSuccessfulRequest(Operation operation, OperationPurpose purpose) {
- collectors.computeIfAbsent(new StatsKey(operation, purpose), k -> new LongAdder()).increment();
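+ // Invoked once per logical operation, with metrics aggregated across all of its HTTP attempts (retries included)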
+ public void onRequestComplete(Operation operation, OperationPurpose purpose, AzureClientProvider.RequestMetrics requestMetrics) {
+ final StatsKey statsKey = new StatsKey(operation, purpose);
+ final LongAdder counter = opsCounters.computeIfAbsent(statsKey, k -> new LongAdder());
+ final Map<String, Object> attributes = opsAttributes.computeIfAbsent(
+ statsKey,
+ k -> RepositoriesMetrics.createAttributesMap(repositoryMetadata, purpose, operation.getKey())
+ );
+
+ counter.add(1);
+
+ // "range not satisfied" responses are not retried, so we count them by checking the final response
+ if (requestMetrics.getStatusCode() == RestStatus.REQUESTED_RANGE_NOT_SATISFIED.getStatus()) {
+ repositoriesMetrics.requestRangeNotSatisfiedExceptionCounter().incrementBy(1, attributes);
+ }
+
+ repositoriesMetrics.operationCounter().incrementBy(1, attributes);
+ if (RestStatus.isSuccessful(requestMetrics.getStatusCode()) == false) {
+ repositoriesMetrics.unsuccessfulOperationCounter().incrementBy(1, attributes);
+ }
+
+ repositoriesMetrics.requestCounter().incrementBy(requestMetrics.getRequestCount(), attributes);
+ if (requestMetrics.getErrorCount() > 0) {
+ repositoriesMetrics.exceptionCounter().incrementBy(requestMetrics.getErrorCount(), attributes);
+ repositoriesMetrics.exceptionHistogram().record(requestMetrics.getErrorCount(), attributes);
+ }
+
+ if (requestMetrics.getThrottleCount() > 0) {
+ repositoriesMetrics.throttleCounter().incrementBy(requestMetrics.getThrottleCount(), attributes);
+ repositoriesMetrics.throttleHistogram().record(requestMetrics.getThrottleCount(), attributes);
+ }
+
+ // We use nanosecond precision, so a zero value indicates that no requests were executed
+ if (requestMetrics.getTotalRequestTimeNanos() > 0) {
+ repositoriesMetrics.httpRequestTimeInMillisHistogram()
+ .record(TimeUnit.NANOSECONDS.toMillis(requestMetrics.getTotalRequestTimeNanos()), attributes);
+ }
}
}
+ // visible for testing
+ RequestMetricsRecorder getMetricsRecorder() {
+ return requestMetricsRecorder;
+ }
+
private static class AzureInputStream extends InputStream {
private final CancellableRateLimitedFluxIterator<ByteBuf> cancellableRateLimitedFluxIterator;
private ByteBuf byteBuf;
@@ -846,26 +898,11 @@ private ByteBuf getNextByteBuf() throws IOException {
}
}
- private static class RequestStatsCollector {
- private final BiPredicate<HttpMethod, URL> filter;
- private final Consumer<OperationPurpose> onHttpRequest;
-
- private RequestStatsCollector(BiPredicate<HttpMethod, URL> filter, Consumer<OperationPurpose> onHttpRequest) {
- this.filter = filter;
- this.onHttpRequest = onHttpRequest;
- }
-
- static RequestStatsCollector create(BiPredicate<HttpMethod, URL> filter, Consumer<OperationPurpose> consumer) {
- return new RequestStatsCollector(filter, consumer);
- }
+ private record RequestMatcher(BiPredicate<HttpMethod, URL> filter, Operation operation) {
- private boolean shouldConsumeRequestInfo(HttpMethod httpMethod, URL url) {
+ private boolean matches(HttpMethod httpMethod, URL url) {
return filter.test(httpMethod, url);
}
-
- private void consumeHttpRequestInfo(OperationPurpose operationPurpose) {
- onHttpRequest.accept(operationPurpose);
- }
}
OptionalBytesReference getRegister(OperationPurpose purpose, String blobPath, String containerPath, String blobKey) {
diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureClientProvider.java b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureClientProvider.java
index ae497ff159576..654742c980268 100644
--- a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureClientProvider.java
+++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureClientProvider.java
@@ -24,6 +24,7 @@
import com.azure.core.http.HttpMethod;
import com.azure.core.http.HttpPipelineCallContext;
import com.azure.core.http.HttpPipelineNextPolicy;
+import com.azure.core.http.HttpPipelinePosition;
import com.azure.core.http.HttpRequest;
import com.azure.core.http.HttpResponse;
import com.azure.core.http.ProxyOptions;
@@ -44,11 +45,13 @@
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.repositories.azure.executors.PrivilegedExecutor;
import org.elasticsearch.repositories.azure.executors.ReactorScheduledExecutorService;
+import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.netty4.NettyAllocator;
import java.net.URL;
import java.time.Duration;
+import java.util.Optional;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ThreadFactory;
@@ -57,6 +60,8 @@
import static org.elasticsearch.repositories.azure.AzureRepositoryPlugin.REPOSITORY_THREAD_POOL_NAME;
class AzureClientProvider extends AbstractLifecycleComponent {
+ private static final Logger logger = LogManager.getLogger(AzureClientProvider.class);
+
private static final TimeValue DEFAULT_CONNECTION_TIMEOUT = TimeValue.timeValueSeconds(30);
private static final TimeValue DEFAULT_MAX_CONNECTION_IDLE_TIME = TimeValue.timeValueSeconds(60);
private static final int DEFAULT_MAX_CONNECTIONS = 50;
@@ -160,7 +165,7 @@ AzureBlobServiceClient createClient(
LocationMode locationMode,
RequestRetryOptions retryOptions,
ProxyOptions proxyOptions,
- SuccessfulRequestHandler successfulRequestHandler,
+ RequestMetricsHandler requestMetricsHandler,
OperationPurpose purpose
) {
if (closed) {
@@ -189,8 +194,9 @@ AzureBlobServiceClient createClient(
builder.credential(credentialBuilder.build());
}
- if (successfulRequestHandler != null) {
- builder.addPolicy(new SuccessfulRequestTracker(purpose, successfulRequestHandler));
+ if (requestMetricsHandler != null) {
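+ // Track metrics at two levels: per logical operation (RequestMetricsTracker) and per HTTP attempt (RetryMetricsTracker)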
+ builder.addPolicy(new RequestMetricsTracker(purpose, requestMetricsHandler));
+ builder.addPolicy(RetryMetricsTracker.INSTANCE);
}
if (locationMode.isSecondary()) {
@@ -259,38 +265,135 @@ protected void doStop() {
@Override
protected void doClose() {}
- private static final class SuccessfulRequestTracker implements HttpPipelinePolicy {
- private static final Logger logger = LogManager.getLogger(SuccessfulRequestTracker.class);
+ static class RequestMetrics {
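+ // Mutable holder for the request statistics gathered while executing a single blob store operation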
+ private volatile long totalRequestTimeNanos = 0;
+ private volatile int requestCount;
+ private volatile int errorCount;
+ private volatile int throttleCount;
+ private volatile int statusCode;
+
+ int getRequestCount() {
+ return requestCount;
+ }
+
+ int getErrorCount() {
+ return errorCount;
+ }
+
+ int getStatusCode() {
+ return statusCode;
+ }
+
+ int getThrottleCount() {
+ return throttleCount;
+ }
+
+ /**
+ * Total time spent executing requests to complete the operation, in nanoseconds
+ */
+ long getTotalRequestTimeNanos() {
+ return totalRequestTimeNanos;
+ }
+
+ @Override
+ public String toString() {
+ return "RequestMetrics{"
+ + "totalRequestTimeNanos="
+ + totalRequestTimeNanos
+ + ", requestCount="
+ + requestCount
+ + ", errorCount="
+ + errorCount
+ + ", throttleCount="
+ + throttleCount
+ + ", statusCode="
+ + statusCode
+ + '}';
+ }
+ }
+
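+ // Singleton pipeline policy that observes each individual HTTP attempt (i.e. it runs once per retry)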
+ private enum RetryMetricsTracker implements HttpPipelinePolicy {
+ INSTANCE;
+
+ @Override
+ public Mono<HttpResponse> process(HttpPipelineCallContext context, HttpPipelineNextPolicy next) {
+ Optional