From acc99302c65b06b10c2211316282ce3fb663aed4 Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Mon, 17 Jun 2024 13:41:20 -0400 Subject: [PATCH 01/26] Adding hamming distance function to painless for dense_vector fields (#109359) This adds `hamming` distances, the pop-count of `xor` byte vectors as a first class citizen in painless. For byte vectors, this means that we can compute hamming distances via script_score (aka, brute-force). The implementation of `hamming` is the same that is available in Lucene, and when lucene 9.11 is merged, we should update our logic where applicable to utilize it. NOTE: this does not yet add hamming distance as a metric for indexed vectors. This will be a future PR after the Lucene 9.11 upgrade. --- .../vector/DistanceFunctionBenchmark.java | 31 +++- docs/changelog/109359.yaml | 5 + .../index.asciidoc | 1 + .../vectors/vector-functions.asciidoc | 50 +++++- .../whitelist-json/painless-score.json | 2 +- .../org.elasticsearch.script.score.txt | 1 + .../test/painless/140_dense_vector_basic.yml | 33 ++++ .../151_dense_vector_byte_hamming.yml | 156 ++++++++++++++++++ server/src/main/java/module-info.java | 1 + .../elasticsearch/script/ScriptFeatures.java | 21 +++ .../script/VectorScoreScriptUtils.java | 50 ++++++ .../field/vectors/BinaryDenseVector.java | 10 ++ .../field/vectors/ByteBinaryDenseVector.java | 14 ++ .../field/vectors/ByteKnnDenseVector.java | 14 ++ .../script/field/vectors/DenseVector.java | 31 +++- .../script/field/vectors/KnnDenseVector.java | 10 ++ ...lasticsearch.features.FeatureSpecification | 1 + .../script/VectorScoreScriptUtilsTests.java | 14 ++ 18 files changed, 438 insertions(+), 7 deletions(-) create mode 100644 docs/changelog/109359.yaml create mode 100644 modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/151_dense_vector_byte_hamming.yml create mode 100644 server/src/main/java/org/elasticsearch/script/ScriptFeatures.java diff --git 
a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/DistanceFunctionBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/DistanceFunctionBenchmark.java index fe6ba4da29f3b..0a4c836e2a6cf 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/DistanceFunctionBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/DistanceFunctionBenchmark.java @@ -56,7 +56,7 @@ public class DistanceFunctionBenchmark { @Param({ "96" }) private int dims; - @Param({ "dot", "cosine", "l1", "l2" }) + @Param({ "dot", "cosine", "l1", "l2", "hamming" }) private String function; @Param({ "knn", "binary" }) @@ -330,6 +330,18 @@ public void execute(Consumer consumer) { } } + private static class HammingKnnByteBenchmarkFunction extends KnnByteBenchmarkFunction { + + private HammingKnnByteBenchmarkFunction(int dims) { + super(dims); + } + + @Override + public void execute(Consumer consumer) { + new ByteKnnDenseVector(docVector).hamming(queryVector); + } + } + private static class L1BinaryFloatBenchmarkFunction extends BinaryFloatBenchmarkFunction { private L1BinaryFloatBenchmarkFunction(int dims) { @@ -354,6 +366,18 @@ public void execute(Consumer consumer) { } } + private static class HammingBinaryByteBenchmarkFunction extends BinaryByteBenchmarkFunction { + + private HammingBinaryByteBenchmarkFunction(int dims) { + super(dims); + } + + @Override + public void execute(Consumer consumer) { + new ByteBinaryDenseVector(vectorValue, docVector, dims).hamming(queryVector); + } + } + private static class L2KnnFloatBenchmarkFunction extends KnnFloatBenchmarkFunction { private L2KnnFloatBenchmarkFunction(int dims) { @@ -454,6 +478,11 @@ public void setBenchmarkFunction() { case "binary" -> new L2BinaryByteBenchmarkFunction(dims); default -> throw new UnsupportedOperationException("unexpected type [" + type + "]"); }; + case "hamming" -> benchmarkFunction = switch (type) { + case "knn" -> new 
HammingKnnByteBenchmarkFunction(dims); + case "binary" -> new HammingBinaryByteBenchmarkFunction(dims); + default -> throw new UnsupportedOperationException("unexpected type [" + type + "]"); + }; default -> throw new UnsupportedOperationException("unexpected function [" + function + "]"); } } diff --git a/docs/changelog/109359.yaml b/docs/changelog/109359.yaml new file mode 100644 index 0000000000000..37202eb5a28ec --- /dev/null +++ b/docs/changelog/109359.yaml @@ -0,0 +1,5 @@ +pr: 109359 +summary: Adding hamming distance function to painless for `dense_vector` fields +area: Vector Search +type: enhancement +issues: [] diff --git a/docs/painless/painless-api-reference/painless-api-reference-score/index.asciidoc b/docs/painless/painless-api-reference/painless-api-reference-score/index.asciidoc index 775c0cc212426..4300a1c7efc66 100644 --- a/docs/painless/painless-api-reference/painless-api-reference-score/index.asciidoc +++ b/docs/painless/painless-api-reference/painless-api-reference-score/index.asciidoc @@ -23,6 +23,7 @@ The following methods are directly callable without a class/instance qualifier. * double dotProduct(Object *, String *) * double l1norm(Object *, String *) * double l2norm(Object *, String *) +* double hamming(Object *, String *) * double randomScore(int *) * double randomScore(int *, String *) * double saturation(double, double) diff --git a/docs/reference/vectors/vector-functions.asciidoc b/docs/reference/vectors/vector-functions.asciidoc index e0ed85189c97d..4e627ef18ec6c 100644 --- a/docs/reference/vectors/vector-functions.asciidoc +++ b/docs/reference/vectors/vector-functions.asciidoc @@ -12,9 +12,10 @@ This is the list of available vector functions and vector access methods: 1. <> – calculates cosine similarity 2. <> – calculates dot product 3. <> – calculates L^1^ distance -4. <> - calculates L^2^ distance -5. <].vectorValue`>> – returns a vector's value as an array of floats -6. <].magnitude`>> – returns a vector's magnitude +4. 
<> – calculates Hamming distance +5. <> - calculates L^2^ distance +6. <].vectorValue`>> – returns a vector's value as an array of floats +7. <].magnitude`>> – returns a vector's magnitude NOTE: The recommended way to access dense vectors is through the `cosineSimilarity`, `dotProduct`, `l1norm` or `l2norm` functions. Please note @@ -35,8 +36,15 @@ PUT my-index-000001 "properties": { "my_dense_vector": { "type": "dense_vector", + "index": false, "dims": 3 }, + "my_byte_dense_vector": { + "type": "dense_vector", + "index": false, + "dims": 3, + "element_type": "byte" + }, "status" : { "type" : "keyword" } @@ -47,12 +55,14 @@ PUT my-index-000001 PUT my-index-000001/_doc/1 { "my_dense_vector": [0.5, 10, 6], + "my_byte_dense_vector": [0, 10, 6], "status" : "published" } PUT my-index-000001/_doc/2 { "my_dense_vector": [-0.5, 10, 10], + "my_byte_dense_vector": [0, 10, 10], "status" : "published" } @@ -179,6 +189,40 @@ we reversed the output from `l1norm` and `l2norm`. Also, to avoid division by 0 when a document vector matches the query exactly, we added `1` in the denominator. +[[vector-functions-hamming]] +====== Hamming distance + +The `hamming` function calculates {wikipedia}/Hamming_distance[Hamming distance] between a given query vector and +document vectors. It is only available for byte vectors. + +[source,console] +-------------------------------------------------- +GET my-index-000001/_search +{ + "query": { + "script_score": { + "query" : { + "bool" : { + "filter" : { + "term" : { + "status" : "published" + } + } + } + }, + "script": { + "source": "(24 - hamming(params.queryVector, 'my_byte_dense_vector')) / 24", <1> + "params": { + "queryVector": [4, 3, 0] + } + } + } + } +} +-------------------------------------------------- + +<1> Calculate the Hamming distance and normalize it by the bits to get a score between 0 and 1. 
+ [[vector-functions-l2]] ====== L^2^ distance (Euclidean distance) diff --git a/modules/lang-painless/src/main/generated/whitelist-json/painless-score.json b/modules/lang-painless/src/main/generated/whitelist-json/painless-score.json index da9f7f7b60386..0a067b3e98b56 100644 --- a/modules/lang-painless/src/main/generated/whitelist-json/painless-score.json +++ b/modules/lang-painless/src/main/generated/whitelist-json/painless-score.json @@ -1 +1 @@ -{"name":"score","classes":[{"name":"String","imported":true,"constructors":[{"declaring":"String","parameters":[]}],"static_methods":[{"declaring":"String","name":"copyValueOf","return":"String","parameters":["char[]"]},{"declaring":"String","name":"copyValueOf","return":"String","parameters":["char[]","int","int"]},{"declaring":"String","name":"format","return":"String","parameters":["String","def[]"]},{"declaring":"String","name":"format","return":"String","parameters":["Locale","String","def[]"]},{"declaring":"String","name":"join","return":"String","parameters":["CharSequence","Iterable"]},{"declaring":"String","name":"valueOf","return":"String","parameters":["def"]}],"methods":[{"declaring":"CharSequence","name":"charAt","return":"char","parameters":["int"]},{"declaring":"CharSequence","name":"chars","return":"IntStream","parameters":[]},{"declaring":"String","name":"codePointAt","return":"int","parameters":["int"]},{"declaring":"String","name":"codePointBefore","return":"int","parameters":["int"]},{"declaring":"String","name":"codePointCount","return":"int","parameters":["int","int"]},{"declaring":"CharSequence","name":"codePoints","return":"IntStream","parameters":[]},{"declaring":"String","name":"compareTo","return":"int","parameters":["String"]},{"declaring":"String","name":"compareToIgnoreCase","return":"int","parameters":["String"]},{"declaring":"String","name":"concat","return":"String","parameters":["String"]},{"declaring":"String","name":"contains","return":"boolean","parameters":["CharSequence"]},{"declari
ng":"String","name":"contentEquals","return":"boolean","parameters":["CharSequence"]},{"declaring":null,"name":"decodeBase64","return":"String","parameters":[]},{"declaring":null,"name":"encodeBase64","return":"String","parameters":[]},{"declaring":"String","name":"endsWith","return":"boolean","parameters":["String"]},{"declaring":"Object","name":"equals","return":"boolean","parameters":["Object"]},{"declaring":"String","name":"equalsIgnoreCase","return":"boolean","parameters":["String"]},{"declaring":"String","name":"getChars","return":"void","parameters":["int","int","char[]","int"]},{"declaring":"Object","name":"hashCode","return":"int","parameters":[]},{"declaring":"String","name":"indexOf","return":"int","parameters":["String"]},{"declaring":"String","name":"indexOf","return":"int","parameters":["String","int"]},{"declaring":"String","name":"isEmpty","return":"boolean","parameters":[]},{"declaring":"String","name":"lastIndexOf","return":"int","parameters":["String"]},{"declaring":"String","name":"lastIndexOf","return":"int","parameters":["String","int"]},{"declaring":"CharSequence","name":"length","return":"int","parameters":[]},{"declaring":"String","name":"offsetByCodePoints","return":"int","parameters":["int","int"]},{"declaring":"String","name":"regionMatches","return":"boolean","parameters":["int","String","int","int"]},{"declaring":"String","name":"regionMatches","return":"boolean","parameters":["boolean","int","String","int","int"]},{"declaring":"String","name":"replace","return":"String","parameters":["CharSequence","CharSequence"]},{"declaring":null,"name":"replaceAll","return":"String","parameters":["Pattern","Function"]},{"declaring":null,"name":"replaceFirst","return":"String","parameters":["Pattern","Function"]},{"declaring":null,"name":"splitOnToken","return":"String[]","parameters":["String"]},{"declaring":null,"name":"splitOnToken","return":"String[]","parameters":["String","int"]},{"declaring":"String","name":"startsWith","return":"boolean","pa
rameters":["String"]},{"declaring":"String","name":"startsWith","return":"boolean","parameters":["String","int"]},{"declaring":"CharSequence","name":"subSequence","return":"CharSequence","parameters":["int","int"]},{"declaring":"String","name":"substring","return":"String","parameters":["int"]},{"declaring":"String","name":"substring","return":"String","parameters":["int","int"]},{"declaring":"String","name":"toCharArray","return":"char[]","parameters":[]},{"declaring":"String","name":"toLowerCase","return":"String","parameters":[]},{"declaring":"String","name":"toLowerCase","return":"String","parameters":["Locale"]},{"declaring":"CharSequence","name":"toString","return":"String","parameters":[]},{"declaring":"String","name":"toUpperCase","return":"String","parameters":[]},{"declaring":"String","name":"toUpperCase","return":"String","parameters":["Locale"]},{"declaring":"String","name":"trim","return":"String","parameters":[]}],"static_fields":[],"fields":[]},{"name":"DenseVectorScriptDocValues","imported":true,"constructors":[],"static_methods":[],"methods":[{"declaring":"Collection","name":"add","return":"boolean","parameters":["def"]},{"declaring":"List","name":"add","return":"void","parameters":["int","def"]},{"declaring":"Collection","name":"addAll","return":"boolean","parameters":["Collection"]},{"declaring":"List","name":"addAll","return":"boolean","parameters":["int","Collection"]},{"declaring":null,"name":"any","return":"boolean","parameters":["Predicate"]},{"declaring":null,"name":"asCollection","return":"Collection","parameters":[]},{"declaring":null,"name":"asList","return":"List","parameters":[]},{"declaring":"Collection","name":"clear","return":"void","parameters":[]},{"declaring":null,"name":"collect","return":"List","parameters":["Function"]},{"declaring":null,"name":"collect","return":"def","parameters":["Collection","Function"]},{"declaring":"Collection","name":"contains","return":"boolean","parameters":["def"]},{"declaring":"Collection","name":"co
ntainsAll","return":"boolean","parameters":["Collection"]},{"declaring":null,"name":"each","return":"def","parameters":["Consumer"]},{"declaring":null,"name":"eachWithIndex","return":"def","parameters":["ObjIntConsumer"]},{"declaring":"List","name":"equals","return":"boolean","parameters":["Object"]},{"declaring":null,"name":"every","return":"boolean","parameters":["Predicate"]},{"declaring":null,"name":"find","return":"def","parameters":["Predicate"]},{"declaring":null,"name":"findAll","return":"List","parameters":["Predicate"]},{"declaring":null,"name":"findResult","return":"def","parameters":["Function"]},{"declaring":null,"name":"findResult","return":"def","parameters":["def","Function"]},{"declaring":null,"name":"findResults","return":"List","parameters":["Function"]},{"declaring":"Iterable","name":"forEach","return":"void","parameters":["Consumer"]},{"declaring":"List","name":"get","return":"def","parameters":["int"]},{"declaring":null,"name":"getByPath","return":"Object","parameters":["String"]},{"declaring":null,"name":"getByPath","return":"Object","parameters":["String","Object"]},{"declaring":null,"name":"getLength","return":"int","parameters":[]},{"declaring":null,"name":"groupBy","return":"Map","parameters":["Function"]},{"declaring":"List","name":"hashCode","return":"int","parameters":[]},{"declaring":"List","name":"indexOf","return":"int","parameters":["def"]},{"declaring":"Collection","name":"isEmpty","return":"boolean","parameters":[]},{"declaring":"Iterable","name":"iterator","return":"Iterator","parameters":[]},{"declaring":null,"name":"join","return":"String","parameters":["String"]},{"declaring":"List","name":"lastIndexOf","return":"int","parameters":["def"]},{"declaring":"List","name":"listIterator","return":"ListIterator","parameters":[]},{"declaring":"List","name":"listIterator","return":"ListIterator","parameters":["int"]},{"declaring":"List","name":"remove","return":"def","parameters":["int"]},{"declaring":"Collection","name":"removeAll","re
turn":"boolean","parameters":["Collection"]},{"declaring":"Collection","name":"removeIf","return":"boolean","parameters":["Predicate"]},{"declaring":"List","name":"replaceAll","return":"void","parameters":["UnaryOperator"]},{"declaring":"Collection","name":"retainAll","return":"boolean","parameters":["Collection"]},{"declaring":"List","name":"set","return":"def","parameters":["int","def"]},{"declaring":"Collection","name":"size","return":"int","parameters":[]},{"declaring":"List","name":"sort","return":"void","parameters":["Comparator"]},{"declaring":null,"name":"split","return":"List","parameters":["Predicate"]},{"declaring":"Collection","name":"spliterator","return":"Spliterator","parameters":[]},{"declaring":"Collection","name":"stream","return":"Stream","parameters":[]},{"declaring":"List","name":"subList","return":"List","parameters":["int","int"]},{"declaring":null,"name":"sum","return":"double","parameters":[]},{"declaring":null,"name":"sum","return":"double","parameters":["ToDoubleFunction"]},{"declaring":"Collection","name":"toArray","return":"def[]","parameters":[]},{"declaring":"Collection","name":"toArray","return":"def[]","parameters":["def[]"]},{"declaring":"Object","name":"toString","return":"String","parameters":[]}],"static_fields":[],"fields":[]},{"name":"VersionScriptDocValues","imported":true,"constructors":[],"static_methods":[],"methods":[{"declaring":"Collection","name":"add","return":"boolean","parameters":["def"]},{"declaring":"List","name":"add","return":"void","parameters":["int","def"]},{"declaring":"Collection","name":"addAll","return":"boolean","parameters":["Collection"]},{"declaring":"List","name":"addAll","return":"boolean","parameters":["int","Collection"]},{"declaring":null,"name":"any","return":"boolean","parameters":["Predicate"]},{"declaring":null,"name":"asCollection","return":"Collection","parameters":[]},{"declaring":null,"name":"asList","return":"List","parameters":[]},{"declaring":"Collection","name":"clear","return":"void"
,"parameters":[]},{"declaring":null,"name":"collect","return":"List","parameters":["Function"]},{"declaring":null,"name":"collect","return":"def","parameters":["Collection","Function"]},{"declaring":"Collection","name":"contains","return":"boolean","parameters":["def"]},{"declaring":"Collection","name":"containsAll","return":"boolean","parameters":["Collection"]},{"declaring":null,"name":"each","return":"def","parameters":["Consumer"]},{"declaring":null,"name":"eachWithIndex","return":"def","parameters":["ObjIntConsumer"]},{"declaring":"List","name":"equals","return":"boolean","parameters":["Object"]},{"declaring":null,"name":"every","return":"boolean","parameters":["Predicate"]},{"declaring":null,"name":"find","return":"def","parameters":["Predicate"]},{"declaring":null,"name":"findAll","return":"List","parameters":["Predicate"]},{"declaring":null,"name":"findResult","return":"def","parameters":["Function"]},{"declaring":null,"name":"findResult","return":"def","parameters":["def","Function"]},{"declaring":null,"name":"findResults","return":"List","parameters":["Function"]},{"declaring":"Iterable","name":"forEach","return":"void","parameters":["Consumer"]},{"declaring":"VersionScriptDocValues","name":"get","return":"String","parameters":["int"]},{"declaring":null,"name":"getByPath","return":"Object","parameters":["String"]},{"declaring":null,"name":"getByPath","return":"Object","parameters":["String","Object"]},{"declaring":null,"name":"getLength","return":"int","parameters":[]},{"declaring":"VersionScriptDocValues","name":"getValue","return":"String","parameters":[]},{"declaring":null,"name":"groupBy","return":"Map","parameters":["Function"]},{"declaring":"List","name":"hashCode","return":"int","parameters":[]},{"declaring":"List","name":"indexOf","return":"int","parameters":["def"]},{"declaring":"Collection","name":"isEmpty","return":"boolean","parameters":[]},{"declaring":"Iterable","name":"iterator","return":"Iterator","parameters":[]},{"declaring":null,"name":"
join","return":"String","parameters":["String"]},{"declaring":"List","name":"lastIndexOf","return":"int","parameters":["def"]},{"declaring":"List","name":"listIterator","return":"ListIterator","parameters":[]},{"declaring":"List","name":"listIterator","return":"ListIterator","parameters":["int"]},{"declaring":"List","name":"remove","return":"def","parameters":["int"]},{"declaring":"Collection","name":"removeAll","return":"boolean","parameters":["Collection"]},{"declaring":"Collection","name":"removeIf","return":"boolean","parameters":["Predicate"]},{"declaring":"List","name":"replaceAll","return":"void","parameters":["UnaryOperator"]},{"declaring":"Collection","name":"retainAll","return":"boolean","parameters":["Collection"]},{"declaring":"List","name":"set","return":"def","parameters":["int","def"]},{"declaring":"Collection","name":"size","return":"int","parameters":[]},{"declaring":"List","name":"sort","return":"void","parameters":["Comparator"]},{"declaring":null,"name":"split","return":"List","parameters":["Predicate"]},{"declaring":"Collection","name":"spliterator","return":"Spliterator","parameters":[]},{"declaring":"Collection","name":"stream","return":"Stream","parameters":[]},{"declaring":"List","name":"subList","return":"List","parameters":["int","int"]},{"declaring":null,"name":"sum","return":"double","parameters":[]},{"declaring":null,"name":"sum","return":"double","parameters":["ToDoubleFunction"]},{"declaring":"Collection","name":"toArray","return":"def[]","parameters":[]},{"declaring":"Collection","name":"toArray","return":"def[]","parameters":["def[]"]},{"declaring":"Object","name":"toString","return":"String","parameters":[]}],"static_fields":[],"fields":[]}],"imported_methods":[{"declaring":null,"name":"saturation","return":"double","parameters":["double","double"]},{"declaring":null,"name":"sigmoid","return":"double","parameters":["double","double","double"]}],"class_bindings":[{"declaring":"org.elasticsearch.script.VectorScoreScriptUtils$CosineSi
milarity","name":"cosineSimilarity","return":"double","read_only":3,"parameters":["org.elasticsearch.script.ScoreScript","java.lang.Object","java.lang.String"]},{"declaring":"org.elasticsearch.script.ScoreScriptUtils$DecayDateExp","name":"decayDateExp","return":"double","read_only":4,"parameters":["java.lang.String","java.lang.String","java.lang.String","double","org.elasticsearch.script.JodaCompatibleZonedDateTime"]},{"declaring":"org.elasticsearch.script.ScoreScriptUtils$DecayDateGauss","name":"decayDateGauss","return":"double","read_only":4,"parameters":["java.lang.String","java.lang.String","java.lang.String","double","org.elasticsearch.script.JodaCompatibleZonedDateTime"]},{"declaring":"org.elasticsearch.script.ScoreScriptUtils$DecayDateLinear","name":"decayDateLinear","return":"double","read_only":4,"parameters":["java.lang.String","java.lang.String","java.lang.String","double","org.elasticsearch.script.JodaCompatibleZonedDateTime"]},{"declaring":"org.elasticsearch.script.ScoreScriptUtils$DecayGeoExp","name":"decayGeoExp","return":"double","read_only":4,"parameters":["java.lang.String","java.lang.String","java.lang.String","double","org.elasticsearch.common.geo.GeoPoint"]},{"declaring":"org.elasticsearch.script.ScoreScriptUtils$DecayGeoGauss","name":"decayGeoGauss","return":"double","read_only":4,"parameters":["java.lang.String","java.lang.String","java.lang.String","double","org.elasticsearch.common.geo.GeoPoint"]},{"declaring":"org.elasticsearch.script.ScoreScriptUtils$DecayGeoLinear","name":"decayGeoLinear","return":"double","read_only":4,"parameters":["java.lang.String","java.lang.String","java.lang.String","double","org.elasticsearch.common.geo.GeoPoint"]},{"declaring":"org.elasticsearch.script.ScoreScriptUtils$DecayNumericExp","name":"decayNumericExp","return":"double","read_only":4,"parameters":["double","double","double","double","double"]},{"declaring":"org.elasticsearch.script.ScoreScriptUtils$DecayNumericGauss","name":"decayNumericGauss","return":"d
ouble","read_only":4,"parameters":["double","double","double","double","double"]},{"declaring":"org.elasticsearch.script.ScoreScriptUtils$DecayNumericLinear","name":"decayNumericLinear","return":"double","read_only":4,"parameters":["double","double","double","double","double"]},{"declaring":"org.elasticsearch.script.VectorScoreScriptUtils$DotProduct","name":"dotProduct","return":"double","read_only":3,"parameters":["org.elasticsearch.script.ScoreScript","java.lang.Object","java.lang.String"]},{"declaring":"org.elasticsearch.script.VectorScoreScriptUtils$L1Norm","name":"l1norm","return":"double","read_only":3,"parameters":["org.elasticsearch.script.ScoreScript","java.lang.Object","java.lang.String"]},{"declaring":"org.elasticsearch.script.VectorScoreScriptUtils$L2Norm","name":"l2norm","return":"double","read_only":3,"parameters":["org.elasticsearch.script.ScoreScript","java.lang.Object","java.lang.String"]},{"declaring":"org.elasticsearch.script.ScoreScriptUtils$RandomScoreDoc","name":"randomScore","return":"double","read_only":2,"parameters":["org.elasticsearch.script.ScoreScript","int"]},{"declaring":"org.elasticsearch.script.ScoreScriptUtils$RandomScoreField","name":"randomScore","return":"double","read_only":3,"parameters":["org.elasticsearch.script.ScoreScript","int","java.lang.String"]}],"instance_bindings":[]} 
+{"name":"score","classes":[{"name":"String","imported":true,"constructors":[{"declaring":"String","parameters":[]}],"static_methods":[{"declaring":"String","name":"copyValueOf","return":"String","parameters":["char[]"]},{"declaring":"String","name":"copyValueOf","return":"String","parameters":["char[]","int","int"]},{"declaring":"String","name":"format","return":"String","parameters":["String","def[]"]},{"declaring":"String","name":"format","return":"String","parameters":["Locale","String","def[]"]},{"declaring":"String","name":"join","return":"String","parameters":["CharSequence","Iterable"]},{"declaring":"String","name":"valueOf","return":"String","parameters":["def"]}],"methods":[{"declaring":"CharSequence","name":"charAt","return":"char","parameters":["int"]},{"declaring":"CharSequence","name":"chars","return":"IntStream","parameters":[]},{"declaring":"String","name":"codePointAt","return":"int","parameters":["int"]},{"declaring":"String","name":"codePointBefore","return":"int","parameters":["int"]},{"declaring":"String","name":"codePointCount","return":"int","parameters":["int","int"]},{"declaring":"CharSequence","name":"codePoints","return":"IntStream","parameters":[]},{"declaring":"String","name":"compareTo","return":"int","parameters":["String"]},{"declaring":"String","name":"compareToIgnoreCase","return":"int","parameters":["String"]},{"declaring":"String","name":"concat","return":"String","parameters":["String"]},{"declaring":"String","name":"contains","return":"boolean","parameters":["CharSequence"]},{"declaring":"String","name":"contentEquals","return":"boolean","parameters":["CharSequence"]},{"declaring":null,"name":"decodeBase64","return":"String","parameters":[]},{"declaring":null,"name":"encodeBase64","return":"String","parameters":[]},{"declaring":"String","name":"endsWith","return":"boolean","parameters":["String"]},{"declaring":"Object","name":"equals","return":"boolean","parameters":["Object"]},{"declaring":"String","name":"equalsIgnoreCase","re
turn":"boolean","parameters":["String"]},{"declaring":"String","name":"getChars","return":"void","parameters":["int","int","char[]","int"]},{"declaring":"Object","name":"hashCode","return":"int","parameters":[]},{"declaring":"String","name":"indexOf","return":"int","parameters":["String"]},{"declaring":"String","name":"indexOf","return":"int","parameters":["String","int"]},{"declaring":"String","name":"isEmpty","return":"boolean","parameters":[]},{"declaring":"String","name":"lastIndexOf","return":"int","parameters":["String"]},{"declaring":"String","name":"lastIndexOf","return":"int","parameters":["String","int"]},{"declaring":"CharSequence","name":"length","return":"int","parameters":[]},{"declaring":"String","name":"offsetByCodePoints","return":"int","parameters":["int","int"]},{"declaring":"String","name":"regionMatches","return":"boolean","parameters":["int","String","int","int"]},{"declaring":"String","name":"regionMatches","return":"boolean","parameters":["boolean","int","String","int","int"]},{"declaring":"String","name":"replace","return":"String","parameters":["CharSequence","CharSequence"]},{"declaring":null,"name":"replaceAll","return":"String","parameters":["Pattern","Function"]},{"declaring":null,"name":"replaceFirst","return":"String","parameters":["Pattern","Function"]},{"declaring":null,"name":"splitOnToken","return":"String[]","parameters":["String"]},{"declaring":null,"name":"splitOnToken","return":"String[]","parameters":["String","int"]},{"declaring":"String","name":"startsWith","return":"boolean","parameters":["String"]},{"declaring":"String","name":"startsWith","return":"boolean","parameters":["String","int"]},{"declaring":"CharSequence","name":"subSequence","return":"CharSequence","parameters":["int","int"]},{"declaring":"String","name":"substring","return":"String","parameters":["int"]},{"declaring":"String","name":"substring","return":"String","parameters":["int","int"]},{"declaring":"String","name":"toCharArray","return":"char[]","paramete
rs":[]},{"declaring":"String","name":"toLowerCase","return":"String","parameters":[]},{"declaring":"String","name":"toLowerCase","return":"String","parameters":["Locale"]},{"declaring":"CharSequence","name":"toString","return":"String","parameters":[]},{"declaring":"String","name":"toUpperCase","return":"String","parameters":[]},{"declaring":"String","name":"toUpperCase","return":"String","parameters":["Locale"]},{"declaring":"String","name":"trim","return":"String","parameters":[]}],"static_fields":[],"fields":[]},{"name":"DenseVectorScriptDocValues","imported":true,"constructors":[],"static_methods":[],"methods":[{"declaring":"Collection","name":"add","return":"boolean","parameters":["def"]},{"declaring":"List","name":"add","return":"void","parameters":["int","def"]},{"declaring":"Collection","name":"addAll","return":"boolean","parameters":["Collection"]},{"declaring":"List","name":"addAll","return":"boolean","parameters":["int","Collection"]},{"declaring":null,"name":"any","return":"boolean","parameters":["Predicate"]},{"declaring":null,"name":"asCollection","return":"Collection","parameters":[]},{"declaring":null,"name":"asList","return":"List","parameters":[]},{"declaring":"Collection","name":"clear","return":"void","parameters":[]},{"declaring":null,"name":"collect","return":"List","parameters":["Function"]},{"declaring":null,"name":"collect","return":"def","parameters":["Collection","Function"]},{"declaring":"Collection","name":"contains","return":"boolean","parameters":["def"]},{"declaring":"Collection","name":"containsAll","return":"boolean","parameters":["Collection"]},{"declaring":null,"name":"each","return":"def","parameters":["Consumer"]},{"declaring":null,"name":"eachWithIndex","return":"def","parameters":["ObjIntConsumer"]},{"declaring":"List","name":"equals","return":"boolean","parameters":["Object"]},{"declaring":null,"name":"every","return":"boolean","parameters":["Predicate"]},{"declaring":null,"name":"find","return":"def","parameters":["Predicate
"]},{"declaring":null,"name":"findAll","return":"List","parameters":["Predicate"]},{"declaring":null,"name":"findResult","return":"def","parameters":["Function"]},{"declaring":null,"name":"findResult","return":"def","parameters":["def","Function"]},{"declaring":null,"name":"findResults","return":"List","parameters":["Function"]},{"declaring":"Iterable","name":"forEach","return":"void","parameters":["Consumer"]},{"declaring":"List","name":"get","return":"def","parameters":["int"]},{"declaring":null,"name":"getByPath","return":"Object","parameters":["String"]},{"declaring":null,"name":"getByPath","return":"Object","parameters":["String","Object"]},{"declaring":null,"name":"getLength","return":"int","parameters":[]},{"declaring":null,"name":"groupBy","return":"Map","parameters":["Function"]},{"declaring":"List","name":"hashCode","return":"int","parameters":[]},{"declaring":"List","name":"indexOf","return":"int","parameters":["def"]},{"declaring":"Collection","name":"isEmpty","return":"boolean","parameters":[]},{"declaring":"Iterable","name":"iterator","return":"Iterator","parameters":[]},{"declaring":null,"name":"join","return":"String","parameters":["String"]},{"declaring":"List","name":"lastIndexOf","return":"int","parameters":["def"]},{"declaring":"List","name":"listIterator","return":"ListIterator","parameters":[]},{"declaring":"List","name":"listIterator","return":"ListIterator","parameters":["int"]},{"declaring":"List","name":"remove","return":"def","parameters":["int"]},{"declaring":"Collection","name":"removeAll","return":"boolean","parameters":["Collection"]},{"declaring":"Collection","name":"removeIf","return":"boolean","parameters":["Predicate"]},{"declaring":"List","name":"replaceAll","return":"void","parameters":["UnaryOperator"]},{"declaring":"Collection","name":"retainAll","return":"boolean","parameters":["Collection"]},{"declaring":"List","name":"set","return":"def","parameters":["int","def"]},{"declaring":"Collection","name":"size","return":"int","para
meters":[]},{"declaring":"List","name":"sort","return":"void","parameters":["Comparator"]},{"declaring":null,"name":"split","return":"List","parameters":["Predicate"]},{"declaring":"Collection","name":"spliterator","return":"Spliterator","parameters":[]},{"declaring":"Collection","name":"stream","return":"Stream","parameters":[]},{"declaring":"List","name":"subList","return":"List","parameters":["int","int"]},{"declaring":null,"name":"sum","return":"double","parameters":[]},{"declaring":null,"name":"sum","return":"double","parameters":["ToDoubleFunction"]},{"declaring":"Collection","name":"toArray","return":"def[]","parameters":[]},{"declaring":"Collection","name":"toArray","return":"def[]","parameters":["def[]"]},{"declaring":"Object","name":"toString","return":"String","parameters":[]}],"static_fields":[],"fields":[]},{"name":"VersionScriptDocValues","imported":true,"constructors":[],"static_methods":[],"methods":[{"declaring":"Collection","name":"add","return":"boolean","parameters":["def"]},{"declaring":"List","name":"add","return":"void","parameters":["int","def"]},{"declaring":"Collection","name":"addAll","return":"boolean","parameters":["Collection"]},{"declaring":"List","name":"addAll","return":"boolean","parameters":["int","Collection"]},{"declaring":null,"name":"any","return":"boolean","parameters":["Predicate"]},{"declaring":null,"name":"asCollection","return":"Collection","parameters":[]},{"declaring":null,"name":"asList","return":"List","parameters":[]},{"declaring":"Collection","name":"clear","return":"void","parameters":[]},{"declaring":null,"name":"collect","return":"List","parameters":["Function"]},{"declaring":null,"name":"collect","return":"def","parameters":["Collection","Function"]},{"declaring":"Collection","name":"contains","return":"boolean","parameters":["def"]},{"declaring":"Collection","name":"containsAll","return":"boolean","parameters":["Collection"]},{"declaring":null,"name":"each","return":"def","parameters":["Consumer"]},{"declaring":
null,"name":"eachWithIndex","return":"def","parameters":["ObjIntConsumer"]},{"declaring":"List","name":"equals","return":"boolean","parameters":["Object"]},{"declaring":null,"name":"every","return":"boolean","parameters":["Predicate"]},{"declaring":null,"name":"find","return":"def","parameters":["Predicate"]},{"declaring":null,"name":"findAll","return":"List","parameters":["Predicate"]},{"declaring":null,"name":"findResult","return":"def","parameters":["Function"]},{"declaring":null,"name":"findResult","return":"def","parameters":["def","Function"]},{"declaring":null,"name":"findResults","return":"List","parameters":["Function"]},{"declaring":"Iterable","name":"forEach","return":"void","parameters":["Consumer"]},{"declaring":"VersionScriptDocValues","name":"get","return":"String","parameters":["int"]},{"declaring":null,"name":"getByPath","return":"Object","parameters":["String"]},{"declaring":null,"name":"getByPath","return":"Object","parameters":["String","Object"]},{"declaring":null,"name":"getLength","return":"int","parameters":[]},{"declaring":"VersionScriptDocValues","name":"getValue","return":"String","parameters":[]},{"declaring":null,"name":"groupBy","return":"Map","parameters":["Function"]},{"declaring":"List","name":"hashCode","return":"int","parameters":[]},{"declaring":"List","name":"indexOf","return":"int","parameters":["def"]},{"declaring":"Collection","name":"isEmpty","return":"boolean","parameters":[]},{"declaring":"Iterable","name":"iterator","return":"Iterator","parameters":[]},{"declaring":null,"name":"join","return":"String","parameters":["String"]},{"declaring":"List","name":"lastIndexOf","return":"int","parameters":["def"]},{"declaring":"List","name":"listIterator","return":"ListIterator","parameters":[]},{"declaring":"List","name":"listIterator","return":"ListIterator","parameters":["int"]},{"declaring":"List","name":"remove","return":"def","parameters":["int"]},{"declaring":"Collection","name":"removeAll","return":"boolean","parameters":["Col
lection"]},{"declaring":"Collection","name":"removeIf","return":"boolean","parameters":["Predicate"]},{"declaring":"List","name":"replaceAll","return":"void","parameters":["UnaryOperator"]},{"declaring":"Collection","name":"retainAll","return":"boolean","parameters":["Collection"]},{"declaring":"List","name":"set","return":"def","parameters":["int","def"]},{"declaring":"Collection","name":"size","return":"int","parameters":[]},{"declaring":"List","name":"sort","return":"void","parameters":["Comparator"]},{"declaring":null,"name":"split","return":"List","parameters":["Predicate"]},{"declaring":"Collection","name":"spliterator","return":"Spliterator","parameters":[]},{"declaring":"Collection","name":"stream","return":"Stream","parameters":[]},{"declaring":"List","name":"subList","return":"List","parameters":["int","int"]},{"declaring":null,"name":"sum","return":"double","parameters":[]},{"declaring":null,"name":"sum","return":"double","parameters":["ToDoubleFunction"]},{"declaring":"Collection","name":"toArray","return":"def[]","parameters":[]},{"declaring":"Collection","name":"toArray","return":"def[]","parameters":["def[]"]},{"declaring":"Object","name":"toString","return":"String","parameters":[]}],"static_fields":[],"fields":[]}],"imported_methods":[{"declaring":null,"name":"saturation","return":"double","parameters":["double","double"]},{"declaring":null,"name":"sigmoid","return":"double","parameters":["double","double","double"]}],"class_bindings":[{"declaring":"org.elasticsearch.script.VectorScoreScriptUtils$CosineSimilarity","name":"cosineSimilarity","return":"double","read_only":3,"parameters":["org.elasticsearch.script.ScoreScript","java.lang.Object","java.lang.String"]},{"declaring":"org.elasticsearch.script.ScoreScriptUtils$DecayDateExp","name":"decayDateExp","return":"double","read_only":4,"parameters":["java.lang.String","java.lang.String","java.lang.String","double","org.elasticsearch.script.JodaCompatibleZonedDateTime"]},{"declaring":"org.elasticsearch
.script.ScoreScriptUtils$DecayDateGauss","name":"decayDateGauss","return":"double","read_only":4,"parameters":["java.lang.String","java.lang.String","java.lang.String","double","org.elasticsearch.script.JodaCompatibleZonedDateTime"]},{"declaring":"org.elasticsearch.script.ScoreScriptUtils$DecayDateLinear","name":"decayDateLinear","return":"double","read_only":4,"parameters":["java.lang.String","java.lang.String","java.lang.String","double","org.elasticsearch.script.JodaCompatibleZonedDateTime"]},{"declaring":"org.elasticsearch.script.ScoreScriptUtils$DecayGeoExp","name":"decayGeoExp","return":"double","read_only":4,"parameters":["java.lang.String","java.lang.String","java.lang.String","double","org.elasticsearch.common.geo.GeoPoint"]},{"declaring":"org.elasticsearch.script.ScoreScriptUtils$DecayGeoGauss","name":"decayGeoGauss","return":"double","read_only":4,"parameters":["java.lang.String","java.lang.String","java.lang.String","double","org.elasticsearch.common.geo.GeoPoint"]},{"declaring":"org.elasticsearch.script.ScoreScriptUtils$DecayGeoLinear","name":"decayGeoLinear","return":"double","read_only":4,"parameters":["java.lang.String","java.lang.String","java.lang.String","double","org.elasticsearch.common.geo.GeoPoint"]},{"declaring":"org.elasticsearch.script.ScoreScriptUtils$DecayNumericExp","name":"decayNumericExp","return":"double","read_only":4,"parameters":["double","double","double","double","double"]},{"declaring":"org.elasticsearch.script.ScoreScriptUtils$DecayNumericGauss","name":"decayNumericGauss","return":"double","read_only":4,"parameters":["double","double","double","double","double"]},{"declaring":"org.elasticsearch.script.ScoreScriptUtils$DecayNumericLinear","name":"decayNumericLinear","return":"double","read_only":4,"parameters":["double","double","double","double","double"]},{"declaring":"org.elasticsearch.script.VectorScoreScriptUtils$DotProduct","name":"dotProduct","return":"double","read_only":3,"parameters":["org.elasticsearch.script.ScoreScr
ipt","java.lang.Object","java.lang.String"]},{"declaring":"org.elasticsearch.script.VectorScoreScriptUtils$L1Norm","name":"l1norm","return":"double","read_only":3,"parameters":["org.elasticsearch.script.ScoreScript","java.lang.Object","java.lang.String"]},{"declaring":"org.elasticsearch.script.VectorScoreScriptUtils$Hamming","name":"hamming","return":"double","read_only":3,"parameters":["org.elasticsearch.script.ScoreScript","java.lang.Object","java.lang.String"]},{"declaring":"org.elasticsearch.script.VectorScoreScriptUtils$L2Norm","name":"l2norm","return":"double","read_only":3,"parameters":["org.elasticsearch.script.ScoreScript","java.lang.Object","java.lang.String"]},{"declaring":"org.elasticsearch.script.ScoreScriptUtils$RandomScoreDoc","name":"randomScore","return":"double","read_only":2,"parameters":["org.elasticsearch.script.ScoreScript","int"]},{"declaring":"org.elasticsearch.script.ScoreScriptUtils$RandomScoreField","name":"randomScore","return":"double","read_only":3,"parameters":["org.elasticsearch.script.ScoreScript","int","java.lang.String"]}],"instance_bindings":[]} diff --git a/modules/lang-painless/src/main/resources/org/elasticsearch/painless/org.elasticsearch.script.score.txt b/modules/lang-painless/src/main/resources/org/elasticsearch/painless/org.elasticsearch.script.score.txt index b0506e7aa677a..5082d5f1c7bdb 100644 --- a/modules/lang-painless/src/main/resources/org/elasticsearch/painless/org.elasticsearch.script.score.txt +++ b/modules/lang-painless/src/main/resources/org/elasticsearch/painless/org.elasticsearch.script.score.txt @@ -31,5 +31,6 @@ static_import { double l2norm(org.elasticsearch.script.ScoreScript, Object, String) bound_to org.elasticsearch.script.VectorScoreScriptUtils$L2Norm double cosineSimilarity(org.elasticsearch.script.ScoreScript, Object, String) bound_to org.elasticsearch.script.VectorScoreScriptUtils$CosineSimilarity double dotProduct(org.elasticsearch.script.ScoreScript, Object, String) bound_to 
org.elasticsearch.script.VectorScoreScriptUtils$DotProduct + double hamming(org.elasticsearch.script.ScoreScript, Object, String) bound_to org.elasticsearch.script.VectorScoreScriptUtils$Hamming } diff --git a/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/140_dense_vector_basic.yml b/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/140_dense_vector_basic.yml index a4245621f83e0..e49dc20e73406 100644 --- a/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/140_dense_vector_basic.yml +++ b/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/140_dense_vector_basic.yml @@ -219,3 +219,36 @@ setup: - match: {hits.hits.2._id: "2"} - close_to: {hits.hits.2._score: {value: 186.34454, error: 0.01}} +--- +"Test hamming distance fails on float": + - requires: + cluster_features: ["script.hamming"] + reason: "support for hamming distance added in 8.15" + - do: + headers: + Content-Type: application/json + catch: bad_request + search: + body: + query: + script_score: + query: {match_all: {} } + script: + source: "hamming(params.query_vector, 'vector')" + params: + query_vector: [0.5, 111.3, -13.0, 14.8, -156.0] + + - do: + headers: + Content-Type: application/json + catch: bad_request + search: + body: + query: + script_score: + query: {match_all: {} } + script: + source: "hamming(params.query_vector, 'indexed_vector')" + params: + query_vector: [0.5, 111.3, -13.0, 14.8, -156.0] + diff --git a/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/151_dense_vector_byte_hamming.yml b/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/151_dense_vector_byte_hamming.yml new file mode 100644 index 0000000000000..373f048e7be78 --- /dev/null +++ b/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/151_dense_vector_byte_hamming.yml @@ -0,0 +1,156 @@ +setup: + - requires: + cluster_features: 
["script.hamming"] + reason: "support for hamming distance added in 8.15" + test_runner_features: headers + + - do: + indices.create: + index: test-index + body: + settings: + number_of_replicas: 0 + mappings: + properties: + my_dense_vector: + index: false + type: dense_vector + element_type: byte + dims: 5 + my_dense_vector_indexed: + index: true + type: dense_vector + element_type: byte + dims: 5 + + - do: + index: + index: test-index + id: "1" + body: + my_dense_vector: [8, 5, -15, 1, -7] + my_dense_vector_indexed: [8, 5, -15, 1, -7] + + - do: + index: + index: test-index + id: "2" + body: + my_dense_vector: [-1, 115, -3, 4, -128] + my_dense_vector_indexed: [-1, 115, -3, 4, -128] + + - do: + index: + index: test-index + id: "3" + body: + my_dense_vector: [2, 18, -5, 0, -124] + my_dense_vector_indexed: [2, 18, -5, 0, -124] + + - do: + indices.refresh: {} + +--- +"Hamming distance": + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + body: + query: + script_score: + query: {match_all: {} } + script: + source: "hamming(params.query_vector, 'my_dense_vector')" + params: + query_vector: [0, 111, -13, 14, -124] + + - match: {hits.total: 3} + + - match: {hits.hits.0._id: "2"} + - match: {hits.hits.0._score: 17.0} + + - match: {hits.hits.1._id: "1"} + - match: {hits.hits.1._score: 16.0} + + - match: {hits.hits.2._id: "3"} + - match: {hits.hits.2._score: 11.0} + + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + body: + query: + script_score: + query: {match_all: {} } + script: + source: "hamming(params.query_vector, 'my_dense_vector_indexed')" + params: + query_vector: [0, 111, -13, 14, -124] + + - match: {hits.total: 3} + + - match: {hits.hits.0._id: "2"} + - match: {hits.hits.0._score: 17.0} + + - match: {hits.hits.1._id: "1"} + - match: {hits.hits.1._score: 16.0} + + - match: {hits.hits.2._id: "3"} + - match: {hits.hits.2._score: 11.0} +--- +"Hamming distance hexidecimal": + - 
do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + body: + query: + script_score: + query: {match_all: {} } + script: + source: "hamming(params.query_vector, 'my_dense_vector')" + params: + query_vector: "006ff30e84" + + - match: {hits.total: 3} + + - match: {hits.hits.0._id: "2"} + - match: {hits.hits.0._score: 17.0} + + - match: {hits.hits.1._id: "1"} + - match: {hits.hits.1._score: 16.0} + + - match: {hits.hits.2._id: "3"} + - match: {hits.hits.2._score: 11.0} + + + - do: + headers: + Content-Type: application/json + search: + rest_total_hits_as_int: true + body: + query: + script_score: + query: {match_all: {} } + script: + source: "hamming(params.query_vector, 'my_dense_vector_indexed')" + params: + query_vector: "006ff30e84" + + - match: {hits.total: 3} + + - match: {hits.hits.0._id: "2"} + - match: {hits.hits.0._score: 17.0} + + - match: {hits.hits.1._id: "1"} + - match: {hits.hits.1._score: 16.0} + + - match: {hits.hits.2._id: "3"} + - match: {hits.hits.2._score: 11.0} diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index ab11fd3f1b397..db7e3d40518ba 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -431,6 +431,7 @@ org.elasticsearch.indices.IndicesFeatures, org.elasticsearch.action.admin.cluster.allocation.AllocationStatsFeatures, org.elasticsearch.index.mapper.MapperFeatures, + org.elasticsearch.script.ScriptFeatures, org.elasticsearch.search.retriever.RetrieversFeatures, org.elasticsearch.reservedstate.service.FileSettingsFeatures; diff --git a/server/src/main/java/org/elasticsearch/script/ScriptFeatures.java b/server/src/main/java/org/elasticsearch/script/ScriptFeatures.java new file mode 100644 index 0000000000000..d4d78bf08844b --- /dev/null +++ b/server/src/main/java/org/elasticsearch/script/ScriptFeatures.java @@ -0,0 +1,21 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.script; + +import org.elasticsearch.features.FeatureSpecification; +import org.elasticsearch.features.NodeFeature; + +import java.util.Set; + +public final class ScriptFeatures implements FeatureSpecification { + @Override + public Set getFeatures() { + return Set.of(VectorScoreScriptUtils.HAMMING_DISTANCE_FUNCTION); + } +} diff --git a/server/src/main/java/org/elasticsearch/script/VectorScoreScriptUtils.java b/server/src/main/java/org/elasticsearch/script/VectorScoreScriptUtils.java index b071739321eaf..bccdd5782f277 100644 --- a/server/src/main/java/org/elasticsearch/script/VectorScoreScriptUtils.java +++ b/server/src/main/java/org/elasticsearch/script/VectorScoreScriptUtils.java @@ -9,6 +9,8 @@ package org.elasticsearch.script; import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.features.NodeFeature; +import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.script.field.vectors.DenseVector; import org.elasticsearch.script.field.vectors.DenseVectorDocValuesField; @@ -18,6 +20,8 @@ public class VectorScoreScriptUtils { + public static final NodeFeature HAMMING_DISTANCE_FUNCTION = new NodeFeature("script.hamming"); + public static class DenseVectorFunction { protected final ScoreScript scoreScript; protected final DenseVectorDocValuesField field; @@ -187,6 +191,52 @@ public double l1norm() { } } + // Calculate Hamming distances between a query's dense vector and documents' dense vectors + public interface HammingDistanceInterface { + int hamming(); + } + + public static class ByteHammingDistance extends ByteDenseVectorFunction implements HammingDistanceInterface { + + public 
ByteHammingDistance(ScoreScript scoreScript, DenseVectorDocValuesField field, List queryVector) { + super(scoreScript, field, queryVector); + } + + public ByteHammingDistance(ScoreScript scoreScript, DenseVectorDocValuesField field, byte[] queryVector) { + super(scoreScript, field, queryVector); + } + + public int hamming() { + setNextVector(); + return field.get().hamming(queryVector); + } + } + + public static final class Hamming { + + private final HammingDistanceInterface function; + + @SuppressWarnings("unchecked") + public Hamming(ScoreScript scoreScript, Object queryVector, String fieldName) { + DenseVectorDocValuesField field = (DenseVectorDocValuesField) scoreScript.field(fieldName); + if (field.getElementType() != DenseVectorFieldMapper.ElementType.BYTE) { + throw new IllegalArgumentException("hamming distance is only supported for byte vectors"); + } + if (queryVector instanceof List) { + function = new ByteHammingDistance(scoreScript, field, (List) queryVector); + } else if (queryVector instanceof String s) { + byte[] parsedQueryVector = HexFormat.of().parseHex(s); + function = new ByteHammingDistance(scoreScript, field, parsedQueryVector); + } else { + throw new IllegalArgumentException("Unsupported input object for byte vectors: " + queryVector.getClass().getName()); + } + } + + public double hamming() { + return function.hamming(); + } + } + // Calculate l2 norm (Manhattan distance) between a query's dense vector and documents' dense vectors public interface L2NormInterface { double l2norm(); diff --git a/server/src/main/java/org/elasticsearch/script/field/vectors/BinaryDenseVector.java b/server/src/main/java/org/elasticsearch/script/field/vectors/BinaryDenseVector.java index cffddfabf4aba..4fbfdcf9771a3 100644 --- a/server/src/main/java/org/elasticsearch/script/field/vectors/BinaryDenseVector.java +++ b/server/src/main/java/org/elasticsearch/script/field/vectors/BinaryDenseVector.java @@ -83,6 +83,16 @@ public double l1Norm(List queryVector) { 
return l1norm; } + @Override + public int hamming(byte[] queryVector) { + throw new UnsupportedOperationException("hamming distance is not supported for float vectors"); + } + + @Override + public int hamming(List queryVector) { + throw new UnsupportedOperationException("hamming distance is not supported for float vectors"); + } + @Override public double l2Norm(byte[] queryVector) { throw new UnsupportedOperationException("use [double l2Norm(float[] queryVector)] instead"); diff --git a/server/src/main/java/org/elasticsearch/script/field/vectors/ByteBinaryDenseVector.java b/server/src/main/java/org/elasticsearch/script/field/vectors/ByteBinaryDenseVector.java index a986b62ce8496..c009397452c8a 100644 --- a/server/src/main/java/org/elasticsearch/script/field/vectors/ByteBinaryDenseVector.java +++ b/server/src/main/java/org/elasticsearch/script/field/vectors/ByteBinaryDenseVector.java @@ -100,6 +100,20 @@ public double l1Norm(List queryVector) { return result; } + @Override + public int hamming(byte[] queryVector) { + return VectorUtil.xorBitCount(queryVector, vectorValue); + } + + @Override + public int hamming(List queryVector) { + int distance = 0; + for (int i = 0; i < queryVector.size(); i++) { + distance += Integer.bitCount((queryVector.get(i).intValue() ^ vectorValue[i]) & 0xFF); + } + return distance; + } + @Override public double l2Norm(byte[] queryVector) { return Math.sqrt(VectorUtil.squareDistance(queryVector, vectorValue)); diff --git a/server/src/main/java/org/elasticsearch/script/field/vectors/ByteKnnDenseVector.java b/server/src/main/java/org/elasticsearch/script/field/vectors/ByteKnnDenseVector.java index b00b6703872ab..e0ba032826aa1 100644 --- a/server/src/main/java/org/elasticsearch/script/field/vectors/ByteKnnDenseVector.java +++ b/server/src/main/java/org/elasticsearch/script/field/vectors/ByteKnnDenseVector.java @@ -101,6 +101,20 @@ public double l1Norm(List queryVector) { return result; } + @Override + public int hamming(byte[] queryVector) { + 
return VectorUtil.xorBitCount(queryVector, docVector); + } + + @Override + public int hamming(List queryVector) { + int distance = 0; + for (int i = 0; i < queryVector.size(); i++) { + distance += Integer.bitCount((queryVector.get(i).intValue() ^ docVector[i]) & 0xFF); + } + return distance; + } + @Override public double l2Norm(byte[] queryVector) { return Math.sqrt(VectorUtil.squareDistance(docVector, queryVector)); diff --git a/server/src/main/java/org/elasticsearch/script/field/vectors/DenseVector.java b/server/src/main/java/org/elasticsearch/script/field/vectors/DenseVector.java index d18ae16746819..a768e8add6663 100644 --- a/server/src/main/java/org/elasticsearch/script/field/vectors/DenseVector.java +++ b/server/src/main/java/org/elasticsearch/script/field/vectors/DenseVector.java @@ -14,8 +14,7 @@ /** * DenseVector value type for the painless. - */ -/* dotProduct, l1Norm, l2Norm, cosineSimilarity have three flavors depending on the type of the queryVector + * dotProduct, l1Norm, l2Norm, cosineSimilarity have three flavors depending on the type of the queryVector * 1) float[], this is for the ScoreScriptUtils class bindings which have converted a List based query vector into an array * 2) List, A painless script will typically use Lists since they are easy to pass as params and have an easy * literal syntax. 
Working with Lists directly, instead of converting to a float[], trades off runtime operations against @@ -74,6 +73,24 @@ default double l1Norm(Object queryVector) { throw new IllegalArgumentException(badQueryVectorType(queryVector)); } + int hamming(byte[] queryVector); + + int hamming(List queryVector); + + @SuppressWarnings("unchecked") + default int hamming(Object queryVector) { + if (queryVector instanceof List list) { + checkDimensions(getDims(), list.size()); + return hamming((List) list); + } + if (queryVector instanceof byte[] bytes) { + checkDimensions(getDims(), bytes.length); + return hamming(bytes); + } + + throw new IllegalArgumentException(badQueryVectorType(queryVector)); + } + double l2Norm(byte[] queryVector); double l2Norm(float[] queryVector); @@ -231,6 +248,16 @@ public double l1Norm(List queryVector) { throw new IllegalArgumentException(MISSING_VECTOR_FIELD_MESSAGE); } + @Override + public int hamming(byte[] queryVector) { + throw new IllegalArgumentException(MISSING_VECTOR_FIELD_MESSAGE); + } + + @Override + public int hamming(List queryVector) { + throw new IllegalArgumentException(MISSING_VECTOR_FIELD_MESSAGE); + } + @Override public double l2Norm(byte[] queryVector) { throw new IllegalArgumentException(MISSING_VECTOR_FIELD_MESSAGE); diff --git a/server/src/main/java/org/elasticsearch/script/field/vectors/KnnDenseVector.java b/server/src/main/java/org/elasticsearch/script/field/vectors/KnnDenseVector.java index 1605f179e36aa..7f94f029dcbb3 100644 --- a/server/src/main/java/org/elasticsearch/script/field/vectors/KnnDenseVector.java +++ b/server/src/main/java/org/elasticsearch/script/field/vectors/KnnDenseVector.java @@ -85,6 +85,16 @@ public double l1Norm(List queryVector) { return result; } + @Override + public int hamming(byte[] queryVector) { + throw new UnsupportedOperationException("hamming distance is not supported for float vectors"); + } + + @Override + public int hamming(List queryVector) { + throw new 
UnsupportedOperationException("hamming distance is not supported for float vectors"); + } + @Override public double l2Norm(byte[] queryVector) { throw new UnsupportedOperationException("use [double l2Norm(float[] queryVector)] instead"); diff --git a/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification b/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification index d8a29a84ddbb7..5192ea2b4b108 100644 --- a/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification +++ b/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification @@ -15,4 +15,5 @@ org.elasticsearch.indices.IndicesFeatures org.elasticsearch.action.admin.cluster.allocation.AllocationStatsFeatures org.elasticsearch.index.mapper.MapperFeatures org.elasticsearch.search.retriever.RetrieversFeatures +org.elasticsearch.script.ScriptFeatures org.elasticsearch.reservedstate.service.FileSettingsFeatures diff --git a/server/src/test/java/org/elasticsearch/script/VectorScoreScriptUtilsTests.java b/server/src/test/java/org/elasticsearch/script/VectorScoreScriptUtilsTests.java index a095c4e6409ac..80c93e05b8bd5 100644 --- a/server/src/test/java/org/elasticsearch/script/VectorScoreScriptUtilsTests.java +++ b/server/src/test/java/org/elasticsearch/script/VectorScoreScriptUtilsTests.java @@ -15,6 +15,7 @@ import org.elasticsearch.index.mapper.vectors.KnnDenseVectorScriptDocValuesTests; import org.elasticsearch.script.VectorScoreScriptUtils.CosineSimilarity; import org.elasticsearch.script.VectorScoreScriptUtils.DotProduct; +import org.elasticsearch.script.VectorScoreScriptUtils.Hamming; import org.elasticsearch.script.VectorScoreScriptUtils.L1Norm; import org.elasticsearch.script.VectorScoreScriptUtils.L2Norm; import org.elasticsearch.script.field.vectors.BinaryDenseVectorDocValuesField; @@ -112,6 +113,12 @@ public void testFloatVectorClassBindings() throws IOException { 
containsString("query vector has a different number of dimensions [2] than the document vectors [5]") ); + e = expectThrows(IllegalArgumentException.class, () -> new Hamming(scoreScript, queryVector, fieldName)); + assertThat(e.getMessage(), containsString("hamming distance is only supported for byte vectors")); + + e = expectThrows(IllegalArgumentException.class, () -> new Hamming(scoreScript, invalidQueryVector, fieldName)); + assertThat(e.getMessage(), containsString("hamming distance is only supported for byte vectors")); + // Check scripting infrastructure integration DotProduct dotProduct = new DotProduct(scoreScript, queryVector, fieldName); assertEquals(65425.6249, dotProduct.dotProduct(), 0.001); @@ -199,6 +206,11 @@ public void testByteVectorClassBindings() throws IOException { e.getMessage(), containsString("query vector has a different number of dimensions [2] than the document vectors [5]") ); + e = expectThrows(IllegalArgumentException.class, () -> new Hamming(scoreScript, invalidQueryVector, fieldName)); + assertThat( + e.getMessage(), + containsString("query vector has a different number of dimensions [2] than the document vectors [5]") + ); // Check scripting infrastructure integration assertEquals(17382.0, new DotProduct(scoreScript, queryVector, fieldName).dotProduct(), 0.001); @@ -207,6 +219,8 @@ public void testByteVectorClassBindings() throws IOException { assertEquals(135.0, new L1Norm(scoreScript, hexidecimalString, fieldName).l1norm(), 0.001); assertEquals(116.897, new L2Norm(scoreScript, queryVector, fieldName).l2norm(), 0.001); assertEquals(116.897, new L2Norm(scoreScript, hexidecimalString, fieldName).l2norm(), 0.001); + assertEquals(13.0, new Hamming(scoreScript, queryVector, fieldName).hamming(), 0.001); + assertEquals(13.0, new Hamming(scoreScript, hexidecimalString, fieldName).hamming(), 0.001); DotProduct dotProduct = new DotProduct(scoreScript, queryVector, fieldName); when(scoreScript._getDocId()).thenReturn(1); e = 
expectThrows(IllegalArgumentException.class, dotProduct::dotProduct); From 0ae5aa35b7e5d1b4835d81a1d95dd1475b9c6a1f Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 17 Jun 2024 20:33:59 +0200 Subject: [PATCH 02/26] Optimize BytesReference related code field access patterns (#109782) Cache object fields (even when final, see https://openjdk.org/jeps/8132243) to generate smaller byte code as well as more optimized compiled code for this performance critical code. --- .../elasticsearch/common/bytes/BytesArray.java | 15 +++++++++------ .../common/bytes/BytesReferenceStreamInput.java | 16 ++++++++++++++++ .../common/bytes/CompositeBytesReference.java | 11 +++++++---- .../common/io/stream/ByteBufferStreamInput.java | 10 ++++++++++ 4 files changed, 42 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/bytes/BytesArray.java b/server/src/main/java/org/elasticsearch/common/bytes/BytesArray.java index 40697a0c158a5..3d26f2785a09e 100644 --- a/server/src/main/java/org/elasticsearch/common/bytes/BytesArray.java +++ b/server/src/main/java/org/elasticsearch/common/bytes/BytesArray.java @@ -59,16 +59,19 @@ public byte get(int index) { @Override public int indexOf(byte marker, int from) { final int len = length - from; - int off = offset + from; - final int toIndex = offset + length; + // cache object fields (even when final this is a valid optimization, see https://openjdk.org/jeps/8132243) + final int offsetAsLocal = offset; + int off = offsetAsLocal + from; + final int toIndex = offsetAsLocal + length; + final byte[] bytesAsLocal = bytes; // First, try to find the marker in the first few bytes, so we can enter the faster 8-byte aligned loop below. // The idea for this logic is taken from Netty's io.netty.buffer.ByteBufUtil.firstIndexOf and optimized for little endian hardware. // See e.g. https://richardstartin.github.io/posts/finding-bytes for the idea behind this optimization. 
final int byteCount = len & 7; if (byteCount > 0) { - final int index = unrolledFirstIndexOf(bytes, off, byteCount, marker); + final int index = unrolledFirstIndexOf(bytesAsLocal, off, byteCount, marker); if (index != -1) { - return index - offset; + return index - offsetAsLocal; } off += byteCount; if (off == toIndex) { @@ -79,9 +82,9 @@ public int indexOf(byte marker, int from) { // faster SWAR (SIMD Within A Register) loop final long pattern = compilePattern(marker); for (int i = 0; i < longCount; i++) { - int index = findInLong(ByteUtils.readLongLE(bytes, off), pattern); + int index = findInLong(ByteUtils.readLongLE(bytesAsLocal, off), pattern); if (index < Long.BYTES) { - return off + index - offset; + return off + index - offsetAsLocal; } off += Long.BYTES; } diff --git a/server/src/main/java/org/elasticsearch/common/bytes/BytesReferenceStreamInput.java b/server/src/main/java/org/elasticsearch/common/bytes/BytesReferenceStreamInput.java index 22bed3ea0b1e9..42326566743ff 100644 --- a/server/src/main/java/org/elasticsearch/common/bytes/BytesReferenceStreamInput.java +++ b/server/src/main/java/org/elasticsearch/common/bytes/BytesReferenceStreamInput.java @@ -56,6 +56,8 @@ public byte readByte() throws IOException { @Override public short readShort() throws IOException { + // cache object fields (even when final this is a valid optimization, see https://openjdk.org/jeps/8132243) + final ByteBuffer slice = this.slice; if (slice.remaining() >= 2) { return slice.getShort(); } else { @@ -66,6 +68,8 @@ public short readShort() throws IOException { @Override public int readInt() throws IOException { + // cache object fields (even when final this is a valid optimization, see https://openjdk.org/jeps/8132243) + final ByteBuffer slice = this.slice; if (slice.remaining() >= 4) { return slice.getInt(); } else { @@ -76,6 +80,8 @@ public int readInt() throws IOException { @Override public long readLong() throws IOException { + // cache object fields (even when final this is 
a valid optimization, see https://openjdk.org/jeps/8132243) + final ByteBuffer slice = this.slice; if (slice.remaining() >= 8) { return slice.getLong(); } else { @@ -87,6 +93,8 @@ public long readLong() throws IOException { @Override public String readString() throws IOException { final int chars = readArraySize(); + // cache object fields (even when final this is a valid optimization, see https://openjdk.org/jeps/8132243) + final ByteBuffer slice = this.slice; if (slice.hasArray()) { // attempt reading bytes directly into a string to minimize copying final String string = tryReadStringFromBytes( @@ -104,6 +112,8 @@ public String readString() throws IOException { @Override public int readVInt() throws IOException { + // cache object fields (even when final this is a valid optimization, see https://openjdk.org/jeps/8132243) + final ByteBuffer slice = this.slice; if (slice.remaining() >= 5) { return ByteBufferStreamInput.readVInt(slice); } @@ -112,6 +122,8 @@ public int readVInt() throws IOException { @Override public long readVLong() throws IOException { + // cache object fields (even when final this is a valid optimization, see https://openjdk.org/jeps/8132243) + final ByteBuffer slice = this.slice; if (slice.remaining() >= 10) { return ByteBufferStreamInput.readVLong(slice); } else { @@ -161,6 +173,8 @@ public int read() throws IOException { @Override public int read(final byte[] b, final int bOffset, final int len) throws IOException { + // cache object fields (even when final this is a valid optimization, see https://openjdk.org/jeps/8132243) + final ByteBuffer slice = this.slice; if (slice.remaining() >= len) { slice.get(b, bOffset, len); return len; @@ -226,6 +240,8 @@ private int skipMultiple(long n) throws IOException { int remaining = numBytesSkipped; while (remaining > 0) { maybeNextSlice(); + // cache object fields (even when final this is a valid optimization, see https://openjdk.org/jeps/8132243) + final ByteBuffer slice = this.slice; int currentLen = 
Math.min(remaining, slice.remaining()); remaining -= currentLen; slice.position(slice.position() + currentLen); diff --git a/server/src/main/java/org/elasticsearch/common/bytes/CompositeBytesReference.java b/server/src/main/java/org/elasticsearch/common/bytes/CompositeBytesReference.java index 65a3bf95336c6..9b8c06426e97c 100644 --- a/server/src/main/java/org/elasticsearch/common/bytes/CompositeBytesReference.java +++ b/server/src/main/java/org/elasticsearch/common/bytes/CompositeBytesReference.java @@ -116,17 +116,20 @@ public int indexOf(byte marker, int from) { } final int firstReferenceIndex = getOffsetIndex(from); - for (int i = firstReferenceIndex; i < references.length; ++i) { - final BytesReference reference = references[i]; + // cache object fields (even when final this is a valid optimization, see https://openjdk.org/jeps/8132243) + final BytesReference[] referencesAsLocal = references; + final int[] offsetsAsLocal = offsets; + for (int i = firstReferenceIndex; i < referencesAsLocal.length; ++i) { + final BytesReference reference = referencesAsLocal[i]; final int internalFrom; if (i == firstReferenceIndex) { - internalFrom = from - offsets[firstReferenceIndex]; + internalFrom = from - offsetsAsLocal[firstReferenceIndex]; } else { internalFrom = 0; } result = reference.indexOf(marker, internalFrom); if (result != -1) { - result += offsets[i]; + result += offsetsAsLocal[i]; break; } } diff --git a/server/src/main/java/org/elasticsearch/common/io/stream/ByteBufferStreamInput.java b/server/src/main/java/org/elasticsearch/common/io/stream/ByteBufferStreamInput.java index 41d129406551f..f1c0486a02d81 100644 --- a/server/src/main/java/org/elasticsearch/common/io/stream/ByteBufferStreamInput.java +++ b/server/src/main/java/org/elasticsearch/common/io/stream/ByteBufferStreamInput.java @@ -123,6 +123,8 @@ public static long readVLong(ByteBuffer buffer) throws IOException { @Override public String readString() throws IOException { final int chars = readArraySize(); 
+ // cache object fields (even when final this is a valid optimization, see https://openjdk.org/jeps/8132243) + final ByteBuffer buffer = this.buffer; if (buffer.hasArray()) { // attempt reading bytes directly into a string to minimize copying final String string = tryReadStringFromBytes( @@ -140,6 +142,8 @@ public String readString() throws IOException { @Override public int read() throws IOException { + // cache object fields (even when final this is a valid optimization, see https://openjdk.org/jeps/8132243) + final ByteBuffer buffer = this.buffer; if (buffer.hasRemaining() == false) { return -1; } @@ -157,6 +161,8 @@ public byte readByte() throws IOException { @Override public int read(byte[] b, int off, int len) throws IOException { + // cache object fields (even when final this is a valid optimization, see https://openjdk.org/jeps/8132243) + final ByteBuffer buffer = this.buffer; if (buffer.hasRemaining() == false) { return -1; } @@ -168,6 +174,8 @@ public int read(byte[] b, int off, int len) throws IOException { @Override public long skip(long n) throws IOException { + // cache object fields (even when final this is a valid optimization, see https://openjdk.org/jeps/8132243) + final ByteBuffer buffer = this.buffer; int remaining = buffer.remaining(); if (n > remaining) { buffer.position(buffer.limit()); @@ -257,6 +265,8 @@ protected void ensureCanReadBytes(int length) throws EOFException { @Override public BytesReference readSlicedBytesReference() throws IOException { + // cache object fields (even when final this is a valid optimization, see https://openjdk.org/jeps/8132243) + final ByteBuffer buffer = this.buffer; if (buffer.hasArray()) { int len = readVInt(); var res = new BytesArray(buffer.array(), buffer.arrayOffset() + buffer.position(), len); From 7ad9534707d7713b425ee917fb1f59c0cce09b7d Mon Sep 17 00:00:00 2001 From: Jake Landis Date: Mon, 17 Jun 2024 13:35:16 -0500 Subject: [PATCH 03/26] Bump jackson version in modules:repository-azure (#109717) --- 
docs/changelog/109717.yaml | 5 +++++ gradle/verification-metadata.xml | 25 +++++++++++++++++++++++++ modules/repository-azure/build.gradle | 2 +- 3 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/109717.yaml diff --git a/docs/changelog/109717.yaml b/docs/changelog/109717.yaml new file mode 100644 index 0000000000000..326657ea4ce21 --- /dev/null +++ b/docs/changelog/109717.yaml @@ -0,0 +1,5 @@ +pr: 109717 +summary: Bump jackson version in modules:repository-azure +area: Snapshot/Restore +type: upgrade +issues: [] diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 9860253d70e58..6e4beb0953b56 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -266,6 +266,11 @@ + + + + + @@ -286,6 +291,11 @@ + + + + + @@ -331,6 +341,11 @@ + + + + + @@ -346,6 +361,11 @@ + + + + + @@ -366,6 +386,11 @@ + + + + + diff --git a/modules/repository-azure/build.gradle b/modules/repository-azure/build.gradle index c2568d9a4db2c..d093816acd45f 100644 --- a/modules/repository-azure/build.gradle +++ b/modules/repository-azure/build.gradle @@ -21,7 +21,7 @@ versions << [ 'azureCommon': '12.19.1', 'azureCore': '1.34.0', 'azureCoreHttpNetty': '1.12.7', - 'azureJackson': '2.13.4', + 'azureJackson': '2.15.4', 'azureJacksonDatabind': '2.13.4.2', 'azureAvro': '12.5.3', From c6e3a94f95acf37b7930556eeb40b03353adc09d Mon Sep 17 00:00:00 2001 From: Oleksandr Kolomiiets Date: Mon, 17 Jun 2024 13:32:46 -0700 Subject: [PATCH 04/26] Fix security policies after renaming of elasticsearch-vec (#109826) --- .../java/org/elasticsearch/bootstrap/BootstrapForTesting.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/framework/src/main/java/org/elasticsearch/bootstrap/BootstrapForTesting.java b/test/framework/src/main/java/org/elasticsearch/bootstrap/BootstrapForTesting.java index fb55670401ecb..8ef80c08517de 100644 --- 
a/test/framework/src/main/java/org/elasticsearch/bootstrap/BootstrapForTesting.java +++ b/test/framework/src/main/java/org/elasticsearch/bootstrap/BootstrapForTesting.java @@ -223,7 +223,7 @@ static Map getCodebases() { addClassCodebase(codebases, "elasticsearch-core", "org.elasticsearch.core.Booleans"); addClassCodebase(codebases, "elasticsearch-cli", "org.elasticsearch.cli.Command"); addClassCodebase(codebases, "elasticsearch-preallocate", "org.elasticsearch.preallocate.Preallocate"); - addClassCodebase(codebases, "elasticsearch-vec", "org.elasticsearch.simdvec.VectorScorerFactory"); + addClassCodebase(codebases, "elasticsearch-simdvec", "org.elasticsearch.simdvec.VectorScorerFactory"); addClassCodebase(codebases, "framework", "org.elasticsearch.test.ESTestCase"); return codebases; } From 559716fbc84c4750c4b16e5eefdab2c2a4ec3ba6 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Tue, 18 Jun 2024 07:00:30 +1000 Subject: [PATCH 05/26] Mute org.elasticsearch.action.search.SearchProgressActionListenerIT testSearchProgressWithHits #109830 --- muted-tests.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index aae24c786ab04..b88be6ea829ae 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -10,8 +10,7 @@ tests: method: "testGuessIsDayFirstFromLocale" - class: "org.elasticsearch.test.rest.ClientYamlTestSuiteIT" issue: "https://github.com/elastic/elasticsearch/issues/108857" - method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale\ - \ dependent mappings / dates}" + method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale dependent mappings / dates}" - class: "org.elasticsearch.upgrades.SearchStatesIT" issue: "https://github.com/elastic/elasticsearch/issues/108991" method: "testCanMatch" @@ -20,8 +19,7 @@ tests: method: "testTrainedModelInference" - class: 
"org.elasticsearch.xpack.security.CoreWithSecurityClientYamlTestSuiteIT" issue: "https://github.com/elastic/elasticsearch/issues/109188" - method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale\ - \ dependent mappings / dates}" + method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale dependent mappings / dates}" - class: "org.elasticsearch.xpack.esql.qa.mixed.EsqlClientYamlIT" issue: "https://github.com/elastic/elasticsearch/issues/109189" method: "test {p0=esql/70_locale/Date format with Italian locale}" @@ -36,8 +34,7 @@ tests: method: "testTimestampFieldTypeExposedByAllIndicesServices" - class: "org.elasticsearch.analysis.common.CommonAnalysisClientYamlTestSuiteIT" issue: "https://github.com/elastic/elasticsearch/issues/109318" - method: "test {yaml=analysis-common/50_char_filters/pattern_replace error handling\ - \ (too complex pattern)}" + method: "test {yaml=analysis-common/50_char_filters/pattern_replace error handling (too complex pattern)}" - class: "org.elasticsearch.xpack.ml.integration.ClassificationHousePricingIT" issue: "https://github.com/elastic/elasticsearch/issues/101598" method: "testFeatureImportanceValues" @@ -61,6 +58,9 @@ tests: - class: "org.elasticsearch.xpack.esql.EsqlAsyncSecurityIT" issue: "https://github.com/elastic/elasticsearch/issues/109806" method: "testInsufficientPrivilege" +- class: org.elasticsearch.action.search.SearchProgressActionListenerIT + method: testSearchProgressWithHits + issue: https://github.com/elastic/elasticsearch/issues/109830 # Examples: # From 84929f0385bd5f0f8ecbad86a6ec353b7f9bf1b7 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 17 Jun 2024 17:34:37 -0400 Subject: [PATCH 06/26] ESQL: Limit the size of shapes in MV_SLICE tests (#109715) This limits the size of the data used to test MV_SLICE so we don't run out of memory when testing it. 
Closes #109697 --- .../scalar/multivalue/MvSliceTests.java | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSliceTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSliceTests.java index 3ab17b78ff8e7..0550be25f9d91 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSliceTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/multivalue/MvSliceTests.java @@ -306,7 +306,16 @@ private static void bytesRefs(List suppliers) { })); suppliers.add(new TestCaseSupplier(List.of(DataType.GEO_SHAPE, DataType.INTEGER, DataType.INTEGER), () -> { - List field = randomList(1, 5, () -> new BytesRef(GEO.asWkt(GeometryTestUtils.randomGeometry(randomBoolean())))); + var pointCounter = new MvAppendTests.GeometryPointCountVisitor(); + List field = randomList( + 1, + 5, + () -> new BytesRef( + GEO.asWkt( + randomValueOtherThanMany(g -> g.visit(pointCounter) > 500, () -> GeometryTestUtils.randomGeometry(randomBoolean())) + ) + ) + ); int length = field.size(); int start = randomIntBetween(0, length - 1); int end = randomIntBetween(start, length - 1); @@ -323,7 +332,16 @@ private static void bytesRefs(List suppliers) { })); suppliers.add(new TestCaseSupplier(List.of(DataType.CARTESIAN_SHAPE, DataType.INTEGER, DataType.INTEGER), () -> { - List field = randomList(1, 5, () -> new BytesRef(CARTESIAN.asWkt(ShapeTestUtils.randomGeometry(randomBoolean())))); + var pointCounter = new MvAppendTests.GeometryPointCountVisitor(); + List field = randomList( + 1, + 5, + () -> new BytesRef( + CARTESIAN.asWkt( + randomValueOtherThanMany(g -> g.visit(pointCounter) > 500, () -> GeometryTestUtils.randomGeometry(randomBoolean())) + ) + ) + ); int length = field.size(); int start = 
randomIntBetween(0, length - 1); int end = randomIntBetween(start, length - 1); From e25487ca7213cc34dcc89fbb6bc8f8b446b127d2 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 17 Jun 2024 23:59:05 +0200 Subject: [PATCH 07/26] Optimize field access in DotExpandingXContentParser (#109825) This is one of the hotest methods in document parsing. Adding this optimization here helps a bit with inlining better. --- .../elasticsearch/index/mapper/DotExpandingXContentParser.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DotExpandingXContentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DotExpandingXContentParser.java index 6cf44ba6bc447..d8780f28b58a6 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DotExpandingXContentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DotExpandingXContentParser.java @@ -50,6 +50,8 @@ private static final class WrappingParser extends FilterXContentParser { public Token nextToken() throws IOException { Token token; XContentParser delegate; + // cache object field (even when final this is a valid optimization, see https://openjdk.org/jeps/8132243) + var parsers = this.parsers; while ((token = (delegate = parsers.peek()).nextToken()) == null) { parsers.pop(); if (parsers.isEmpty()) { From c214457b39956a270313bb1b4274f8259636cc44 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 18 Jun 2024 15:25:05 +1200 Subject: [PATCH 08/26] [ML] Handle the "output memory allocator bytes" field (#109653) Handle the "output memory allocator bytes" field if and only if it is present in the model size stats, as reported by the C++ backend. This PR _must_ be merged prior to the corresponding `ml-cpp` one, to keep CI tests happy. 
--- docs/changelog/109653.yaml | 5 +++ docs/reference/cat/anomaly-detectors.asciidoc | 27 +++++++++------- docs/reference/ml/ml-shared.asciidoc | 12 ++++--- .../org/elasticsearch/TransportVersions.java | 1 + .../autodetect/state/ModelSizeStats.java | 31 +++++++++++++++++++ .../autodetect/state/ModelSizeStatsTests.java | 1 + .../xpack/ml/rest/cat/RestCatJobsAction.java | 11 +++++++ 7 files changed, 72 insertions(+), 16 deletions(-) create mode 100644 docs/changelog/109653.yaml diff --git a/docs/changelog/109653.yaml b/docs/changelog/109653.yaml new file mode 100644 index 0000000000000..665163ec2a91b --- /dev/null +++ b/docs/changelog/109653.yaml @@ -0,0 +1,5 @@ +pr: 109653 +summary: Handle the "JSON memory allocator bytes" field +area: Machine Learning +type: enhancement +issues: [] diff --git a/docs/reference/cat/anomaly-detectors.asciidoc b/docs/reference/cat/anomaly-detectors.asciidoc index 607a88d1e1a5c..3416c256881af 100644 --- a/docs/reference/cat/anomaly-detectors.asciidoc +++ b/docs/reference/cat/anomaly-detectors.asciidoc @@ -7,9 +7,9 @@ [IMPORTANT] ==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application -consumption, use the +cat APIs are only intended for human consumption using the command line or {kib} +console. They are _not_ intended for use by applications. For application +consumption, use the <>. ==== @@ -137,7 +137,7 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=sparse-bucket-count] `forecasts.memory.avg`, `fmavg`, `forecastsMemoryAvg`::: The average memory usage in bytes for forecasts related to the {anomaly-job}. - + `forecasts.memory.max`, `fmmax`, `forecastsMemoryMax`::: The maximum memory usage in bytes for forecasts related to the {anomaly-job}. @@ -145,8 +145,8 @@ The maximum memory usage in bytes for forecasts related to the {anomaly-job}. The minimum memory usage in bytes for forecasts related to the {anomaly-job}. 
`forecasts.memory.total`, `fmt`, `forecastsMemoryTotal`::: -The total memory usage in bytes for forecasts related to the {anomaly-job}. - +The total memory usage in bytes for forecasts related to the {anomaly-job}. + `forecasts.records.avg`, `fravg`, `forecastsRecordsAvg`::: The average number of `model_forecast` documents written for forecasts related to the {anomaly-job}. @@ -161,8 +161,8 @@ to the {anomaly-job}. `forecasts.records.total`, `frt`, `forecastsRecordsTotal`::: The total number of `model_forecast` documents written for forecasts related to -the {anomaly-job}. - +the {anomaly-job}. + `forecasts.time.avg`, `ftavg`, `forecastsTimeAvg`::: The average runtime in milliseconds for forecasts related to the {anomaly-job}. @@ -198,7 +198,7 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=model-bytes-exceeded] `model.categorization_status`, `mcs`, `modelCategorizationStatus`::: include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=categorization-status] - + `model.categorized_doc_count`, `mcdc`, `modelCategorizedDocCount`::: include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=categorized-doc-count] @@ -221,6 +221,9 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=model-memory-limit-anomaly-jobs] (Default) include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=model-memory-status] +`model.output_memory_allocator_bytes`, `momab`, `modelOutputMemoryAllocatorBytes`::: +include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=output-memory-allocator-bytes] + `model.over_fields`, `mof`, `modelOverFields`::: include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=total-over-field-count] @@ -232,10 +235,10 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=rare-category-count] `model.timestamp`, `mt`, `modelTimestamp`::: include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=model-timestamp] - + `model.total_category_count`, `mtcc`, `modelTotalCategoryCount`::: include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=total-category-count] - + `node.address`, `na`, `nodeAddress`::: The network address of the node. 
+ @@ -261,7 +264,7 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=open-time] `state`, `s`::: (Default) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=state-anomaly-job] +include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=state-anomaly-job] include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=help] diff --git a/docs/reference/ml/ml-shared.asciidoc b/docs/reference/ml/ml-shared.asciidoc index 6bbc98db1c2e1..a69fd2f1812e9 100644 --- a/docs/reference/ml/ml-shared.asciidoc +++ b/docs/reference/ml/ml-shared.asciidoc @@ -430,16 +430,16 @@ end::daily-model-snapshot-retention-after-days[] tag::data-description[] The data description defines the format of the input data when you send data to -the job by using the <> API. Note that when using a -{dfeed}, only the `time_field` needs to be set, the rest of the properties are -automatically set. When data is received via the <> API, +the job by using the <> API. Note that when using a +{dfeed}, only the `time_field` needs to be set, the rest of the properties are +automatically set. When data is received via the <> API, it is not stored in {es}. Only the results for {anomaly-detect} are retained. + .Properties of `data_description` [%collapsible%open] ==== `format`::: - (string) Only `xcontent` format is supported at this time, and this is the + (string) Only `xcontent` format is supported at this time, and this is the default value. `time_field`::: @@ -1285,6 +1285,10 @@ tag::job-id-datafeed[] The unique identifier for the job to which the {dfeed} sends data. end::job-id-datafeed[] +tag::output-memory-allocator-bytes[] +The amount of memory, in bytes, used to output {anomaly-job} documents. +end::output-memory-allocator-bytes[] + tag::lambda[] Advanced configuration option. Regularization parameter to prevent overfitting on the training data set. 
Multiplies an L2 regularization term which applies to diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 829acfca231e5..754d07a89dbce 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -194,6 +194,7 @@ static TransportVersion def(int id) { public static final TransportVersion DROP_UNUSED_NODES_IDS = def(8_685_00_0); public static final TransportVersion DELETE_SNAPSHOTS_ASYNC_ADDED = def(8_686_00_0); public static final TransportVersion VERSION_SUPPORTING_SPARSE_VECTOR_STATS = def(8_687_00_0); + public static final TransportVersion ML_AD_OUTPUT_MEMORY_ALLOCATOR_FIELD = def(8_688_00_0); /* * STOP! READ THIS FIRST! No, really, diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStats.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStats.java index 3812c012e2a3d..16eceb1e89a95 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStats.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStats.java @@ -6,6 +6,7 @@ */ package org.elasticsearch.xpack.core.ml.job.process.autodetect.state; +import org.elasticsearch.TransportVersions; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; @@ -48,6 +49,7 @@ public class ModelSizeStats implements ToXContentObject, Writeable { public static final ParseField BUCKET_ALLOCATION_FAILURES_COUNT_FIELD = new ParseField("bucket_allocation_failures_count"); public static final ParseField MEMORY_STATUS_FIELD = new ParseField("memory_status"); public static final ParseField 
ASSIGNMENT_MEMORY_BASIS_FIELD = new ParseField("assignment_memory_basis"); + public static final ParseField OUTPUT_MEMORY_ALLOCATOR_BYTES_FIELD = new ParseField("output_memory_allocator_bytes"); public static final ParseField CATEGORIZED_DOC_COUNT_FIELD = new ParseField("categorized_doc_count"); public static final ParseField TOTAL_CATEGORY_COUNT_FIELD = new ParseField("total_category_count"); public static final ParseField FREQUENT_CATEGORY_COUNT_FIELD = new ParseField("frequent_category_count"); @@ -85,6 +87,7 @@ private static ConstructingObjectParser createParser(boolean igno ASSIGNMENT_MEMORY_BASIS_FIELD, ValueType.STRING ); + parser.declareLong(Builder::setOutputMemoryAllocatorBytes, OUTPUT_MEMORY_ALLOCATOR_BYTES_FIELD); parser.declareLong(Builder::setCategorizedDocCount, CATEGORIZED_DOC_COUNT_FIELD); parser.declareLong(Builder::setTotalCategoryCount, TOTAL_CATEGORY_COUNT_FIELD); parser.declareLong(Builder::setFrequentCategoryCount, FREQUENT_CATEGORY_COUNT_FIELD); @@ -188,6 +191,7 @@ public String toString() { private final long bucketAllocationFailuresCount; private final MemoryStatus memoryStatus; private final AssignmentMemoryBasis assignmentMemoryBasis; + private final Long outputMemoryAllocatorBytes; private final long categorizedDocCount; private final long totalCategoryCount; private final long frequentCategoryCount; @@ -210,6 +214,7 @@ private ModelSizeStats( long bucketAllocationFailuresCount, MemoryStatus memoryStatus, AssignmentMemoryBasis assignmentMemoryBasis, + Long outputMemoryAllocatorBytes, long categorizedDocCount, long totalCategoryCount, long frequentCategoryCount, @@ -231,6 +236,7 @@ private ModelSizeStats( this.bucketAllocationFailuresCount = bucketAllocationFailuresCount; this.memoryStatus = memoryStatus; this.assignmentMemoryBasis = assignmentMemoryBasis; + this.outputMemoryAllocatorBytes = outputMemoryAllocatorBytes; this.categorizedDocCount = categorizedDocCount; this.totalCategoryCount = totalCategoryCount; 
this.frequentCategoryCount = frequentCategoryCount; @@ -258,6 +264,11 @@ public ModelSizeStats(StreamInput in) throws IOException { } else { assignmentMemoryBasis = null; } + if (in.getTransportVersion().onOrAfter(TransportVersions.ML_AD_OUTPUT_MEMORY_ALLOCATOR_FIELD)) { + outputMemoryAllocatorBytes = in.readOptionalVLong(); + } else { + outputMemoryAllocatorBytes = null; + } categorizedDocCount = in.readVLong(); totalCategoryCount = in.readVLong(); frequentCategoryCount = in.readVLong(); @@ -295,6 +306,9 @@ public void writeTo(StreamOutput out) throws IOException { } else { out.writeBoolean(false); } + if (out.getTransportVersion().onOrAfter(TransportVersions.ML_AD_OUTPUT_MEMORY_ALLOCATOR_FIELD)) { + out.writeOptionalVLong(outputMemoryAllocatorBytes); + } out.writeVLong(categorizedDocCount); out.writeVLong(totalCategoryCount); out.writeVLong(frequentCategoryCount); @@ -339,6 +353,9 @@ public XContentBuilder doXContentBody(XContentBuilder builder) throws IOExceptio if (assignmentMemoryBasis != null) { builder.field(ASSIGNMENT_MEMORY_BASIS_FIELD.getPreferredName(), assignmentMemoryBasis); } + if (outputMemoryAllocatorBytes != null) { + builder.field(OUTPUT_MEMORY_ALLOCATOR_BYTES_FIELD.getPreferredName(), outputMemoryAllocatorBytes); + } builder.field(CATEGORIZED_DOC_COUNT_FIELD.getPreferredName(), categorizedDocCount); builder.field(TOTAL_CATEGORY_COUNT_FIELD.getPreferredName(), totalCategoryCount); builder.field(FREQUENT_CATEGORY_COUNT_FIELD.getPreferredName(), frequentCategoryCount); @@ -399,6 +416,10 @@ public AssignmentMemoryBasis getAssignmentMemoryBasis() { return assignmentMemoryBasis; } + public Long getOutputMemmoryAllocatorBytes() { + return outputMemoryAllocatorBytes; + } + public long getCategorizedDocCount() { return categorizedDocCount; } @@ -458,6 +479,7 @@ public int hashCode() { bucketAllocationFailuresCount, memoryStatus, assignmentMemoryBasis, + outputMemoryAllocatorBytes, categorizedDocCount, totalCategoryCount, frequentCategoryCount, @@ -495,6 
+517,7 @@ public boolean equals(Object other) { && this.bucketAllocationFailuresCount == that.bucketAllocationFailuresCount && Objects.equals(this.memoryStatus, that.memoryStatus) && Objects.equals(this.assignmentMemoryBasis, that.assignmentMemoryBasis) + && Objects.equals(this.outputMemoryAllocatorBytes, that.outputMemoryAllocatorBytes) && Objects.equals(this.categorizedDocCount, that.categorizedDocCount) && Objects.equals(this.totalCategoryCount, that.totalCategoryCount) && Objects.equals(this.frequentCategoryCount, that.frequentCategoryCount) @@ -520,6 +543,7 @@ public static class Builder { private long bucketAllocationFailuresCount; private MemoryStatus memoryStatus; private AssignmentMemoryBasis assignmentMemoryBasis; + private Long outputMemoryAllocatorBytes; private long categorizedDocCount; private long totalCategoryCount; private long frequentCategoryCount; @@ -549,6 +573,7 @@ public Builder(ModelSizeStats modelSizeStats) { this.bucketAllocationFailuresCount = modelSizeStats.bucketAllocationFailuresCount; this.memoryStatus = modelSizeStats.memoryStatus; this.assignmentMemoryBasis = modelSizeStats.assignmentMemoryBasis; + this.outputMemoryAllocatorBytes = modelSizeStats.outputMemoryAllocatorBytes; this.categorizedDocCount = modelSizeStats.categorizedDocCount; this.totalCategoryCount = modelSizeStats.totalCategoryCount; this.frequentCategoryCount = modelSizeStats.frequentCategoryCount; @@ -611,6 +636,11 @@ public Builder setAssignmentMemoryBasis(AssignmentMemoryBasis assignmentMemoryBa return this; } + public Builder setOutputMemoryAllocatorBytes(long outputMemoryAllocatorBytes) { + this.outputMemoryAllocatorBytes = outputMemoryAllocatorBytes; + return this; + } + public Builder setCategorizedDocCount(long categorizedDocCount) { this.categorizedDocCount = categorizedDocCount; return this; @@ -670,6 +700,7 @@ public ModelSizeStats build() { bucketAllocationFailuresCount, memoryStatus, assignmentMemoryBasis, + outputMemoryAllocatorBytes, categorizedDocCount, 
totalCategoryCount, frequentCategoryCount, diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStatsTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStatsTests.java index 2279164a7cbea..91e2971f369e3 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStatsTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/job/process/autodetect/state/ModelSizeStatsTests.java @@ -31,6 +31,7 @@ public void testDefaultConstructor() { assertEquals(0, stats.getBucketAllocationFailuresCount()); assertEquals(MemoryStatus.OK, stats.getMemoryStatus()); assertNull(stats.getAssignmentMemoryBasis()); + assertNull(stats.getOutputMemmoryAllocatorBytes()); assertEquals(0, stats.getCategorizedDocCount()); assertEquals(0, stats.getTotalCategoryCount()); assertEquals(0, stats.getFrequentCategoryCount()); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java index b6b050a10c790..cb02990da74c9 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java @@ -213,6 +213,12 @@ protected Table getTableWithHeader(RestRequest request) { .setAliases("mbaf", "modelBucketAllocationFailures") .build() ); + table.addCell( + "model.output_memory_allocator_bytes", + TableColumnAttributeBuilder.builder("how many bytes have been used to output the model documents", false) + .setAliases("momab", "modelOutputMemoryAllocatorBytes") + .build() + ); table.addCell( "model.categorization_status", TableColumnAttributeBuilder.builder("current categorization status", false) @@ -416,6 +422,11 @@ private Table 
buildTable(RestRequest request, Response jobStats) { table.addCell(modelSizeStats == null ? null : modelSizeStats.getTotalPartitionFieldCount()); table.addCell(modelSizeStats == null ? null : modelSizeStats.getBucketAllocationFailuresCount()); table.addCell(modelSizeStats == null ? null : modelSizeStats.getCategorizationStatus().toString()); + table.addCell( + modelSizeStats == null || modelSizeStats.getOutputMemmoryAllocatorBytes() == null + ? null + : ByteSizeValue.ofBytes(modelSizeStats.getOutputMemmoryAllocatorBytes()) + ); table.addCell(modelSizeStats == null ? null : modelSizeStats.getCategorizedDocCount()); table.addCell(modelSizeStats == null ? null : modelSizeStats.getTotalCategoryCount()); table.addCell(modelSizeStats == null ? null : modelSizeStats.getFrequentCategoryCount()); From 2cf5d14d34a5900e58c12f75d81b1b24d97c93c9 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 18 Jun 2024 14:57:48 +1000 Subject: [PATCH 09/26] Add toString to recovery requests for better log messages (#109784) This PR adds meaningful string representation for recovery requests so that they are logged with useful information when the [recovery is delayed](https://github.com/elastic/elasticsearch/blob/0a7c99b4c51baf67aa62fff54d6688f455c35b1e/server/src/main/java/org/elasticsearch/indices/recovery/PeerRecoverySourceClusterStateDelay.java#L43). 
--- .../recovery/StartRecoveryRequest.java | 22 +++++++++++++++++++ .../StatelessPrimaryRelocationAction.java | 16 ++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/indices/recovery/StartRecoveryRequest.java b/server/src/main/java/org/elasticsearch/indices/recovery/StartRecoveryRequest.java index 9cf5851454d6c..2ddfa9a3c1755 100644 --- a/server/src/main/java/org/elasticsearch/indices/recovery/StartRecoveryRequest.java +++ b/server/src/main/java/org/elasticsearch/indices/recovery/StartRecoveryRequest.java @@ -174,4 +174,26 @@ public void writeTo(StreamOutput out) throws IOException { out.writeBoolean(canDownloadSnapshotFiles); } } + + @Override + public String toString() { + return "StartRecoveryRequest{" + + "shardId=" + + shardId + + ", targetNode=" + + targetNode.descriptionWithoutAttributes() + + ", recoveryId=" + + recoveryId + + ", targetAllocationId='" + + targetAllocationId + + "', clusterStateVersion=" + + clusterStateVersion + + ", primaryRelocation=" + + primaryRelocation + + ", startingSeqNo=" + + startingSeqNo + + ", canDownloadSnapshotFiles=" + + canDownloadSnapshotFiles + + '}'; + } } diff --git a/server/src/main/java/org/elasticsearch/indices/recovery/StatelessPrimaryRelocationAction.java b/server/src/main/java/org/elasticsearch/indices/recovery/StatelessPrimaryRelocationAction.java index bdc7f5b2aafce..46908fbeec107 100644 --- a/server/src/main/java/org/elasticsearch/indices/recovery/StatelessPrimaryRelocationAction.java +++ b/server/src/main/java/org/elasticsearch/indices/recovery/StatelessPrimaryRelocationAction.java @@ -102,5 +102,21 @@ public boolean equals(Object o) { public int hashCode() { return Objects.hash(recoveryId, shardId, targetNode, targetAllocationId, clusterStateVersion); } + + @Override + public String toString() { + return "Request{" + + "shardId=" + + shardId + + ", targetNode=" + + targetNode.descriptionWithoutAttributes() + + ", recoveryId=" + + recoveryId + + ", 
targetAllocationId='" + + targetAllocationId + + "', clusterStateVersion=" + + clusterStateVersion + + '}'; + } } } From a3cac4c4627f41739e4a6872ee69a26c4e7abb02 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 18 Jun 2024 06:14:00 +0100 Subject: [PATCH 10/26] Revert "Remove `BaseNodesXContentResponse` (#109795)" This reverts commit 092b22c1bdc7c4326ea4c5bb3d84330be0199667. --- .../node/stats/NodesStatsResponse.java | 32 +++++---------- .../nodes/BaseNodesXContentResponse.java | 40 +++++++++++++++++++ 2 files changed, 50 insertions(+), 22 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/action/support/nodes/BaseNodesXContentResponse.java diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/stats/NodesStatsResponse.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/stats/NodesStatsResponse.java index 0be59206520cb..09bb6909191d1 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/stats/NodesStatsResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/stats/NodesStatsResponse.java @@ -10,22 +10,20 @@ import org.elasticsearch.action.FailedNodeException; import org.elasticsearch.action.support.TransportAction; -import org.elasticsearch.action.support.nodes.BaseNodesResponse; +import org.elasticsearch.action.support.nodes.BaseNodesXContentResponse; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.common.Strings; import org.elasticsearch.common.collect.Iterators; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.ChunkedToXContentHelper; -import org.elasticsearch.common.xcontent.ChunkedToXContentObject; -import org.elasticsearch.rest.action.RestActions; import org.elasticsearch.xcontent.ToXContent; import java.io.IOException; import java.util.Iterator; import java.util.List; -public class NodesStatsResponse extends 
BaseNodesResponse implements ChunkedToXContentObject { +public class NodesStatsResponse extends BaseNodesXContentResponse { public NodesStatsResponse(ClusterName clusterName, List nodes, List failures) { super(clusterName, nodes, failures); @@ -42,25 +40,15 @@ protected void writeNodesTo(StreamOutput out, List nodes) throws IOEx } @Override - public Iterator toXContentChunked(ToXContent.Params params) { + protected Iterator xContentChunks(ToXContent.Params outerParams) { return Iterators.concat( - - ChunkedToXContentHelper.singleChunk((b, p) -> { - b.startObject(); - RestActions.buildNodesHeader(b, p, this); - return b.field("cluster_name", getClusterName().value()).startObject("nodes"); - }), - Iterators.flatMap( - getNodes().iterator(), - nodeStats -> Iterators.concat( - ChunkedToXContentHelper.singleChunk( - (b, p) -> b.startObject(nodeStats.getNode().getId()).field("timestamp", nodeStats.getTimestamp()) - ), - nodeStats.toXContentChunked(params), - ChunkedToXContentHelper.endObject() - ) - ), - ChunkedToXContentHelper.singleChunk((b, p) -> b.endObject().endObject()) + ChunkedToXContentHelper.startObject("nodes"), + Iterators.flatMap(getNodes().iterator(), nodeStats -> Iterators.concat(Iterators.single((builder, params) -> { + builder.startObject(nodeStats.getNode().getId()); + builder.field("timestamp", nodeStats.getTimestamp()); + return builder; + }), nodeStats.toXContentChunked(outerParams), ChunkedToXContentHelper.endObject())), + ChunkedToXContentHelper.endObject() ); } diff --git a/server/src/main/java/org/elasticsearch/action/support/nodes/BaseNodesXContentResponse.java b/server/src/main/java/org/elasticsearch/action/support/nodes/BaseNodesXContentResponse.java new file mode 100644 index 0000000000000..ac193601212c1 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/support/nodes/BaseNodesXContentResponse.java @@ -0,0 +1,40 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.action.support.nodes; + +import org.elasticsearch.action.FailedNodeException; +import org.elasticsearch.cluster.ClusterName; +import org.elasticsearch.common.collect.Iterators; +import org.elasticsearch.common.xcontent.ChunkedToXContent; +import org.elasticsearch.common.xcontent.ChunkedToXContentHelper; +import org.elasticsearch.rest.action.RestActions; +import org.elasticsearch.xcontent.ToXContent; + +import java.util.Iterator; +import java.util.List; + +public abstract class BaseNodesXContentResponse extends BaseNodesResponse + implements + ChunkedToXContent { + + protected BaseNodesXContentResponse(ClusterName clusterName, List nodes, List failures) { + super(clusterName, nodes, failures); + } + + @Override + public final Iterator toXContentChunked(ToXContent.Params params) { + return Iterators.concat(Iterators.single((b, p) -> { + b.startObject(); + RestActions.buildNodesHeader(b, p, this); + return b.field("cluster_name", getClusterName().value()); + }), xContentChunks(params), ChunkedToXContentHelper.endObject()); + } + + protected abstract Iterator xContentChunks(ToXContent.Params outerParams); +} From c2ca504c1bc3e3e0dcf152255311a2db34fb5e81 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Tue, 18 Jun 2024 08:31:39 +0100 Subject: [PATCH 11/26] Add an explicit synthetic mode for nested fields (#109809) This change adds a synthetic mode for nested fields that recursively load nested objects from stored fields and doc values. The order of the sub-objects is preserved since they are indexed in separate Lucene documents. This change also introduces the `store_array_source` mode in the nested field options. 
This option is disabled by default when synthetic is used but users can opt-in for this behaviour. --- .../indices.create/20_synthetic_source.yml | 222 ++++++++++++++++-- .../index/mapper/DocumentParser.java | 14 +- .../index/mapper/DocumentParserContext.java | 8 +- .../mapper/IgnoredSourceFieldMapper.java | 16 +- .../index/mapper/NestedObjectMapper.java | 135 ++++++++++- .../index/mapper/ObjectMapper.java | 41 ++-- .../mapper/IgnoredSourceFieldMapperTests.java | 8 +- .../index/mapper/NestedObjectMapperTests.java | 170 ++++++++++++++ .../index/mapper/MapperServiceTestCase.java | 28 ++- 9 files changed, 579 insertions(+), 63 deletions(-) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml index a763d6e457490..3d95712d30b30 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml @@ -863,18 +863,22 @@ nested object: - '{ "create": { } }' - '{ "name": "aaaa", "nested_field": {"a": 1, "b": 2}, "nested_array": [{ "a": 10, "b": 20 }, { "a": 100, "b": 200 }] }' + - match: { errors: false } + - do: search: index: test - - match: { hits.total.value: 1 } - - match: { hits.hits.0._source.name: aaaa } - - match: { hits.hits.0._source.nested_field.a: 1 } - - match: { hits.hits.0._source.nested_field.b: 2 } - - match: { hits.hits.0._source.nested_array.0.a: 10 } - - match: { hits.hits.0._source.nested_array.0.b: 20 } - - match: { hits.hits.0._source.nested_array.1.a: 100 } - - match: { hits.hits.0._source.nested_array.1.b: 200 } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._source.name: aaaa } + - length: { hits.hits.0._source.nested_field: 2 } + - match: { hits.hits.0._source.nested_field.a: 1 } + - match: { hits.hits.0._source.nested_field.b: 2 } 
+ - length: { hits.hits.0._source.nested_array: 2 } + - match: { hits.hits.0._source.nested_array.0.a: 10 } + - match: { hits.hits.0._source.nested_array.0.b: 20 } + - match: { hits.hits.0._source.nested_array.1.a: 100 } + - match: { hits.hits.0._source.nested_array.1.b: 200 } --- @@ -906,15 +910,201 @@ nested object next to regular: - '{ "create": { } }' - '{ "name": "aaaa", "path": { "to": { "nested": [{ "a": 10, "b": 20 }, { "a": 100, "b": 200 } ], "regular": [{ "a": 10, "b": 20 }, { "a": 100, "b": 200 } ] } } }' + - match: { errors: false } + - do: search: index: test - - match: { hits.total.value: 1 } - - match: { hits.hits.0._source.name: aaaa } - - match: { hits.hits.0._source.path.to.nested.0.a: 10 } - - match: { hits.hits.0._source.path.to.nested.0.b: 20 } - - match: { hits.hits.0._source.path.to.nested.1.a: 100 } - - match: { hits.hits.0._source.path.to.nested.1.b: 200 } - - match: { hits.hits.0._source.path.to.regular.a: [ 10, 100 ] } - - match: { hits.hits.0._source.path.to.regular.b: [ 20, 200 ] } + - match: { hits.total.value: 1 } + - match: { hits.hits.0._source.name: aaaa } + - length: { hits.hits.0._source.path.to.nested: 2 } + - match: { hits.hits.0._source.path.to.nested.0.a: 10 } + - match: { hits.hits.0._source.path.to.nested.0.b: 20 } + - match: { hits.hits.0._source.path.to.nested.1.a: 100 } + - match: { hits.hits.0._source.path.to.nested.1.b: 200 } + - match: { hits.hits.0._source.path.to.regular.a: [ 10, 100 ] } + - match: { hits.hits.0._source.path.to.regular.b: [ 20, 200 ] } + + +--- +nested object with disabled: + - requires: + cluster_features: ["mapper.track_ignored_source"] + reason: requires tracking ignored source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + obj_field: + properties: + obj1: + enabled: false + sub_nested: + type: nested + nested_field: + type: nested + properties: + obj1: + enabled: false + nested_array: + type: nested + properties: + obj1: + enabled: 
false + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "id": 0, "nested_field": {"a": 1, "b": 2, "obj1": { "foo": "bar", "k": [1, 2, 3]}}, "nested_array": [{ "a": 10, "b": 20, "obj1": [{"field1": 1, "field2": 2}, {"field3": 3, "field4": 4}]}, { "a": 100, "b": 200, "obj1": {"field5": 5, "field6": 6}}]}' + - '{ "create": { } }' + - '{ "id": 1, "obj_field": {"a": 1, "b": 2, "obj1": { "foo": "bar", "k": [1, 2, 3]}, "sub_nested": [{ "a": 10, "b": 20}, { "a": 100, "b": 200}]}}' + + - match: { errors: false } + + - do: + search: + index: test + sort: "id" + + - match: { hits.total.value: 2 } + - length: { hits.hits.0._source: 3 } + - match: { hits.hits.0._source.id: 0 } + - length: { hits.hits.0._source.nested_field: 3 } + - match: { hits.hits.0._source.nested_field.a: 1 } + - match: { hits.hits.0._source.nested_field.b: 2 } + - length: { hits.hits.0._source.nested_field.obj1: 2 } + - match: { hits.hits.0._source.nested_field.obj1.foo: "bar" } + - match: { hits.hits.0._source.nested_field.obj1.k: [1, 2, 3] } + - length: { hits.hits.0._source.nested_array: 2 } + - match: { hits.hits.0._source.nested_array.0.a: 10 } + - match: { hits.hits.0._source.nested_array.0.b: 20 } + - length: { hits.hits.0._source.nested_array.0.obj1: 2 } + - match: { hits.hits.0._source.nested_array.0.obj1.0.field1: 1 } + - match: { hits.hits.0._source.nested_array.0.obj1.0.field2: 2 } + - match: { hits.hits.0._source.nested_array.0.obj1.1.field3: 3 } + - match: { hits.hits.0._source.nested_array.0.obj1.1.field4: 4 } + - length: { hits.hits.0._source.nested_array.1: 3 } + - match: { hits.hits.0._source.nested_array.1.a: 100 } + - match: { hits.hits.0._source.nested_array.1.b: 200 } + - length: { hits.hits.0._source.nested_array.1.obj1: 2 } + - match: { hits.hits.0._source.nested_array.1.obj1.field5: 5 } + - match: { hits.hits.0._source.nested_array.1.obj1.field6: 6 } + - length: { hits.hits.1._source: 2 } + - match: { hits.hits.1._source.id: 1 } + - length: { 
hits.hits.1._source.obj_field: 4 } + - match: { hits.hits.1._source.obj_field.a: 1 } + - match: { hits.hits.1._source.obj_field.b: 2 } + - length: { hits.hits.1._source.obj_field.obj1: 2 } + - match: { hits.hits.1._source.obj_field.obj1.foo: "bar" } + - match: { hits.hits.1._source.obj_field.obj1.k: [ 1, 2, 3 ] } + - length: { hits.hits.1._source.obj_field.sub_nested: 2 } + - length: { hits.hits.1._source.obj_field.sub_nested.0: 2 } + - match: { hits.hits.1._source.obj_field.sub_nested.0.a: 10 } + - match: { hits.hits.1._source.obj_field.sub_nested.0.b: 20 } + - length: { hits.hits.1._source.obj_field.sub_nested.1: 2 } + - match: { hits.hits.1._source.obj_field.sub_nested.1.a: 100 } + - match: { hits.hits.1._source.obj_field.sub_nested.1.b: 200 } + + +--- +doubly nested object: + - requires: + cluster_features: ["mapper.track_ignored_source"] + reason: requires tracking ignored source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + obj_field: + properties: + obj1: + enabled: false + sub_nested: + type: nested + nested_field: + type: nested + properties: + sub_nested_field: + type: nested + properties: + obj1: + enabled: false + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "id": 0, "nested_field": {"a": 1, "b": 2, "sub_nested_field": {"foo": "bar", "k": [1, 2, 3]}}}' + - '{ "create": { } }' + - '{ "id": 1, "nested_field": {"a": 2, "b": 3, "sub_nested_field": [{"foo": "baz", "k": [4, 50, 6]}, {"foo": "bar"}]}}' + - '{ "create": { } }' + - '{ "id": 2, "nested_field": [{"a": 20, "b": 30, "sub_nested_field": [{"foo": "foobar", "k": [7, 8, 9]}, {"k": [400, 500, 6]}]}, {"a": 0, "b": 33, "sub_nested_field": [{"other": "value", "k": [1, 2, -3]}, {"number": 42}]}]}' + - '{ "create": { } }' + - '{ "id": 3}' + + - match: { errors: false } + + - do: + search: + index: test + sort: "id" + + - match: { hits.total.value: 4 } + - length: { hits.hits.0._source: 2 } + - match: { 
hits.hits.0._source.id: 0 } + - length: { hits.hits.0._source.nested_field: 3 } + - match: { hits.hits.0._source.nested_field.a: 1 } + - match: { hits.hits.0._source.nested_field.b: 2 } + - length: { hits.hits.0._source.nested_field.sub_nested_field: 2 } + - match: { hits.hits.0._source.nested_field.sub_nested_field.foo: "bar" } + - match: { hits.hits.0._source.nested_field.sub_nested_field.k: [ 1, 2, 3 ] } + - length: { hits.hits.1._source: 2 } + - match: { hits.hits.1._source.id: 1 } + - length: { hits.hits.1._source.nested_field: 3 } + - match: { hits.hits.1._source.nested_field.a: 2 } + - match: { hits.hits.1._source.nested_field.b: 3 } + - length: { hits.hits.1._source.nested_field.sub_nested_field: 2 } + - length: { hits.hits.1._source.nested_field.sub_nested_field.0: 2 } + - match: { hits.hits.1._source.nested_field.sub_nested_field.0.foo: "baz" } + - match: { hits.hits.1._source.nested_field.sub_nested_field.0.k: [ 4, 6, 50 ] } + - length: { hits.hits.1._source.nested_field.sub_nested_field.1: 1 } + - match: { hits.hits.1._source.nested_field.sub_nested_field.1.foo: "bar" } + - length: { hits.hits.2._source: 2 } + - match: { hits.hits.2._source.id: 2 } + - length: { hits.hits.2._source.nested_field: 2 } + - length: { hits.hits.2._source.nested_field.0: 3 } + - match: { hits.hits.2._source.nested_field.0.a: 20 } + - match: { hits.hits.2._source.nested_field.0.b: 30 } + - length: { hits.hits.2._source.nested_field.0.sub_nested_field: 2 } + - length: { hits.hits.2._source.nested_field.0.sub_nested_field.0: 2 } + - match: { hits.hits.2._source.nested_field.0.sub_nested_field.0.foo: "foobar" } + - match: { hits.hits.2._source.nested_field.0.sub_nested_field.0.k: [ 7, 8, 9 ] } + - length: { hits.hits.2._source.nested_field.0.sub_nested_field.1: 1 } + - match: { hits.hits.2._source.nested_field.0.sub_nested_field.1.k: [6, 400, 500] } + - length: { hits.hits.2._source.nested_field.1: 3 } + - match: { hits.hits.2._source.nested_field.1.a: 0 } + - match: { 
hits.hits.2._source.nested_field.1.b: 33 } + - length: { hits.hits.2._source.nested_field.1.sub_nested_field: 2 } + - length: { hits.hits.2._source.nested_field.1.sub_nested_field.0: 2 } + - match: { hits.hits.2._source.nested_field.1.sub_nested_field.0.other: "value" } + - match: { hits.hits.2._source.nested_field.1.sub_nested_field.0.k: [ -3, 1, 2 ] } + - length: { hits.hits.2._source.nested_field.1.sub_nested_field.1: 1 } + - match: { hits.hits.2._source.nested_field.1.sub_nested_field.1.number: 42 } + - length: { hits.hits.3._source: 1 } + - match: { hits.hits.3._source.id: 3 } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java index 3d4f0823bb1cf..034e8fd0770f3 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java @@ -132,7 +132,8 @@ private static void internalParseDocument(MetadataFieldMapper[] metadataFieldsMa new IgnoredSourceFieldMapper.NameValue( MapperService.SINGLE_MAPPING_NAME, 0, - XContentDataHelper.encodeToken(context.parser()) + XContentDataHelper.encodeToken(context.parser()), + context.doc() ) ); } else { @@ -268,7 +269,8 @@ static void parseObjectOrNested(DocumentParserContext context) throws IOExceptio new IgnoredSourceFieldMapper.NameValue( context.parent().fullPath(), context.parent().fullPath().indexOf(currentFieldName), - XContentDataHelper.encodeToken(parser) + XContentDataHelper.encodeToken(parser), + context.doc() ) ); } else { @@ -288,13 +290,14 @@ static void parseObjectOrNested(DocumentParserContext context) throws IOExceptio if (context.parent().isNested()) { // Handle a nested object that doesn't contain an array. Arrays are handled in #parseNonDynamicArray. 
- if (context.mappingLookup().isSourceSynthetic() && context.getClonedSource() == false) { + if (context.parent().storeArraySource() && context.mappingLookup().isSourceSynthetic() && context.getClonedSource() == false) { Tuple tuple = XContentDataHelper.cloneSubContext(context); context.addIgnoredField( new IgnoredSourceFieldMapper.NameValue( context.parent().name(), context.parent().fullPath().indexOf(context.parent().simpleName()), - XContentDataHelper.encodeXContentBuilder(tuple.v2()) + XContentDataHelper.encodeXContentBuilder(tuple.v2()), + context.doc() ) ); context = tuple.v1(); @@ -661,9 +664,8 @@ private static void parseNonDynamicArray( && (objectMapper.storeArraySource() || objectMapper.dynamic == ObjectMapper.Dynamic.RUNTIME); boolean fieldWithFallbackSyntheticSource = mapper instanceof FieldMapper fieldMapper && fieldMapper.syntheticSourceMode() == FieldMapper.SyntheticSourceMode.FALLBACK; - boolean nestedObject = mapper instanceof NestedObjectMapper; boolean dynamicRuntimeContext = context.dynamic() == ObjectMapper.Dynamic.RUNTIME; - if (objectRequiresStoringSource || fieldWithFallbackSyntheticSource || nestedObject || dynamicRuntimeContext) { + if (objectRequiresStoringSource || fieldWithFallbackSyntheticSource || dynamicRuntimeContext) { Tuple tuple = XContentDataHelper.cloneSubContext(context); context.addIgnoredField( IgnoredSourceFieldMapper.NameValue.fromContext( diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java index fe1ad85d6a7c1..f47d86b746a38 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java @@ -24,13 +24,11 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import 
java.util.List; import java.util.Map; import java.util.Set; -import java.util.TreeSet; /** * Context used when parsing incoming documents. Holds everything that is needed to parse a document as well as @@ -106,7 +104,7 @@ public int get() { private final MappingParserContext mappingParserContext; private final SourceToParse sourceToParse; private final Set ignoredFields; - private final Set ignoredFieldValues; + private final List ignoredFieldValues; private final Map> dynamicMappers; private final DynamicMapperSize dynamicMappersSize; private final Map dynamicObjectMappers; @@ -128,7 +126,7 @@ private DocumentParserContext( MappingParserContext mappingParserContext, SourceToParse sourceToParse, Set ignoreFields, - Set ignoredFieldValues, + List ignoredFieldValues, Map> dynamicMappers, Map dynamicObjectMappers, Map> dynamicRuntimeFields, @@ -198,7 +196,7 @@ protected DocumentParserContext( mappingParserContext, source, new HashSet<>(), - new TreeSet<>(Comparator.comparing(IgnoredSourceFieldMapper.NameValue::name)), + new ArrayList<>(), new HashMap<>(), new HashMap<>(), new HashMap<>(), diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java index 6e243e3575d37..f64511f8396ec 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java @@ -17,7 +17,10 @@ import java.io.IOException; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; import java.util.Collections; +import java.util.Comparator; +import java.util.List; /** @@ -53,7 +56,7 @@ public class IgnoredSourceFieldMapper extends MetadataFieldMapper { * the full name of the parent field * - the value, encoded as a byte array */ - public record NameValue(String name, int parentOffset, BytesRef value) { + public record NameValue(String name, int 
parentOffset, BytesRef value, LuceneDocument doc) { /** * Factory method, for use with fields under the parent object. It doesn't apply to objects at root level. * @param context the parser context, containing a non-null parent @@ -62,7 +65,7 @@ public record NameValue(String name, int parentOffset, BytesRef value) { */ public static NameValue fromContext(DocumentParserContext context, String name, BytesRef value) { int parentOffset = context.parent() instanceof RootObjectMapper ? 0 : context.parent().fullPath().length() + 1; - return new NameValue(name, parentOffset, value); + return new NameValue(name, parentOffset, value, context.doc()); } String getParentFieldName() { @@ -112,8 +115,11 @@ protected String contentType() { public void postParse(DocumentParserContext context) { // Ignored values are only expected in synthetic mode. assert context.getIgnoredFieldValues().isEmpty() || context.mappingLookup().isSourceSynthetic(); - for (NameValue nameValue : context.getIgnoredFieldValues()) { - context.doc().add(new StoredField(NAME, encode(nameValue))); + List ignoredFieldValues = new ArrayList<>(context.getIgnoredFieldValues()); + // ensure consistent ordering when retrieving synthetic source + Collections.sort(ignoredFieldValues, Comparator.comparing(NameValue::name)); + for (NameValue nameValue : ignoredFieldValues) { + nameValue.doc().add(new StoredField(NAME, encode(nameValue))); } } @@ -136,7 +142,7 @@ static NameValue decode(Object field) { int parentOffset = encodedSize / PARENT_OFFSET_IN_NAME_OFFSET; String name = new String(bytes, 4, nameSize, StandardCharsets.UTF_8); BytesRef value = new BytesRef(bytes, 4 + nameSize, bytes.length - nameSize - 4); - return new NameValue(name, parentOffset, value); + return new NameValue(name, parentOffset, value, null); } // This mapper doesn't contribute to source directly as it has no access to the object structure. 
Instead, its contents diff --git a/server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java index 65748847406ea..4bc633296a832 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java @@ -8,19 +8,31 @@ package org.elasticsearch.index.mapper; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.join.BitSetProducer; +import org.apache.lucene.util.BitSet; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; +import org.elasticsearch.index.fieldvisitor.LeafStoredFieldLoader; import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import java.util.Locale; import java.util.Map; import java.util.function.Function; +import java.util.function.Supplier; +import java.util.stream.Stream; + +import static org.elasticsearch.index.mapper.SourceFieldMetrics.NOOP; /** * A Mapper for nested objects @@ -34,12 +46,12 @@ public static class Builder extends ObjectMapper.Builder { private Explicit includeInRoot = Explicit.IMPLICIT_FALSE; private Explicit includeInParent = Explicit.IMPLICIT_FALSE; private final IndexVersion indexCreatedVersion; - private final Function bitsetProducer; + private final Function bitSetProducer; public Builder(String name, IndexVersion indexCreatedVersion, Function bitSetProducer) { super(name, Explicit.IMPLICIT_TRUE); this.indexCreatedVersion = indexCreatedVersion; - 
this.bitsetProducer = bitSetProducer; + this.bitSetProducer = bitSetProducer; } Builder includeInRoot(boolean includeInRoot) { @@ -91,12 +103,13 @@ public NestedObjectMapper build(MapperBuilderContext context) { buildMappers(nestedContext), enabled, dynamic, + storeArraySource, includeInParent, includeInRoot, parentTypeFilter, nestedTypePath, nestedTypeFilter, - bitsetProducer + bitSetProducer ); } } @@ -179,6 +192,7 @@ public MapperBuilderContext createChildContext(String name, Dynamic dynamic) { Map mappers, Explicit enabled, ObjectMapper.Dynamic dynamic, + Explicit storeArraySource, Explicit includeInParent, Explicit includeInRoot, Query parentTypeFilter, @@ -186,7 +200,7 @@ public MapperBuilderContext createChildContext(String name, Dynamic dynamic) { Query nestedTypeFilter, Function bitsetProducer ) { - super(name, fullPath, enabled, Explicit.IMPLICIT_TRUE, Explicit.IMPLICIT_FALSE, dynamic, mappers); + super(name, fullPath, enabled, Explicit.IMPLICIT_TRUE, storeArraySource, dynamic, mappers); this.parentTypeFilter = parentTypeFilter; this.nestedTypePath = nestedTypePath; this.nestedTypeFilter = nestedTypeFilter; @@ -246,6 +260,7 @@ NestedObjectMapper withoutMappers() { Map.of(), enabled, dynamic, + storeArraySource, includeInParent, includeInRoot, parentTypeFilter, @@ -271,6 +286,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (isEnabled() != Defaults.ENABLED) { builder.field("enabled", enabled.value()); } + if (storeArraySource != Defaults.STORE_ARRAY_SOURCE) { + builder.field(STORE_ARRAY_SOURCE_PARAM, storeArraySource.value()); + } serializeMappers(builder, params); return builder.endObject(); } @@ -317,6 +335,7 @@ public ObjectMapper merge(Mapper mergeWith, MapperMergeContext parentMergeContex mergeResult.mappers(), mergeResult.enabled(), mergeResult.dynamic(), + mergeResult.trackArraySource(), incInParent, incInRoot, parentTypeFilter, @@ -346,7 +365,111 @@ protected MapperMergeContext 
createChildContext(MapperMergeContext mapperMergeCo @Override public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { - // IgnoredSourceFieldMapper integration takes care of writing the source for nested objects. - return SourceLoader.SyntheticFieldLoader.NOTHING; + if (storeArraySource()) { + // IgnoredSourceFieldMapper integration takes care of writing the source for nested objects that enabled store_array_source. + return SourceLoader.SyntheticFieldLoader.NOTHING; + } + + SourceLoader sourceLoader = new SourceLoader.Synthetic(() -> super.syntheticFieldLoader(mappers.values().stream(), true), NOOP); + var storedFieldLoader = org.elasticsearch.index.fieldvisitor.StoredFieldLoader.create(false, sourceLoader.requiredStoredFields()); + return new NestedSyntheticFieldLoader( + storedFieldLoader, + sourceLoader, + () -> bitsetProducer.apply(parentTypeFilter), + nestedTypeFilter + ); + } + + private class NestedSyntheticFieldLoader implements SourceLoader.SyntheticFieldLoader { + private final org.elasticsearch.index.fieldvisitor.StoredFieldLoader storedFieldLoader; + private final SourceLoader sourceLoader; + private final Supplier parentBitSetProducer; + private final Query childFilter; + + private LeafStoredFieldLoader leafStoredFieldLoader; + private SourceLoader.Leaf leafSourceLoader; + private final List children = new ArrayList<>(); + + private NestedSyntheticFieldLoader( + org.elasticsearch.index.fieldvisitor.StoredFieldLoader storedFieldLoader, + SourceLoader sourceLoader, + Supplier parentBitSetProducer, + Query childFilter + ) { + this.storedFieldLoader = storedFieldLoader; + this.sourceLoader = sourceLoader; + this.parentBitSetProducer = parentBitSetProducer; + this.childFilter = childFilter; + } + + @Override + public Stream> storedFieldLoaders() { + return Stream.of(); + } + + @Override + public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException { + this.children.clear(); + this.leafStoredFieldLoader = 
storedFieldLoader.getLoader(leafReader.getContext(), null); + this.leafSourceLoader = sourceLoader.leaf(leafReader, null); + + IndexSearcher searcher = new IndexSearcher(leafReader); + searcher.setQueryCache(null); + var childScorer = searcher.createWeight(childFilter, ScoreMode.COMPLETE_NO_SCORES, 1f).scorer(leafReader.getContext()); + if (childScorer != null) { + var parentDocs = parentBitSetProducer.get().getBitSet(leafReader.getContext()); + return parentDoc -> { + collectChildren(parentDoc, parentDocs, childScorer.iterator()); + return children.size() > 0; + }; + } else { + return parentDoc -> false; + } + } + + private List collectChildren(int parentDoc, BitSet parentDocs, DocIdSetIterator childIt) throws IOException { + assert parentDocs.get(parentDoc) : "wrong context, doc " + parentDoc + " is not a parent of " + nestedTypePath; + final int prevParentDoc = parentDocs.prevSetBit(parentDoc - 1); + int childDocId = childIt.docID(); + if (childDocId <= prevParentDoc) { + childDocId = childIt.advance(prevParentDoc + 1); + } + + children.clear(); + for (; childDocId < parentDoc; childDocId = childIt.nextDoc()) { + children.add(childDocId); + } + return children; + } + + @Override + public boolean hasValue() { + return children.size() > 0; + } + + @Override + public void write(XContentBuilder b) throws IOException { + assert (children != null && children.size() > 0); + if (children.size() == 1) { + b.startObject(simpleName()); + leafStoredFieldLoader.advanceTo(children.get(0)); + leafSourceLoader.write(leafStoredFieldLoader, children.get(0), b); + b.endObject(); + } else { + b.startArray(simpleName()); + for (int childId : children) { + b.startObject(); + leafStoredFieldLoader.advanceTo(childId); + leafSourceLoader.write(leafStoredFieldLoader, childId, b); + b.endObject(); + } + b.endArray(); + } + } + + @Override + public String fieldName() { + return name(); + } } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java 
b/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java index b21b77bc86dd8..356c103756bac 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java @@ -756,12 +756,16 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep } - public SourceLoader.SyntheticFieldLoader syntheticFieldLoader(Stream mappers) { + protected SourceLoader.SyntheticFieldLoader syntheticFieldLoader(Stream mappers, boolean isFragment) { var fields = mappers.sorted(Comparator.comparing(Mapper::name)) .map(Mapper::syntheticFieldLoader) .filter(l -> l != SourceLoader.SyntheticFieldLoader.NOTHING) .toList(); - return new SyntheticSourceFieldLoader(fields); + return new SyntheticSourceFieldLoader(fields, isFragment); + } + + public SourceLoader.SyntheticFieldLoader syntheticFieldLoader(Stream mappers) { + return syntheticFieldLoader(mappers, false); } @Override @@ -771,11 +775,13 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { private class SyntheticSourceFieldLoader implements SourceLoader.SyntheticFieldLoader { private final List fields; + private final boolean isFragment; private boolean hasValue; private List ignoredValues; - private SyntheticSourceFieldLoader(List fields) { + private SyntheticSourceFieldLoader(List fields, boolean isFragment) { this.fields = fields; + this.isFragment = isFragment; } @Override @@ -830,18 +836,21 @@ public void write(XContentBuilder b) throws IOException { if (hasValue == false) { return; } - if (isRoot()) { - if (isEnabled() == false) { - // If the root object mapper is disabled, it is expected to contain - // the source encapsulated within a single ignored source value. 
- assert ignoredValues.size() == 1 : ignoredValues.size(); - XContentDataHelper.decodeAndWrite(b, ignoredValues.get(0).value()); - ignoredValues = null; - return; + if (isRoot() && isEnabled() == false) { + // If the root object mapper is disabled, it is expected to contain + // the source encapsulated within a single ignored source value. + assert ignoredValues.size() == 1 : ignoredValues.size(); + XContentDataHelper.decodeAndWrite(b, ignoredValues.get(0).value()); + ignoredValues = null; + return; + } + + if (isFragment == false) { + if (isRoot()) { + b.startObject(); + } else { + b.startObject(simpleName()); } - b.startObject(); - } else { - b.startObject(simpleName()); } if (ignoredValues != null && ignoredValues.isEmpty() == false) { @@ -868,7 +877,9 @@ public void write(XContentBuilder b) throws IOException { } } hasValue = false; - b.endObject(); + if (isFragment == false) { + b.endObject(); + } } @Override diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java index 71a0e001dc72a..e7f8a16c5cc10 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java @@ -87,8 +87,8 @@ public void testMultipleIgnoredFieldsRootObject() throws IOException { String stringValue = randomAlphaOfLength(20); String syntheticSource = getSyntheticSourceWithFieldLimit(b -> { b.field("boolean_value", booleanValue); - b.field("string_value", stringValue); b.field("int_value", intValue); + b.field("string_value", stringValue); }); assertEquals(String.format(Locale.ROOT, """ {"boolean_value":%s,"int_value":%s,"string_value":"%s"}""", booleanValue, intValue, stringValue), syntheticSource); @@ -626,6 +626,7 @@ public void testNestedObjectWithField() throws IOException { DocumentMapper documentMapper = 
createMapperService(syntheticSourceMapping(b -> { b.startObject("path").field("type", "nested"); { + b.field("store_array_source", true); b.startObject("properties"); { b.startObject("foo").field("type", "keyword").endObject(); @@ -647,6 +648,7 @@ public void testNestedObjectWithArray() throws IOException { DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { b.startObject("path").field("type", "nested"); { + b.field("store_array_source", true); b.startObject("properties"); { b.startObject("foo").field("type", "keyword").endObject(); @@ -679,6 +681,7 @@ public void testNestedSubobjectWithField() throws IOException { b.startObject("int_value").field("type", "integer").endObject(); b.startObject("to").field("type", "nested"); { + b.field("store_array_source", true); b.startObject("properties"); { b.startObject("foo").field("type", "keyword").endObject(); @@ -719,6 +722,7 @@ public void testNestedSubobjectWithArray() throws IOException { b.startObject("int_value").field("type", "integer").endObject(); b.startObject("to").field("type", "nested"); { + b.field("store_array_source", true); b.startObject("properties"); { b.startObject("foo").field("type", "keyword").endObject(); @@ -758,7 +762,7 @@ public void testNestedSubobjectWithArray() throws IOException { public void testNestedObjectIncludeInRoot() throws IOException { DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { - b.startObject("path").field("type", "nested").field("include_in_root", true); + b.startObject("path").field("type", "nested").field("store_array_source", true).field("include_in_root", true); { b.startObject("properties"); { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/NestedObjectMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/NestedObjectMapperTests.java index 412077b659b98..c767429d4c0fb 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/NestedObjectMapperTests.java +++ 
b/server/src/test/java/org/elasticsearch/index/mapper/NestedObjectMapperTests.java @@ -29,6 +29,7 @@ import java.util.Collection; import java.util.HashSet; import java.util.List; +import java.util.Locale; import java.util.function.Function; import static org.hamcrest.Matchers.containsString; @@ -1567,6 +1568,175 @@ public void testNestedMapperFilters() throws Exception { assertThat(mapper2.parentTypeFilter(), equalTo(mapper1.nestedTypeFilter())); } + public void testStoreArraySourceinSyntheticSourceMode() throws IOException { + DocumentMapper mapper = createDocumentMapper(syntheticSourceMapping(b -> { + b.startObject("o").field("type", "nested").field(ObjectMapper.STORE_ARRAY_SOURCE_PARAM, true).endObject(); + })); + assertNotNull(mapper.mapping().getRoot().getMapper("o")); + } + + public void testStoreArraySourceThrowsInNonSyntheticSourceMode() { + var exception = expectThrows(MapperParsingException.class, () -> createDocumentMapper(mapping(b -> { + b.startObject("o").field("type", "nested").field(ObjectMapper.STORE_ARRAY_SOURCE_PARAM, true).endObject(); + }))); + assertEquals("Parameter [store_array_source] can only be set in synthetic source mode.", exception.getMessage()); + } + + public void testSyntheticNestedWithObject() throws IOException { + DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { + b.startObject("path").field("type", "nested"); + { + b.startObject("properties"); + { + b.startObject("foo").field("type", "keyword").endObject(); + b.startObject("bar").field("type", "keyword").endObject(); + } + b.endObject(); + } + b.endObject(); + })).documentMapper(); + var syntheticSource = syntheticSource( + documentMapper, + b -> { b.startObject("path").field("foo", "A").field("bar", "B").endObject(); } + ); + assertEquals(""" + {"path":{"bar":"B","foo":"A"}}""", syntheticSource); + } + + public void testSyntheticNestedWithArray() throws IOException { + DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b 
-> { + b.startObject("path").field("type", "nested"); + { + b.startObject("properties"); + { + b.startObject("foo").field("type", "keyword").endObject(); + b.startObject("bar").field("type", "keyword").endObject(); + } + b.endObject(); + } + b.endObject(); + })).documentMapper(); + var syntheticSource = syntheticSource(documentMapper, b -> { + b.startArray("path"); + { + b.startObject().field("foo", "A").field("bar", "B").endObject(); + b.startObject().field("foo", "C").field("bar", "D").endObject(); + } + b.endArray(); + }); + assertEquals(""" + {"path":[{"bar":"B","foo":"A"},{"bar":"D","foo":"C"}]}""", syntheticSource); + } + + public void testSyntheticNestedWithSubObjects() throws IOException { + DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { + b.startObject("boolean_value").field("type", "boolean").endObject(); + b.startObject("path"); + { + b.field("type", "object"); + b.startObject("properties"); + { + b.startObject("int_value").field("type", "integer").endObject(); + b.startObject("to").field("type", "nested"); + { + b.startObject("properties"); + { + b.startObject("foo").field("type", "keyword").endObject(); + b.startObject("bar").field("type", "keyword").endObject(); + } + b.endObject(); + } + b.endObject(); + } + b.endObject(); + } + b.endObject(); + })).documentMapper(); + + boolean booleanValue = randomBoolean(); + int intValue = randomInt(); + var syntheticSource = syntheticSource(documentMapper, b -> { + b.field("boolean_value", booleanValue); + b.startObject("path"); + { + b.field("int_value", intValue); + b.startObject("to").field("foo", "A").field("bar", "B").endObject(); + } + b.endObject(); + }); + assertEquals(String.format(Locale.ROOT, """ + {"boolean_value":%s,"path":{"int_value":%s,"to":{"bar":"B","foo":"A"}}}""", booleanValue, intValue), syntheticSource); + } + + public void testSyntheticNestedWithSubArrays() throws IOException { + DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b 
-> { + b.startObject("boolean_value").field("type", "boolean").endObject(); + b.startObject("path"); + { + b.field("type", "object"); + b.startObject("properties"); + { + b.startObject("int_value").field("type", "integer").endObject(); + b.startObject("to").field("type", "nested"); + { + b.startObject("properties"); + { + b.startObject("foo").field("type", "keyword").endObject(); + b.startObject("bar").field("type", "keyword").endObject(); + } + b.endObject(); + } + b.endObject(); + } + b.endObject(); + } + b.endObject(); + })).documentMapper(); + + boolean booleanValue = randomBoolean(); + int intValue = randomInt(); + var syntheticSource = syntheticSource(documentMapper, b -> { + b.field("boolean_value", booleanValue); + b.startObject("path"); + { + b.field("int_value", intValue); + b.startArray("to"); + { + b.startObject().field("foo", "A").field("bar", "B").endObject(); + b.startObject().field("foo", "C").field("bar", "D").endObject(); + } + b.endArray(); + } + b.endObject(); + }); + assertEquals( + String.format(Locale.ROOT, """ + {"boolean_value":%s,"path":{"int_value":%s,"to":[{"bar":"B","foo":"A"},{"bar":"D","foo":"C"}]}}""", booleanValue, intValue), + syntheticSource + ); + } + + public void testSyntheticNestedWithIncludeInRoot() throws IOException { + DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { + b.startObject("path").field("type", "nested").field("include_in_root", true); + { + b.startObject("properties"); + { + b.startObject("foo").field("type", "keyword").endObject(); + b.startObject("bar").field("type", "keyword").endObject(); + } + b.endObject(); + } + b.endObject(); + })).documentMapper(); + var syntheticSource = syntheticSource( + documentMapper, + b -> { b.startObject("path").field("foo", "A").field("bar", "B").endObject(); } + ); + assertEquals(""" + {"path":{"bar":"B","foo":"A"}}""", syntheticSource); + } + private NestedObjectMapper createNestedObjectMapperWithAllParametersSet(CheckedConsumer 
propertiesBuilder) throws IOException { DocumentMapper mapper = createDocumentMapper(mapping(b -> { diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java index 50436ad64c8af..0486022620398 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java @@ -27,6 +27,7 @@ import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.BigArrays; @@ -34,6 +35,7 @@ import org.elasticsearch.common.util.MockPageCacheRecycler; import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.core.Nullable; +import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.analysis.AnalyzerScope; @@ -773,12 +775,14 @@ protected RandomIndexWriter indexWriterForSyntheticSource(Directory directory) t protected final String syntheticSource(DocumentMapper mapper, CheckedConsumer build) throws IOException { try (Directory directory = newDirectory()) { RandomIndexWriter iw = indexWriterForSyntheticSource(directory); - LuceneDocument doc = mapper.parse(source(build)).rootDoc(); - iw.addDocument(doc); + ParsedDocument doc = mapper.parse(source(build)); + doc.updateSeqID(0, 0); + doc.version().setLongValue(0); + iw.addDocuments(doc.docs()); iw.close(); - try (DirectoryReader reader = DirectoryReader.open(directory)) { - String syntheticSource = syntheticSource(mapper, reader, 0); - 
roundTripSyntheticSource(mapper, syntheticSource, reader); + try (DirectoryReader indexReader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) { + String syntheticSource = syntheticSource(mapper, indexReader, doc.docs().size() - 1); + roundTripSyntheticSource(mapper, syntheticSource, indexReader); return syntheticSource; } } @@ -797,10 +801,14 @@ protected final String syntheticSource(DocumentMapper mapper, CheckedConsumer Date: Tue, 18 Jun 2024 08:57:56 +0100 Subject: [PATCH 12/26] AwaitsFix: https://github.com/elastic/elasticsearch/issues/109838 --- muted-tests.yml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index b88be6ea829ae..b14943c7c7b69 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -10,7 +10,8 @@ tests: method: "testGuessIsDayFirstFromLocale" - class: "org.elasticsearch.test.rest.ClientYamlTestSuiteIT" issue: "https://github.com/elastic/elasticsearch/issues/108857" - method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale dependent mappings / dates}" + method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale\ + \ dependent mappings / dates}" - class: "org.elasticsearch.upgrades.SearchStatesIT" issue: "https://github.com/elastic/elasticsearch/issues/108991" method: "testCanMatch" @@ -19,7 +20,8 @@ tests: method: "testTrainedModelInference" - class: "org.elasticsearch.xpack.security.CoreWithSecurityClientYamlTestSuiteIT" issue: "https://github.com/elastic/elasticsearch/issues/109188" - method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale dependent mappings / dates}" + method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale\ + \ dependent mappings / dates}" - class: "org.elasticsearch.xpack.esql.qa.mixed.EsqlClientYamlIT" issue: "https://github.com/elastic/elasticsearch/issues/109189" method: "test {p0=esql/70_locale/Date format with Italian locale}" @@ 
-34,7 +36,8 @@ tests: method: "testTimestampFieldTypeExposedByAllIndicesServices" - class: "org.elasticsearch.analysis.common.CommonAnalysisClientYamlTestSuiteIT" issue: "https://github.com/elastic/elasticsearch/issues/109318" - method: "test {yaml=analysis-common/50_char_filters/pattern_replace error handling (too complex pattern)}" + method: "test {yaml=analysis-common/50_char_filters/pattern_replace error handling\ + \ (too complex pattern)}" - class: "org.elasticsearch.xpack.ml.integration.ClassificationHousePricingIT" issue: "https://github.com/elastic/elasticsearch/issues/101598" method: "testFeatureImportanceValues" @@ -61,6 +64,9 @@ tests: - class: org.elasticsearch.action.search.SearchProgressActionListenerIT method: testSearchProgressWithHits issue: https://github.com/elastic/elasticsearch/issues/109830 +- class: "org.elasticsearch.xpack.shutdown.NodeShutdownReadinessIT" + issue: "https://github.com/elastic/elasticsearch/issues/109838" + method: "testShutdownReadinessService" # Examples: # From 6eb9c10d0ccb164f623510ec63dc5fb916511b47 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Tue, 18 Jun 2024 10:07:14 +0200 Subject: [PATCH 13/26] Check array size before returning array item in script doc values (#109824) When accessing array elements from a script, if the backing array has enough items, meaning that there has previously been a doc with enough values, we let the request go through, and we end up returning items from the previous doc that had a value at that position if the current doc does not have enough elements. We should instead validate the length of the array for the current doc and eventually throw an error if the index goes over the available number of values. 
Closes #104998 --- docs/changelog/109824.yaml | 6 +++ .../index/fielddata/ScriptDocValues.java | 40 ++++++++----------- .../ScriptDocValuesGeoPointsTests.java | 2 + .../fielddata/ScriptDocValuesLongsTests.java | 3 ++ .../UnsignedLongScriptDocValues.java | 1 + 5 files changed, 28 insertions(+), 24 deletions(-) create mode 100644 docs/changelog/109824.yaml diff --git a/docs/changelog/109824.yaml b/docs/changelog/109824.yaml new file mode 100644 index 0000000000000..987e8c0a8b1a2 --- /dev/null +++ b/docs/changelog/109824.yaml @@ -0,0 +1,6 @@ +pr: 109824 +summary: Check array size before returning array item in script doc values +area: Infra/Scripting +type: bug +issues: + - 104998 diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java b/server/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java index 78e0c14b81e20..f6669075480dd 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java +++ b/server/src/main/java/org/elasticsearch/index/fielddata/ScriptDocValues.java @@ -95,6 +95,12 @@ protected void throwIfEmpty() { } } + protected void throwIfBeyondLength(int i) { + if (i >= size()) { + throw new IndexOutOfBoundsException("A document doesn't have a value for a field at position [" + i + "]!"); + } + } + public static class Longs extends ScriptDocValues { public Longs(Supplier supplier) { @@ -108,6 +114,7 @@ public long getValue() { @Override public Long get(int index) { throwIfEmpty(); + throwIfBeyondLength(index); return supplier.getInternal(index); } @@ -133,12 +140,7 @@ public ZonedDateTime getValue() { @Override public ZonedDateTime get(int index) { - if (supplier.size() == 0) { - throw new IllegalStateException( - "A document doesn't have a value for a field! " - + "Use doc[].size()==0 to check if a document is missing a field!" 
- ); - } + throwIfEmpty(); if (index >= supplier.size()) { throw new IndexOutOfBoundsException( "attempted to fetch the [" + index + "] date when there are only [" + supplier.size() + "] dates." @@ -207,12 +209,8 @@ public double getValue() { @Override public Double get(int index) { - if (supplier.size() == 0) { - throw new IllegalStateException( - "A document doesn't have a value for a field! " - + "Use doc[].size()==0 to check if a document is missing a field!" - ); - } + throwIfEmpty(); + throwIfBeyondLength(index); return supplier.getInternal(index); } @@ -312,12 +310,8 @@ public double getLon() { @Override public GeoPoint get(int index) { - if (supplier.size() == 0) { - throw new IllegalStateException( - "A document doesn't have a value for a field! " - + "Use doc[].size()==0 to check if a document is missing a field!" - ); - } + throwIfEmpty(); + throwIfBeyondLength(index); final GeoPoint point = supplier.getInternal(index); return new GeoPoint(point.lat(), point.lon()); } @@ -408,6 +402,7 @@ public boolean getValue() { @Override public Boolean get(int index) { throwIfEmpty(); + throwIfBeyondLength(index); return supplier.getInternal(index); } @@ -484,12 +479,8 @@ public String getValue() { @Override public String get(int index) { - if (supplier.size() == 0) { - throw new IllegalStateException( - "A document doesn't have a value for a field! " - + "Use doc[].size()==0 to check if a document is missing a field!" 
- ); - } + throwIfEmpty(); + throwIfBeyondLength(index); return supplier.getInternal(index); } @@ -513,6 +504,7 @@ public BytesRef getValue() { @Override public BytesRef get(int index) { throwIfEmpty(); + throwIfBeyondLength(index); return supplier.getInternal(index); } diff --git a/server/src/test/java/org/elasticsearch/index/fielddata/ScriptDocValuesGeoPointsTests.java b/server/src/test/java/org/elasticsearch/index/fielddata/ScriptDocValuesGeoPointsTests.java index 2b96636d36a90..a0822141aea22 100644 --- a/server/src/test/java/org/elasticsearch/index/fielddata/ScriptDocValuesGeoPointsTests.java +++ b/server/src/test/java/org/elasticsearch/index/fielddata/ScriptDocValuesGeoPointsTests.java @@ -122,6 +122,8 @@ public void testMissingValues() throws IOException { geoPoints.getSupplier().setNextDocId(d); if (points[d].length > 0) { assertEquals(points[d][0], geoPoints.getValue()); + Exception e = expectThrows(IndexOutOfBoundsException.class, () -> geoPoints.get(geoPoints.size())); + assertEquals("A document doesn't have a value for a field at position [" + geoPoints.size() + "]!", e.getMessage()); } else { Exception e = expectThrows(IllegalStateException.class, () -> geoPoints.getValue()); assertEquals( diff --git a/server/src/test/java/org/elasticsearch/index/fielddata/ScriptDocValuesLongsTests.java b/server/src/test/java/org/elasticsearch/index/fielddata/ScriptDocValuesLongsTests.java index a09639c0d90df..5fcb31cb3b64e 100644 --- a/server/src/test/java/org/elasticsearch/index/fielddata/ScriptDocValuesLongsTests.java +++ b/server/src/test/java/org/elasticsearch/index/fielddata/ScriptDocValuesLongsTests.java @@ -35,6 +35,9 @@ public void testLongs() throws IOException { assertEquals(values[d][0], (long) longs.get(0)); assertEquals(values[d][0], longField.get(Long.MIN_VALUE)); assertEquals(values[d][0], longField.get(0, Long.MIN_VALUE)); + + Exception e = expectThrows(IndexOutOfBoundsException.class, () -> { long l = longs.get(longs.size()); }); + assertEquals("A 
document doesn't have a value for a field at position [" + longs.size() + "]!", e.getMessage()); } else { Exception e = expectThrows(IllegalStateException.class, longs::getValue); assertEquals( diff --git a/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongScriptDocValues.java b/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongScriptDocValues.java index dfc1fd23c30eb..95fe4f7a17244 100644 --- a/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongScriptDocValues.java +++ b/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongScriptDocValues.java @@ -22,6 +22,7 @@ public long getValue() { @Override public Long get(int index) { throwIfEmpty(); + throwIfBeyondLength(index); return supplier.getInternal(index); } From ed4dfc67343eaec3bca408e6694567e3ed04c4e3 Mon Sep 17 00:00:00 2001 From: Ievgen Degtiarenko Date: Tue, 18 Jun 2024 10:55:42 +0200 Subject: [PATCH 14/26] AwaitsFix: https://github.com/elastic/elasticsearch/issues/109841 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index b14943c7c7b69..ca017b0ec30a4 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -67,6 +67,9 @@ tests: - class: "org.elasticsearch.xpack.shutdown.NodeShutdownReadinessIT" issue: "https://github.com/elastic/elasticsearch/issues/109838" method: "testShutdownReadinessService" +- class: "org.elasticsearch.packaging.test.PackageTests" + issue: "https://github.com/elastic/elasticsearch/issues/109841" + method: "test50Remove" # Examples: # From a3f4d51f7c8bd4b471b2c19ca7fee4d46413006e Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 18 Jun 2024 09:58:08 +0100 Subject: [PATCH 15/26] Fix fragment flag in `BaseNodesXContentResponse` (#109835) This response is a full object, not a fragment, so must implement `ChunkedToXContentObject`. 
--- .../nodes/BaseNodesXContentResponse.java | 4 +- .../nodes/BaseNodesXContentResponseTests.java | 65 +++++++++++++++++++ 2 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 server/src/test/java/org/elasticsearch/action/support/nodes/BaseNodesXContentResponseTests.java diff --git a/server/src/main/java/org/elasticsearch/action/support/nodes/BaseNodesXContentResponse.java b/server/src/main/java/org/elasticsearch/action/support/nodes/BaseNodesXContentResponse.java index ac193601212c1..3b0f246d8f30e 100644 --- a/server/src/main/java/org/elasticsearch/action/support/nodes/BaseNodesXContentResponse.java +++ b/server/src/main/java/org/elasticsearch/action/support/nodes/BaseNodesXContentResponse.java @@ -11,8 +11,8 @@ import org.elasticsearch.action.FailedNodeException; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.common.collect.Iterators; -import org.elasticsearch.common.xcontent.ChunkedToXContent; import org.elasticsearch.common.xcontent.ChunkedToXContentHelper; +import org.elasticsearch.common.xcontent.ChunkedToXContentObject; import org.elasticsearch.rest.action.RestActions; import org.elasticsearch.xcontent.ToXContent; @@ -21,7 +21,7 @@ public abstract class BaseNodesXContentResponse extends BaseNodesResponse implements - ChunkedToXContent { + ChunkedToXContentObject { protected BaseNodesXContentResponse(ClusterName clusterName, List nodes, List failures) { super(clusterName, nodes, failures); diff --git a/server/src/test/java/org/elasticsearch/action/support/nodes/BaseNodesXContentResponseTests.java b/server/src/test/java/org/elasticsearch/action/support/nodes/BaseNodesXContentResponseTests.java new file mode 100644 index 0000000000000..191505e3afbbf --- /dev/null +++ b/server/src/test/java/org/elasticsearch/action/support/nodes/BaseNodesXContentResponseTests.java @@ -0,0 +1,65 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.action.support.nodes; + +import org.elasticsearch.action.support.TransportAction; +import org.elasticsearch.cluster.ClusterName; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.node.DiscoveryNodeUtils; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.ChunkedToXContentHelper; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.ToXContent; + +import java.util.Iterator; +import java.util.List; + +public class BaseNodesXContentResponseTests extends ESTestCase { + public void testFragmentFlag() { + final var node = DiscoveryNodeUtils.create("test"); + + class TestNodeResponse extends BaseNodeResponse { + protected TestNodeResponse(DiscoveryNode node) { + super(node); + } + } + + final var fullResponse = new BaseNodesXContentResponse<>(ClusterName.DEFAULT, List.of(new TestNodeResponse(node)), List.of()) { + @Override + protected Iterator xContentChunks(ToXContent.Params outerParams) { + return ChunkedToXContentHelper.singleChunk((b, p) -> b.startObject("content").endObject()); + } + + @Override + protected List readNodesFrom(StreamInput in) { + return TransportAction.localOnly(); + } + + @Override + protected void writeNodesTo(StreamOutput out, List nodes) { + TransportAction.localOnly(); + } + }; + + assertFalse(fullResponse.isFragment()); + + assertEquals(""" + { + "_nodes" : { + "total" : 1, + "successful" : 1, + "failed" : 0 + }, + "cluster_name" : "elasticsearch", + "content" : { } + }""", Strings.toString(fullResponse, true, false)); + } +} From b80b7399933351f573627eacbfbd8e6e85c40e2c Mon Sep 17 
00:00:00 2001 From: Przemyslaw Gomulka Date: Tue, 18 Jun 2024 11:40:56 +0200 Subject: [PATCH 16/26] Provide document size reporter with MapperService (#109794) Instead of indexMode a mapper service is necessary to reliably determine if an index is a timeseries datastream --- docs/changelog/109794.yaml | 5 +++++ .../DocumentSizeObserverWithPipelinesIT.java | 4 ++-- .../indices/IndexingMemoryControllerIT.java | 3 ++- .../plugins/internal/DocumentSizeObserverIT.java | 6 +++--- .../elasticsearch/index/engine/EngineConfig.java | 10 +++++++++- .../org/elasticsearch/index/shard/IndexShard.java | 3 ++- .../plugins/internal/DocumentParsingProvider.java | 4 ++-- .../index/engine/InternalEngineTests.java | 6 ++++-- .../index/shard/IndexShardTests.java | 3 ++- .../index/shard/RefreshListenersTests.java | 3 ++- .../index/engine/EngineTestCase.java | 15 ++++++++++----- .../ccr/index/engine/FollowingEngineTests.java | 3 ++- 12 files changed, 45 insertions(+), 20 deletions(-) create mode 100644 docs/changelog/109794.yaml diff --git a/docs/changelog/109794.yaml b/docs/changelog/109794.yaml new file mode 100644 index 0000000000000..d244c69a903ba --- /dev/null +++ b/docs/changelog/109794.yaml @@ -0,0 +1,5 @@ +pr: 109794 +summary: Provide document size reporter with `MapperService` +area: Infra/Metrics +type: bug +issues: [] diff --git a/modules/ingest-common/src/internalClusterTest/java/org/elasticsearch/plugins/internal/DocumentSizeObserverWithPipelinesIT.java b/modules/ingest-common/src/internalClusterTest/java/org/elasticsearch/plugins/internal/DocumentSizeObserverWithPipelinesIT.java index 1621a235187a1..7bb875f8b6f69 100644 --- a/modules/ingest-common/src/internalClusterTest/java/org/elasticsearch/plugins/internal/DocumentSizeObserverWithPipelinesIT.java +++ b/modules/ingest-common/src/internalClusterTest/java/org/elasticsearch/plugins/internal/DocumentSizeObserverWithPipelinesIT.java @@ -12,7 +12,7 @@ import org.elasticsearch.action.ingest.PutPipelineRequest; import 
org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; -import org.elasticsearch.index.IndexMode; +import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.ingest.common.IngestCommonPlugin; import org.elasticsearch.plugins.IngestPlugin; import org.elasticsearch.plugins.Plugin; @@ -102,7 +102,7 @@ public DocumentSizeObserver newDocumentSizeObserver() { @Override public DocumentSizeReporter newDocumentSizeReporter( String indexName, - IndexMode indexMode, + MapperService mapperService, DocumentSizeAccumulator documentSizeAccumulator ) { return DocumentSizeReporter.EMPTY_INSTANCE; diff --git a/server/src/internalClusterTest/java/org/elasticsearch/indices/IndexingMemoryControllerIT.java b/server/src/internalClusterTest/java/org/elasticsearch/indices/IndexingMemoryControllerIT.java index 1c715beb04356..7e057c19ea82e 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/indices/IndexingMemoryControllerIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/indices/IndexingMemoryControllerIT.java @@ -81,7 +81,8 @@ EngineConfig engineConfigWithLargerIndexingMemory(EngineConfig config) { config.getLeafSorter(), config.getRelativeTimeInNanosSupplier(), config.getIndexCommitListener(), - config.isPromotableToPrimary() + config.isPromotableToPrimary(), + config.getMapperService() ); } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/plugins/internal/DocumentSizeObserverIT.java b/server/src/internalClusterTest/java/org/elasticsearch/plugins/internal/DocumentSizeObserverIT.java index bf6c59a4c0a9b..58d1d7d88ec55 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/plugins/internal/DocumentSizeObserverIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/plugins/internal/DocumentSizeObserverIT.java @@ -9,10 +9,10 @@ package org.elasticsearch.plugins.internal; import org.elasticsearch.action.index.IndexRequest; -import 
org.elasticsearch.index.IndexMode; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.engine.EngineFactory; import org.elasticsearch.index.engine.InternalEngine; +import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.plugins.EnginePlugin; import org.elasticsearch.plugins.IngestPlugin; @@ -104,7 +104,7 @@ public IndexResult index(Index index) throws IOException { DocumentSizeReporter documentParsingReporter = documentParsingProvider.newDocumentSizeReporter( shardId.getIndexName(), - IndexMode.STANDARD, + config().getMapperService(), DocumentSizeAccumulator.EMPTY_INSTANCE ); documentParsingReporter.onIndexingCompleted(index.parsedDoc()); @@ -136,7 +136,7 @@ public DocumentSizeObserver newDocumentSizeObserver() { @Override public DocumentSizeReporter newDocumentSizeReporter( String indexName, - IndexMode indexMode, + MapperService mapperService, DocumentSizeAccumulator documentSizeAccumulator ) { return new TestDocumentSizeReporter(indexName); diff --git a/server/src/main/java/org/elasticsearch/index/engine/EngineConfig.java b/server/src/main/java/org/elasticsearch/index/engine/EngineConfig.java index 7a817500c4ca5..51840d2fbfcdd 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/EngineConfig.java +++ b/server/src/main/java/org/elasticsearch/index/engine/EngineConfig.java @@ -24,6 +24,7 @@ import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.codec.CodecService; +import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.seqno.RetentionLeases; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.index.store.Store; @@ -51,6 +52,7 @@ public final class EngineConfig { private volatile boolean enableGcDeletes = true; private final TimeValue flushMergesAfter; private final String codecName; + private final MapperService mapperService; private final 
IndexStorePlugin.SnapshotCommitSupplier snapshotCommitSupplier; private final ThreadPool threadPool; private final Engine.Warmer warmer; @@ -163,7 +165,8 @@ public EngineConfig( Comparator leafSorter, LongSupplier relativeTimeInNanosSupplier, Engine.IndexCommitListener indexCommitListener, - boolean promotableToPrimary + boolean promotableToPrimary, + MapperService mapperService ) { this.shardId = shardId; this.indexSettings = indexSettings; @@ -176,6 +179,7 @@ public EngineConfig( this.codecService = codecService; this.eventListener = eventListener; codecName = indexSettings.getValue(INDEX_CODEC_SETTING); + this.mapperService = mapperService; // We need to make the indexing buffer for this shard at least as large // as the amount of memory that is available for all engines on the // local node so that decisions to flush segments to disk are made by @@ -436,4 +440,8 @@ public boolean isPromotableToPrimary() { public boolean getUseCompoundFile() { return useCompoundFile; } + + public MapperService getMapperService() { + return mapperService; + } } diff --git a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java index dc2c4728fb857..b3f19b1b7a81d 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -3491,7 +3491,8 @@ private EngineConfig newEngineConfig(LongSupplier globalCheckpointSupplier) { isTimeBasedIndex ? 
TIMESERIES_LEAF_READERS_SORTER : null, relativeTimeInNanosSupplier, indexCommitListener, - routingEntry().isPromotableToPrimary() + routingEntry().isPromotableToPrimary(), + mapperService() ); } diff --git a/server/src/main/java/org/elasticsearch/plugins/internal/DocumentParsingProvider.java b/server/src/main/java/org/elasticsearch/plugins/internal/DocumentParsingProvider.java index 0e404ca03707f..6fe1e48b25272 100644 --- a/server/src/main/java/org/elasticsearch/plugins/internal/DocumentParsingProvider.java +++ b/server/src/main/java/org/elasticsearch/plugins/internal/DocumentParsingProvider.java @@ -8,7 +8,7 @@ package org.elasticsearch.plugins.internal; -import org.elasticsearch.index.IndexMode; +import org.elasticsearch.index.mapper.MapperService; /** * An interface to provide instances of document parsing observer and reporter @@ -36,7 +36,7 @@ default DocumentSizeObserver newFixedSizeDocumentObserver(long normalisedBytesPa */ default DocumentSizeReporter newDocumentSizeReporter( String indexName, - IndexMode indexMode, + MapperService mapperService, DocumentSizeAccumulator documentSizeAccumulator ) { return DocumentSizeReporter.EMPTY_INSTANCE; diff --git a/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java b/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java index d4ff35fee549e..a89ac5bc5b74e 100644 --- a/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java +++ b/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java @@ -3639,7 +3639,8 @@ public void testRecoverFromForeignTranslog() throws IOException { null, config.getRelativeTimeInNanosSupplier(), null, - true + true, + config.getMapperService() ); expectThrows(EngineCreationFailureException.class, () -> new InternalEngine(brokenConfig)); @@ -7320,7 +7321,8 @@ public void testNotWarmUpSearcherInEngineCtor() throws Exception { config.getLeafSorter(), config.getRelativeTimeInNanosSupplier(), 
config.getIndexCommitListener(), - config.isPromotableToPrimary() + config.isPromotableToPrimary(), + config.getMapperService() ); try (InternalEngine engine = createEngine(configWithWarmer)) { assertThat(warmedUpReaders, empty()); diff --git a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java index 8398ece2536a1..d272aaab1b231 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/IndexShardTests.java @@ -4820,7 +4820,8 @@ public void testCloseShardWhileEngineIsWarming() throws Exception { config.getLeafSorter(), config.getRelativeTimeInNanosSupplier(), config.getIndexCommitListener(), - config.isPromotableToPrimary() + config.isPromotableToPrimary(), + config.getMapperService() ); return new InternalEngine(configWithWarmer); }); diff --git a/server/src/test/java/org/elasticsearch/index/shard/RefreshListenersTests.java b/server/src/test/java/org/elasticsearch/index/shard/RefreshListenersTests.java index 7f22c9f9ccc2a..2b333277e2d4a 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/RefreshListenersTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/RefreshListenersTests.java @@ -156,7 +156,8 @@ public void onFailedEngine(String reason, @Nullable Exception e) { null, System::nanoTime, null, - true + true, + null ); engine = new InternalEngine(config); EngineTestCase.recoverFromTranslog(engine, (e, s) -> 0, Long.MAX_VALUE); diff --git a/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java index 3a7a31e761e7f..1c7cabb541581 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/engine/EngineTestCase.java @@ -268,7 +268,8 @@ public static EngineConfig 
copy(EngineConfig config, LongSupplier globalCheckpoi config.getLeafSorter(), config.getRelativeTimeInNanosSupplier(), config.getIndexCommitListener(), - config.isPromotableToPrimary() + config.isPromotableToPrimary(), + config.getMapperService() ); } @@ -299,7 +300,8 @@ public EngineConfig copy(EngineConfig config, Analyzer analyzer) { config.getLeafSorter(), config.getRelativeTimeInNanosSupplier(), config.getIndexCommitListener(), - config.isPromotableToPrimary() + config.isPromotableToPrimary(), + config.getMapperService() ); } @@ -330,7 +332,8 @@ public EngineConfig copy(EngineConfig config, MergePolicy mergePolicy) { config.getLeafSorter(), config.getRelativeTimeInNanosSupplier(), config.getIndexCommitListener(), - config.isPromotableToPrimary() + config.isPromotableToPrimary(), + config.getMapperService() ); } @@ -854,7 +857,8 @@ public EngineConfig config( null, this::relativeTimeInNanos, indexCommitListener, - true + true, + null ); } @@ -893,7 +897,8 @@ protected EngineConfig config(EngineConfig config, Store store, Path translogPat config.getLeafSorter(), config.getRelativeTimeInNanosSupplier(), config.getIndexCommitListener(), - config.isPromotableToPrimary() + config.isPromotableToPrimary(), + config.getMapperService() ); } diff --git a/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/index/engine/FollowingEngineTests.java b/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/index/engine/FollowingEngineTests.java index 7c9b1b5efbde2..fbddfc7683d2f 100644 --- a/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/index/engine/FollowingEngineTests.java +++ b/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/index/engine/FollowingEngineTests.java @@ -252,7 +252,8 @@ public void onFailedEngine(String reason, Exception e) { null, System::nanoTime, null, - true + true, + null ); } From 28c1cc63c41dd5d64f71c2df494e3630269b1142 Mon Sep 17 00:00:00 2001 From: Ignacio Vera Date: Tue, 18 Jun 2024 11:46:56 +0200 Subject: [PATCH 
17/26] Wrap lucene queries in GeoGridQueryBuilder with ConstantScoreQuery (#109780) --- .../xpack/spatial/index/query/GeoGridQueryBuilder.java | 3 ++- .../spatial/index/query/GeoGridQueryBuilderTests.java | 8 ++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/query/GeoGridQueryBuilder.java b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/query/GeoGridQueryBuilder.java index 843842cf863c7..8e73fc37f96ba 100644 --- a/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/query/GeoGridQueryBuilder.java +++ b/x-pack/plugin/spatial/src/main/java/org/elasticsearch/xpack/spatial/index/query/GeoGridQueryBuilder.java @@ -9,6 +9,7 @@ import org.apache.lucene.geo.GeoEncodingUtils; import org.apache.lucene.geo.LatLonGeometry; +import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.elasticsearch.ElasticsearchParseException; @@ -284,7 +285,7 @@ public Query doToQuery(SearchExecutionContext context) { throw new QueryShardException(context, "failed to find geo field [" + fieldName + "]"); } } - return grid.toQuery(context, fieldName, fieldType, gridId); + return new ConstantScoreQuery(grid.toQuery(context, fieldName, fieldType, gridId)); } @Override diff --git a/x-pack/plugin/spatial/src/test/java/org/elasticsearch/xpack/spatial/index/query/GeoGridQueryBuilderTests.java b/x-pack/plugin/spatial/src/test/java/org/elasticsearch/xpack/spatial/index/query/GeoGridQueryBuilderTests.java index 83eed8042e4de..5aff04520b5b8 100644 --- a/x-pack/plugin/spatial/src/test/java/org/elasticsearch/xpack/spatial/index/query/GeoGridQueryBuilderTests.java +++ b/x-pack/plugin/spatial/src/test/java/org/elasticsearch/xpack/spatial/index/query/GeoGridQueryBuilderTests.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.spatial.index.query; +import 
org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; @@ -118,8 +119,11 @@ protected void doAssertLuceneQuery(GeoGridQueryBuilder queryBuilder, Query query final MappedFieldType fieldType = context.getFieldType(queryBuilder.fieldName()); if (fieldType == null) { assertTrue("Found no indexed geo query.", query instanceof MatchNoDocsQuery); - } else if (fieldType.hasDocValues()) { - assertEquals(IndexOrDocValuesQuery.class, query.getClass()); + } else { + assertEquals(ConstantScoreQuery.class, query.getClass()); + if (fieldType.hasDocValues()) { + assertEquals(IndexOrDocValuesQuery.class, ((ConstantScoreQuery) query).getQuery().getClass()); + } } } From 3bfecc8d22b3e48aef2b950ca41846dfa0fa0993 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 18 Jun 2024 06:22:07 -0400 Subject: [PATCH 18/26] ESQL: Move serialization for Literal and Order (#109709) This moves the serialization for `Literal` and `Order` into the classes themselves to line up better with how everything else in Elasticsearch is serialized. 
--- .../xpack/esql/core/expression/Literal.java | 85 +++++++++++++- .../esql/core/expression/LiteralTests.java | 8 +- .../xpack/esql/expression/Order.java | 31 ++++- .../xpack/esql/io/stream/PlanNamedTypes.java | 108 ++---------------- .../AbstractExpressionSerializationTests.java | 11 +- .../expression/LiteralSerializationTests.java | 37 ++++++ .../expression/OrderSerializationTests.java | 54 +++++++++ .../esql/io/stream/PlanNamedTypesTests.java | 18 --- 8 files changed, 233 insertions(+), 119 deletions(-) create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/LiteralSerializationTests.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/OrderSerializationTests.java diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Literal.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Literal.java index 68780f5b32e9c..20cdbaf6acdbf 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Literal.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Literal.java @@ -6,17 +6,35 @@ */ package org.elasticsearch.xpack.esql.core.expression; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.TransportVersions; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.xpack.esql.core.QlIllegalArgumentException; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.util.PlanStreamInput; +import java.io.IOException; +import java.util.List; import java.util.Objects; +import static 
org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT; +import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; +import static org.elasticsearch.xpack.esql.core.util.SpatialCoordinateTypes.CARTESIAN; +import static org.elasticsearch.xpack.esql.core.util.SpatialCoordinateTypes.GEO; + /** - * SQL Literal or constant. + * Literal or constant. */ public class Literal extends LeafExpression { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( + Expression.class, + "Literal", + Literal::readFrom + ); public static final Literal TRUE = new Literal(Source.EMPTY, Boolean.TRUE, DataType.BOOLEAN); public static final Literal FALSE = new Literal(Source.EMPTY, Boolean.FALSE, DataType.BOOLEAN); @@ -31,6 +49,25 @@ public Literal(Source source, Object value, DataType dataType) { this.value = value; } + private static Literal readFrom(StreamInput in) throws IOException { + Source source = Source.readFrom((StreamInput & PlanStreamInput) in); + Object value = in.readGenericValue(); + DataType dataType = DataType.readFrom(in); + return new Literal(source, mapToLiteralValue(in, dataType, value), dataType); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + Source.EMPTY.writeTo(out); + out.writeGenericValue(mapFromLiteralValue(out, dataType, value)); + dataType.writeTo(out); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + @Override protected NodeInfo info() { return NodeInfo.create(this, Literal::new, value, dataType); @@ -112,4 +149,50 @@ public static Literal of(Expression foldable) { public static Literal of(Expression source, Object value) { return new Literal(source.source(), value, source.dataType()); } + + /** + * Not all literal values are currently supported in StreamInput/StreamOutput as generic values. + * This mapper allows for addition of new and interesting values without (yet) adding to StreamInput/Output. 
+ * This makes the most sense during the pre-GA version of ESQL. When we get near GA we might want to push this down. + *

+ * For the spatial point type support we need to care about the fact that 8.12.0 uses encoded longs for serializing + * while 8.13 uses WKB. + */ + private static Object mapFromLiteralValue(StreamOutput out, DataType dataType, Object value) { + if (dataType == GEO_POINT || dataType == CARTESIAN_POINT) { + // In 8.12.0 we serialized point literals as encoded longs, but now use WKB + if (out.getTransportVersion().before(TransportVersions.V_8_13_0)) { + if (value instanceof List list) { + return list.stream().map(v -> mapFromLiteralValue(out, dataType, v)).toList(); + } + return wkbAsLong(dataType, (BytesRef) value); + } + } + return value; + } + + /** + * Not all literal values are currently supported in StreamInput/StreamOutput as generic values. + * This mapper allows for addition of new and interesting values without (yet) changing StreamInput/Output. + */ + private static Object mapToLiteralValue(StreamInput in, DataType dataType, Object value) { + if (dataType == GEO_POINT || dataType == CARTESIAN_POINT) { + // In 8.12.0 we serialized point literals as encoded longs, but now use WKB + if (in.getTransportVersion().before(TransportVersions.V_8_13_0)) { + if (value instanceof List list) { + return list.stream().map(v -> mapToLiteralValue(in, dataType, v)).toList(); + } + return longAsWKB(dataType, (Long) value); + } + } + return value; + } + + private static BytesRef longAsWKB(DataType dataType, long encoded) { + return dataType == GEO_POINT ? GEO.longAsWkb(encoded) : CARTESIAN.longAsWkb(encoded); + } + + private static long wkbAsLong(DataType dataType, BytesRef wkb) { + return dataType == GEO_POINT ? 
GEO.wkbAsLong(wkb) : CARTESIAN.wkbAsLong(wkb); + } } diff --git a/x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/expression/LiteralTests.java b/x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/expression/LiteralTests.java index 7e57e8f358ae1..a4c67a8076479 100644 --- a/x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/expression/LiteralTests.java +++ b/x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/expression/LiteralTests.java @@ -76,6 +76,10 @@ protected Literal copy(Literal instance) { @Override protected Literal mutate(Literal instance) { + return mutateLiteral(instance); + } + + public static Literal mutateLiteral(Literal instance) { List> mutators = new ArrayList<>(); // Changing the location doesn't count as mutation because..... it just doesn't, ok?! // Change the value to another valid value @@ -116,7 +120,7 @@ public void testReplaceChildren() { assertEquals("this type of node doesn't have any children to replace", e.getMessage()); } - private Object randomValueOfTypeOtherThan(Object original, DataType type) { + private static Object randomValueOfTypeOtherThan(Object original, DataType type) { for (ValueAndCompatibleTypes gen : GENERATORS) { if (gen.validDataTypes.get(0) == type) { return randomValueOtherThan(original, () -> DataTypeConverter.convert(gen.valueSupplier.get(), type)); @@ -125,7 +129,7 @@ private Object randomValueOfTypeOtherThan(Object original, DataType type) { throw new IllegalArgumentException("No native generator for [" + type + "]"); } - private List validReplacementDataTypes(Object value, DataType type) { + private static List validReplacementDataTypes(Object value, DataType type) { List validDataTypes = new ArrayList<>(); List options = Arrays.asList(BYTE, SHORT, INTEGER, LONG, FLOAT, DOUBLE, BOOLEAN); for (DataType candidate : options) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/Order.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/Order.java index 10800a2394e8f..11a98d3a11504 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/Order.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/Order.java @@ -7,18 +7,48 @@ package org.elasticsearch.xpack.esql.expression; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput; +import java.io.IOException; import java.util.List; public class Order extends org.elasticsearch.xpack.esql.core.expression.Order { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Order", Order::new); + public Order(Source source, Expression child, OrderDirection direction, NullsPosition nulls) { super(source, child, direction, nulls); } + public Order(StreamInput in) throws IOException { + this( + Source.readFrom((PlanStreamInput) in), + ((PlanStreamInput) in).readExpression(), + in.readEnum(org.elasticsearch.xpack.esql.core.expression.Order.OrderDirection.class), + in.readEnum(org.elasticsearch.xpack.esql.core.expression.Order.NullsPosition.class) + ); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + Source.EMPTY.writeTo(out); + ((PlanStreamOutput) out).writeExpression(child()); + out.writeEnum(direction()); + out.writeEnum(nullsPosition()); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + @Override protected TypeResolution resolveType() { 
if (DataType.isString(child().dataType())) { @@ -36,5 +66,4 @@ public Order replaceChildren(List newChildren) { protected NodeInfo info() { return NodeInfo.create(this, Order::new, child(), direction(), nullsPosition()); } - } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypes.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypes.java index 795790949f665..52989d2d2a277 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypes.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypes.java @@ -7,7 +7,6 @@ package org.elasticsearch.xpack.esql.io.stream; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; import org.elasticsearch.common.TriFunction; @@ -46,7 +45,6 @@ import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.core.plan.logical.OrderBy; import org.elasticsearch.xpack.esql.core.tree.Source; -import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.EsField; import org.elasticsearch.xpack.esql.expression.function.UnsupportedAttribute; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; @@ -178,10 +176,6 @@ import java.util.function.Function; import static java.util.Map.entry; -import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT; -import static org.elasticsearch.xpack.esql.core.type.DataType.GEO_POINT; -import static org.elasticsearch.xpack.esql.core.util.SpatialCoordinateTypes.CARTESIAN; -import static org.elasticsearch.xpack.esql.core.util.SpatialCoordinateTypes.GEO; import static org.elasticsearch.xpack.esql.io.stream.PlanNameRegistry.Entry.of; import static org.elasticsearch.xpack.esql.io.stream.PlanNameRegistry.PlanReader.readerFromPlanReader; import static 
org.elasticsearch.xpack.esql.io.stream.PlanNameRegistry.PlanWriter.writerFromPlanWriter; @@ -363,10 +357,7 @@ public static List namedTypeEntries() { of(ScalarFunction.class, MvSort.class, PlanNamedTypes::writeMvSort, PlanNamedTypes::readMvSort), of(ScalarFunction.class, MvSlice.class, PlanNamedTypes::writeMvSlice, PlanNamedTypes::readMvSlice), of(ScalarFunction.class, MvSum.class, PlanNamedTypes::writeMvFunction, PlanNamedTypes::readMvFunction), - of(ScalarFunction.class, MvZip.class, PlanNamedTypes::writeMvZip, PlanNamedTypes::readMvZip), - // Expressions (other) - of(Expression.class, Literal.class, PlanNamedTypes::writeLiteral, PlanNamedTypes::readLiteral), - of(Expression.class, Order.class, PlanNamedTypes::writeOrder, PlanNamedTypes::readOrder) + of(ScalarFunction.class, MvZip.class, PlanNamedTypes::writeMvZip, PlanNamedTypes::readMvZip) ); List entries = new ArrayList<>(declared); @@ -378,6 +369,8 @@ public static List namedTypeEntries() { entries.add(of(Expression.class, e)); } entries.add(of(Expression.class, UnsupportedAttribute.ENTRY)); + entries.add(of(Expression.class, Literal.ENTRY)); + entries.add(of(Expression.class, org.elasticsearch.xpack.esql.expression.Order.ENTRY)); return entries; } @@ -678,14 +671,14 @@ static OrderExec readOrderExec(PlanStreamInput in) throws IOException { return new OrderExec( Source.readFrom(in), in.readPhysicalPlanNode(), - in.readCollectionAsList(readerFromPlanReader(PlanNamedTypes::readOrder)) + in.readCollectionAsList(org.elasticsearch.xpack.esql.expression.Order::new) ); } static void writeOrderExec(PlanStreamOutput out, OrderExec orderExec) throws IOException { Source.EMPTY.writeTo(out); out.writePhysicalPlanNode(orderExec.child()); - out.writeCollection(orderExec.order(), writerFromPlanWriter(PlanNamedTypes::writeOrder)); + out.writeCollection(orderExec.order()); } static ProjectExec readProjectExec(PlanStreamInput in) throws IOException { @@ -731,7 +724,7 @@ static TopNExec readTopNExec(PlanStreamInput in) throws 
IOException { return new TopNExec( Source.readFrom(in), in.readPhysicalPlanNode(), - in.readCollectionAsList(readerFromPlanReader(PlanNamedTypes::readOrder)), + in.readCollectionAsList(org.elasticsearch.xpack.esql.expression.Order::new), in.readNamed(Expression.class), in.readOptionalVInt() ); @@ -740,7 +733,7 @@ static TopNExec readTopNExec(PlanStreamInput in) throws IOException { static void writeTopNExec(PlanStreamOutput out, TopNExec topNExec) throws IOException { Source.EMPTY.writeTo(out); out.writePhysicalPlanNode(topNExec.child()); - out.writeCollection(topNExec.order(), writerFromPlanWriter(PlanNamedTypes::writeOrder)); + out.writeCollection(topNExec.order()); out.writeExpression(topNExec.limit()); out.writeOptionalVInt(topNExec.estimatedRowSize()); } @@ -969,14 +962,14 @@ static OrderBy readOrderBy(PlanStreamInput in) throws IOException { return new OrderBy( Source.readFrom(in), in.readLogicalPlanNode(), - in.readCollectionAsList(readerFromPlanReader(PlanNamedTypes::readOrder)) + in.readCollectionAsList(org.elasticsearch.xpack.esql.expression.Order::new) ); } static void writeOrderBy(PlanStreamOutput out, OrderBy order) throws IOException { Source.EMPTY.writeTo(out); out.writeLogicalPlanNode(order.child()); - out.writeCollection(order.order(), writerFromPlanWriter(PlanNamedTypes::writeOrder)); + out.writeCollection(order.order()); } static Project readProject(PlanStreamInput in) throws IOException { @@ -993,15 +986,15 @@ static TopN readTopN(PlanStreamInput in) throws IOException { return new TopN( Source.readFrom(in), in.readLogicalPlanNode(), - in.readCollectionAsList(readerFromPlanReader(PlanNamedTypes::readOrder)), - in.readNamed(Expression.class) + in.readCollectionAsList(org.elasticsearch.xpack.esql.expression.Order::new), + in.readExpression() ); } static void writeTopN(PlanStreamOutput out, TopN topN) throws IOException { Source.EMPTY.writeTo(out); out.writeLogicalPlanNode(topN.child()); - out.writeCollection(topN.order(), 
writerFromPlanWriter(PlanNamedTypes::writeOrder)); + out.writeCollection(topN.order()); out.writeExpression(topN.limit()); } @@ -1552,83 +1545,6 @@ static void writeMvConcat(PlanStreamOutput out, MvConcat fn) throws IOException out.writeExpression(fn.right()); } - // -- Expressions (other) - - static Literal readLiteral(PlanStreamInput in) throws IOException { - Source source = Source.readFrom(in); - Object value = in.readGenericValue(); - DataType dataType = DataType.readFrom(in); - return new Literal(source, mapToLiteralValue(in, dataType, value), dataType); - } - - static void writeLiteral(PlanStreamOutput out, Literal literal) throws IOException { - Source.EMPTY.writeTo(out); - out.writeGenericValue(mapFromLiteralValue(out, literal.dataType(), literal.value())); - out.writeString(literal.dataType().typeName()); - } - - /** - * Not all literal values are currently supported in StreamInput/StreamOutput as generic values. - * This mapper allows for addition of new and interesting values without (yet) adding to StreamInput/Output. - * This makes the most sense during the pre-GA version of ESQL. When we get near GA we might want to push this down. - *

- * For the spatial point type support we need to care about the fact that 8.12.0 uses encoded longs for serializing - * while 8.13 uses WKB. - */ - private static Object mapFromLiteralValue(PlanStreamOutput out, DataType dataType, Object value) { - if (dataType == GEO_POINT || dataType == CARTESIAN_POINT) { - // In 8.12.0 we serialized point literals as encoded longs, but now use WKB - if (out.getTransportVersion().before(TransportVersions.V_8_13_0)) { - if (value instanceof List list) { - return list.stream().map(v -> mapFromLiteralValue(out, dataType, v)).toList(); - } - return wkbAsLong(dataType, (BytesRef) value); - } - } - return value; - } - - /** - * Not all literal values are currently supported in StreamInput/StreamOutput as generic values. - * This mapper allows for addition of new and interesting values without (yet) changing StreamInput/Output. - */ - private static Object mapToLiteralValue(PlanStreamInput in, DataType dataType, Object value) { - if (dataType == GEO_POINT || dataType == CARTESIAN_POINT) { - // In 8.12.0 we serialized point literals as encoded longs, but now use WKB - if (in.getTransportVersion().before(TransportVersions.V_8_13_0)) { - if (value instanceof List list) { - return list.stream().map(v -> mapToLiteralValue(in, dataType, v)).toList(); - } - return longAsWKB(dataType, (Long) value); - } - } - return value; - } - - private static BytesRef longAsWKB(DataType dataType, long encoded) { - return dataType == GEO_POINT ? GEO.longAsWkb(encoded) : CARTESIAN.longAsWkb(encoded); - } - - private static long wkbAsLong(DataType dataType, BytesRef wkb) { - return dataType == GEO_POINT ? 
GEO.wkbAsLong(wkb) : CARTESIAN.wkbAsLong(wkb); - } - - static Order readOrder(PlanStreamInput in) throws IOException { - return new org.elasticsearch.xpack.esql.expression.Order( - Source.readFrom(in), - in.readNamed(Expression.class), - in.readEnum(Order.OrderDirection.class), - in.readEnum(Order.NullsPosition.class) - ); - } - - static void writeOrder(PlanStreamOutput out, Order order) throws IOException { - Source.EMPTY.writeTo(out); - out.writeExpression(order.child()); - out.writeEnum(order.direction()); - out.writeEnum(order.nullsPosition()); - } - // -- ancillary supporting classes of plan nodes, etc static EsQueryExec.FieldSort readFieldSort(PlanStreamInput in) throws IOException { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/AbstractExpressionSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/AbstractExpressionSerializationTests.java index a5ce5e004b194..d0203419f01ab 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/AbstractExpressionSerializationTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/AbstractExpressionSerializationTests.java @@ -31,6 +31,7 @@ import java.util.stream.Collectors; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.sameInstance; public abstract class AbstractExpressionSerializationTests extends AbstractWireTestCase { public static Source randomSource() { @@ -59,13 +60,21 @@ protected final T copyInstance(T instance, TransportVersion version) throws IOEx PlanStreamInput pin = new PlanStreamInput(in, new PlanNameRegistry(), in.namedWriteableRegistry(), config); @SuppressWarnings("unchecked") T deser = (T) pin.readNamedWriteable(Expression.class); - assertThat(deser.source(), equalTo(instance.source())); + if (alwaysEmptySource()) { + assertThat(deser.source(), sameInstance(Source.EMPTY)); + } else { + assertThat(deser.source(), 
equalTo(instance.source())); + } return deser; }, version ); } + protected boolean alwaysEmptySource() { + return false; + } + protected abstract List getNamedWriteables(); @Override diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/LiteralSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/LiteralSerializationTests.java new file mode 100644 index 0000000000000..39e18bf9761ec --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/LiteralSerializationTests.java @@ -0,0 +1,37 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression; + +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.LiteralTests; + +import java.io.IOException; +import java.util.List; + +public class LiteralSerializationTests extends AbstractExpressionSerializationTests { + @Override + protected Literal createTestInstance() { + return LiteralTests.randomLiteral(); + } + + @Override + protected Literal mutateInstance(Literal instance) throws IOException { + return LiteralTests.mutateLiteral(instance); + } + + @Override + protected List getNamedWriteables() { + return List.of(Literal.ENTRY); + } + + @Override + protected boolean alwaysEmptySource() { + return true; + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/OrderSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/OrderSerializationTests.java new file mode 100644 index 0000000000000..dd2671f4cf86d --- /dev/null +++ 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/OrderSerializationTests.java @@ -0,0 +1,54 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression; + +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; + +import java.io.IOException; +import java.util.List; + +public class OrderSerializationTests extends AbstractExpressionSerializationTests { + @Override + protected Order createTestInstance() { + return new Order(randomSource(), randomChild(), randomDirection(), randomNulls()); + } + + private static org.elasticsearch.xpack.esql.core.expression.Order.OrderDirection randomDirection() { + return randomFrom(org.elasticsearch.xpack.esql.core.expression.Order.OrderDirection.values()); + } + + private static org.elasticsearch.xpack.esql.core.expression.Order.NullsPosition randomNulls() { + return randomFrom(org.elasticsearch.xpack.esql.core.expression.Order.NullsPosition.values()); + } + + @Override + protected Order mutateInstance(Order instance) throws IOException { + Source source = instance.source(); + Expression child = instance.child(); + org.elasticsearch.xpack.esql.core.expression.Order.OrderDirection direction = instance.direction(); + org.elasticsearch.xpack.esql.core.expression.Order.NullsPosition nulls = instance.nullsPosition(); + switch (between(0, 2)) { + case 0 -> child = randomValueOtherThan(child, AbstractExpressionSerializationTests::randomChild); + case 1 -> direction = randomValueOtherThan(direction, OrderSerializationTests::randomDirection); + case 2 -> nulls = randomValueOtherThan(nulls, OrderSerializationTests::randomNulls); + } + return 
new Order(source, child, direction, nulls); + } + + @Override + protected List getNamedWriteables() { + return List.of(Order.ENTRY); + } + + @Override + protected boolean alwaysEmptySource() { + return true; + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypesTests.java index 2278be659c538..54490ba306da8 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypesTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypesTests.java @@ -308,24 +308,6 @@ public void testPowSimple() throws IOException { EqualsHashCodeTestUtils.checkEqualsAndHashCode(orig, unused -> deser); } - public void testLiteralSimple() throws IOException { - var orig = new Literal(Source.EMPTY, 1, DataType.INTEGER); - BytesStreamOutput bso = new BytesStreamOutput(); - PlanStreamOutput out = new PlanStreamOutput(bso, planNameRegistry, null); - PlanNamedTypes.writeLiteral(out, orig); - var deser = PlanNamedTypes.readLiteral(planStreamInput(bso)); - EqualsHashCodeTestUtils.checkEqualsAndHashCode(orig, unused -> deser); - } - - public void testOrderSimple() throws IOException { - var orig = new Order(Source.EMPTY, field("val", DataType.INTEGER), Order.OrderDirection.ASC, Order.NullsPosition.FIRST); - BytesStreamOutput bso = new BytesStreamOutput(); - PlanStreamOutput out = new PlanStreamOutput(bso, planNameRegistry, null); - PlanNamedTypes.writeOrder(out, orig); - var deser = (Order) PlanNamedTypes.readOrder(planStreamInput(bso)); - EqualsHashCodeTestUtils.checkEqualsAndHashCode(orig, unused -> deser); - } - public void testFieldSortSimple() throws IOException { var orig = new EsQueryExec.FieldSort(field("val", DataType.LONG), Order.OrderDirection.ASC, Order.NullsPosition.FIRST); BytesStreamOutput bso = new BytesStreamOutput(); From 
b35f0ed48d0325127bde45b17230218b7686e681 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Tue, 18 Jun 2024 06:23:11 -0400 Subject: [PATCH 19/26] ESQL: Make a table of all inline casts (#109713) This adds a test that generates `docs/reference/esql/functions/kibana/inline_cast.json` which is a json object who's keys are the names of valid inline casts and who's values are the resulting data types. I also moved one of the maps we use to make the inline casts to `DataType`, which is a place where we want it. --- .../esql/functions/kibana/inline_cast.json | 19 ++++++ .../xpack/esql/core/type/DataType.java | 20 ++++++ .../xpack/esql/parser/ExpressionBuilder.java | 3 +- .../xpack/esql/type/EsqlDataTypes.java | 15 ----- .../xpack/esql/analysis/ParsingTests.java | 62 +++++++++++++++++++ 5 files changed, 102 insertions(+), 17 deletions(-) create mode 100644 docs/reference/esql/functions/kibana/inline_cast.json diff --git a/docs/reference/esql/functions/kibana/inline_cast.json b/docs/reference/esql/functions/kibana/inline_cast.json new file mode 100644 index 0000000000000..f71572d3d651c --- /dev/null +++ b/docs/reference/esql/functions/kibana/inline_cast.json @@ -0,0 +1,19 @@ +{ + "bool" : "to_boolean", + "boolean" : "to_boolean", + "cartesian_point" : "to_cartesianpoint", + "cartesian_shape" : "to_cartesianshape", + "datetime" : "to_datetime", + "double" : "to_double", + "geo_point" : "to_geopoint", + "geo_shape" : "to_geoshape", + "int" : "to_integer", + "integer" : "to_integer", + "ip" : "to_ip", + "keyword" : "to_string", + "long" : "to_long", + "string" : "to_string", + "text" : "to_string", + "unsigned_long" : "to_unsigned_long", + "version" : "to_version" +} \ No newline at end of file diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java index 9d6a325a6028f..53b191dbd6332 100644 --- 
a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java @@ -18,6 +18,8 @@ import java.util.Comparator; import java.util.Locale; import java.util.Map; +import java.util.Set; +import java.util.function.Function; import java.util.stream.Stream; import static java.util.stream.Collectors.toMap; @@ -144,6 +146,15 @@ public enum DataType { ES_TO_TYPE = Collections.unmodifiableMap(map); } + private static final Map NAME_OR_ALIAS_TO_TYPE; + static { + Map map = DataType.types().stream().collect(toMap(DataType::typeName, Function.identity())); + map.put("bool", BOOLEAN); + map.put("int", INTEGER); + map.put("string", KEYWORD); + NAME_OR_ALIAS_TO_TYPE = Collections.unmodifiableMap(map); + } + public static Collection types() { return TYPES; } @@ -282,4 +293,13 @@ public static DataType readFrom(StreamInput in) throws IOException { } return dataType; } + + public static Set namesAndAliases() { + return NAME_OR_ALIAS_TO_TYPE.keySet(); + } + + public static DataType fromNameOrAlias(String typeName) { + DataType type = NAME_OR_ALIAS_TO_TYPE.get(typeName.toLowerCase(Locale.ROOT)); + return type != null ? 
type : UNSUPPORTED; + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/ExpressionBuilder.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/ExpressionBuilder.java index 59801e59555b5..41db2aa54387b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/ExpressionBuilder.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/ExpressionBuilder.java @@ -58,7 +58,6 @@ import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.LessThan; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.LessThanOrEqual; import org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter; -import org.elasticsearch.xpack.esql.type.EsqlDataTypes; import java.math.BigInteger; import java.time.Duration; @@ -549,7 +548,7 @@ public Expression visitInlineCast(EsqlBaseParser.InlineCastContext ctx) { @Override public DataType visitToDataType(EsqlBaseParser.ToDataTypeContext ctx) { String typeName = visitIdentifier(ctx.identifier()); - DataType dataType = EsqlDataTypes.fromNameOrAlias(typeName); + DataType dataType = DataType.fromNameOrAlias(typeName); if (dataType == DataType.UNSUPPORTED) { throw new ParsingException(source(ctx), "Unknown data type named [{}]", typeName); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypes.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypes.java index e48b46758f36c..de97d6925e002 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypes.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypes.java @@ -12,7 +12,6 @@ import java.util.Collections; import java.util.Locale; import java.util.Map; -import java.util.function.Function; import static java.util.stream.Collectors.toMap; import static java.util.stream.Collectors.toUnmodifiableMap; @@ -52,15 +51,6 @@ public 
final class EsqlDataTypes { ES_TO_TYPE = Collections.unmodifiableMap(map); } - private static final Map NAME_OR_ALIAS_TO_TYPE; - static { - Map map = DataType.types().stream().collect(toMap(DataType::typeName, Function.identity())); - map.put("bool", BOOLEAN); - map.put("int", INTEGER); - map.put("string", KEYWORD); - NAME_OR_ALIAS_TO_TYPE = Collections.unmodifiableMap(map); - } - private EsqlDataTypes() {} public static DataType fromTypeName(String name) { @@ -72,11 +62,6 @@ public static DataType fromName(String name) { return type != null ? type : UNSUPPORTED; } - public static DataType fromNameOrAlias(String typeName) { - DataType type = NAME_OR_ALIAS_TO_TYPE.get(typeName.toLowerCase(Locale.ROOT)); - return type != null ? type : UNSUPPORTED; - } - public static DataType fromJava(Object value) { if (value == null) { return NULL; diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/ParsingTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/ParsingTests.java index 223ee08316479..27a42f79e39ff 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/ParsingTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/ParsingTests.java @@ -7,17 +7,36 @@ package org.elasticsearch.xpack.esql.analysis; +import org.elasticsearch.core.PathUtils; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.json.JsonXContent; import org.elasticsearch.xpack.esql.core.ParsingException; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.function.FunctionDefinition; import org.elasticsearch.xpack.esql.core.index.EsIndex; import org.elasticsearch.xpack.esql.core.index.IndexResolution; +import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.core.type.DataType; import 
org.elasticsearch.xpack.esql.core.type.TypesTests; import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry; import org.elasticsearch.xpack.esql.parser.EsqlParser; +import org.elasticsearch.xpack.esql.plan.logical.Row; +import org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_CFG; import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_VERIFIER; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.as; import static org.elasticsearch.xpack.esql.EsqlTestUtils.emptyPolicyResolution; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; public class ParsingTests extends ESTestCase { private static final String INDEX_NAME = "test"; @@ -53,6 +72,49 @@ public void testLeastFunctionInvalidInputs() { assertEquals("1:23: error building [least]: expects at least one argument", error("row a = 1 | eval x = least()")); } + /** + * Tests the inline cast syntax {@code ::} for all supported types and + * builds a little json report of the valid types. 
+ */ + public void testInlineCast() throws IOException { + EsqlFunctionRegistry registry = new EsqlFunctionRegistry(); + Path dir = PathUtils.get(System.getProperty("java.io.tmpdir")).resolve("esql").resolve("functions").resolve("kibana"); + Files.createDirectories(dir); + Path file = dir.resolve("inline_cast.json"); + try (XContentBuilder report = new XContentBuilder(JsonXContent.jsonXContent, Files.newOutputStream(file))) { + report.humanReadable(true).prettyPrint(); + report.startObject(); + List namesAndAliases = new ArrayList<>(DataType.namesAndAliases()); + Collections.sort(namesAndAliases); + for (String nameOrAlias : namesAndAliases) { + DataType expectedType = DataType.fromNameOrAlias(nameOrAlias); + if (expectedType == DataType.TEXT) { + expectedType = DataType.KEYWORD; + } + if (EsqlDataTypeConverter.converterFunctionFactory(expectedType) == null) { + continue; + } + LogicalPlan plan = parser.createStatement("ROW a = 1::" + nameOrAlias); + Row row = as(plan, Row.class); + assertThat(row.fields(), hasSize(1)); + Expression functionCall = row.fields().get(0).child(); + assertThat(functionCall.dataType(), equalTo(expectedType)); + report.field(nameOrAlias, functionName(registry, functionCall)); + } + report.endObject(); + } + logger.info("Wrote to file: {}", file); + } + + private String functionName(EsqlFunctionRegistry registry, Expression functionCall) { + for (FunctionDefinition def : registry.listFunctions()) { + if (functionCall.getClass().equals(def.clazz())) { + return def.name(); + } + } + throw new IllegalArgumentException("can't find name for " + functionCall); + } + private String error(String query) { ParsingException e = expectThrows(ParsingException.class, () -> defaultAnalyzer.analyze(parser.createStatement(query))); String message = e.getMessage(); From addad1e6c07399fbcb9ba10ba8ba9f96d55ab358 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 18 Jun 2024 13:01:50 +0200 Subject: [PATCH 20/26] Remove unused+deprecated code from 
UpdateRequest (#109246) These aren't used any longer so lets remove them. --- .../action/update/UpdateRequest.java | 161 ------------------ 1 file changed, 161 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/update/UpdateRequest.java b/server/src/main/java/org/elasticsearch/action/update/UpdateRequest.java index 36b6cc6aa9964..2cd5258bf4376 100644 --- a/server/src/main/java/org/elasticsearch/action/update/UpdateRequest.java +++ b/server/src/main/java/org/elasticsearch/action/update/UpdateRequest.java @@ -30,7 +30,6 @@ import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.script.Script; -import org.elasticsearch.script.ScriptType; import org.elasticsearch.search.fetch.subphase.FetchSourceContext; import org.elasticsearch.xcontent.NamedXContentRegistry; import org.elasticsearch.xcontent.ObjectParser; @@ -42,7 +41,6 @@ import org.elasticsearch.xcontent.XContentType; import java.io.IOException; -import java.util.HashMap; import java.util.Map; import static org.elasticsearch.action.ValidateActions.addValidationError; @@ -269,165 +267,6 @@ public UpdateRequest script(Script script) { return this; } - /** - * @deprecated Use {@link #script()} instead - */ - @Deprecated - public String scriptString() { - return this.script == null ? null : this.script.getIdOrCode(); - } - - /** - * @deprecated Use {@link #script()} instead - */ - @Deprecated - public ScriptType scriptType() { - return this.script == null ? null : this.script.getType(); - } - - /** - * @deprecated Use {@link #script()} instead - */ - @Deprecated - public Map scriptParams() { - return this.script == null ? null : this.script.getParams(); - } - - /** - * The script to execute. Note, make sure not to send different script each - * times and instead use script params if possible with the same - * (automatically compiled) script. 
- * - * @deprecated Use {@link #script(Script)} instead - */ - @Deprecated - public UpdateRequest script(String script, ScriptType scriptType) { - updateOrCreateScript(script, scriptType, null, null); - return this; - } - - /** - * The script to execute. Note, make sure not to send different script each - * times and instead use script params if possible with the same - * (automatically compiled) script. - * - * @deprecated Use {@link #script(Script)} instead - */ - @Deprecated - public UpdateRequest script(String script) { - updateOrCreateScript(script, ScriptType.INLINE, null, null); - return this; - } - - /** - * The language of the script to execute. - * - * @deprecated Use {@link #script(Script)} instead - */ - @Deprecated - public UpdateRequest scriptLang(String scriptLang) { - updateOrCreateScript(null, null, scriptLang, null); - return this; - } - - /** - * @deprecated Use {@link #script()} instead - */ - @Deprecated - public String scriptLang() { - return script == null ? null : script.getLang(); - } - - /** - * Add a script parameter. - * - * @deprecated Use {@link #script(Script)} instead - */ - @Deprecated - public UpdateRequest addScriptParam(String name, Object value) { - Script script = script(); - if (script == null) { - HashMap scriptParams = new HashMap<>(); - scriptParams.put(name, value); - updateOrCreateScript(null, null, null, scriptParams); - } else { - Map scriptParams = script.getParams(); - if (scriptParams == null) { - scriptParams = new HashMap<>(); - scriptParams.put(name, value); - updateOrCreateScript(null, null, null, scriptParams); - } else { - scriptParams.put(name, value); - } - } - return this; - } - - /** - * Sets the script parameters to use with the script. 
- * - * @deprecated Use {@link #script(Script)} instead - */ - @Deprecated - public UpdateRequest scriptParams(Map scriptParams) { - updateOrCreateScript(null, null, null, scriptParams); - return this; - } - - private void updateOrCreateScript(String scriptContent, ScriptType type, String lang, Map params) { - Script script = script(); - if (script == null) { - script = new Script(type == null ? ScriptType.INLINE : type, lang, scriptContent == null ? "" : scriptContent, params); - } else { - String newScriptContent = scriptContent == null ? script.getIdOrCode() : scriptContent; - ScriptType newScriptType = type == null ? script.getType() : type; - String newScriptLang = lang == null ? script.getLang() : lang; - Map newScriptParams = params == null ? script.getParams() : params; - script = new Script(newScriptType, newScriptLang, newScriptContent, newScriptParams); - } - script(script); - } - - /** - * The script to execute. Note, make sure not to send different script each - * times and instead use script params if possible with the same - * (automatically compiled) script. - * - * @deprecated Use {@link #script(Script)} instead - */ - @Deprecated - public UpdateRequest script(String script, ScriptType scriptType, @Nullable Map scriptParams) { - this.script = new Script(scriptType, Script.DEFAULT_SCRIPT_LANG, script, scriptParams); - return this; - } - - /** - * The script to execute. Note, make sure not to send different script each - * times and instead use script params if possible with the same - * (automatically compiled) script. 
- * - * @param script - * The script to execute - * @param scriptLang - * The script language - * @param scriptType - * The script type - * @param scriptParams - * The script parameters - * - * @deprecated Use {@link #script(Script)} instead - */ - @Deprecated - public UpdateRequest script( - String script, - @Nullable String scriptLang, - ScriptType scriptType, - @Nullable Map scriptParams - ) { - this.script = new Script(scriptType, scriptLang, script, scriptParams); - return this; - } - /** * Indicate that _source should be returned with every hit, with an * "include" and/or "exclude" set which can include simple wildcard From 3ebb048093ce9af34a1f57482a657c311af604d8 Mon Sep 17 00:00:00 2001 From: Pat Whelan Date: Tue, 18 Jun 2024 08:18:25 -0400 Subject: [PATCH 21/26] [ML] Avoid InferenceRunner deadlock (#109551) InferenceStep now passes a Listener to InferenceRunner. InferenceRunner will chain the listener in the `run` method such that the nested model loading and inference happen asynchronously without blocking threads. 
Relate #109134 Co-authored-by: Elastic Machine --- docs/changelog/109551.yaml | 5 ++ .../dataframe/DataFrameAnalyticsManager.java | 3 +- .../dataframe/inference/InferenceRunner.java | 74 +++++++++------- .../ml/dataframe/steps/InferenceStep.java | 16 ++-- .../inference/InferenceRunnerTests.java | 86 ++++++++++++++++++- 5 files changed, 141 insertions(+), 43 deletions(-) create mode 100644 docs/changelog/109551.yaml diff --git a/docs/changelog/109551.yaml b/docs/changelog/109551.yaml new file mode 100644 index 0000000000000..f4949669091d9 --- /dev/null +++ b/docs/changelog/109551.yaml @@ -0,0 +1,5 @@ +pr: 109551 +summary: Avoid `InferenceRunner` deadlock +area: Machine Learning +type: bug +issues: [] diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/DataFrameAnalyticsManager.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/DataFrameAnalyticsManager.java index 13f13a271c452..cb32ca01241a8 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/DataFrameAnalyticsManager.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/DataFrameAnalyticsManager.java @@ -306,7 +306,8 @@ private void buildInferenceStep(DataFrameAnalyticsTask task, DataFrameAnalyticsC config, extractedFields, task.getStatsHolder().getProgressTracker(), - task.getStatsHolder().getDataCountsTracker() + task.getStatsHolder().getDataCountsTracker(), + threadPool ); InferenceStep inferenceStep = new InferenceStep(client, task, auditor, config, threadPool, inferenceRunner); delegate.onResponse(inferenceStep); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/inference/InferenceRunner.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/inference/InferenceRunner.java index 073fb13cbf420..dfcc12d98be41 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/inference/InferenceRunner.java +++ 
b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/inference/InferenceRunner.java @@ -11,13 +11,13 @@ import org.apache.logging.log4j.Logger; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.DocWriteRequest; import org.elasticsearch.action.bulk.BulkRequest; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.action.support.PlainActionFuture; -import org.elasticsearch.action.support.UnsafePlainActionFuture; +import org.elasticsearch.action.support.SubscribableListener; import org.elasticsearch.client.internal.Client; import org.elasticsearch.client.internal.OriginSettingClient; import org.elasticsearch.common.settings.Settings; @@ -29,6 +29,7 @@ import org.elasticsearch.search.aggregations.metrics.Max; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.tasks.TaskId; +import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xpack.core.ClientHelper; import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig; import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; @@ -69,6 +70,7 @@ public class InferenceRunner { private final ProgressTracker progressTracker; private final DataCountsTracker dataCountsTracker; private final Function testDocsIteratorFactory; + private final ThreadPool threadPool; private volatile boolean isCancelled; InferenceRunner( @@ -81,7 +83,8 @@ public class InferenceRunner { ExtractedFields extractedFields, ProgressTracker progressTracker, DataCountsTracker dataCountsTracker, - Function testDocsIteratorFactory + Function testDocsIteratorFactory, + ThreadPool threadPool ) { this.settings = Objects.requireNonNull(settings); this.client = Objects.requireNonNull(client); @@ -93,43 +96,49 @@ 
public class InferenceRunner { this.progressTracker = Objects.requireNonNull(progressTracker); this.dataCountsTracker = Objects.requireNonNull(dataCountsTracker); this.testDocsIteratorFactory = Objects.requireNonNull(testDocsIteratorFactory); + this.threadPool = threadPool; } public void cancel() { isCancelled = true; } - public void run(String modelId) { + public void run(String modelId, ActionListener listener) { if (isCancelled) { + listener.onResponse(null); return; } LOGGER.info("[{}] Started inference on test data against model [{}]", config.getId(), modelId); - try { - PlainActionFuture localModelPlainActionFuture = new UnsafePlainActionFuture<>( - MachineLearning.UTILITY_THREAD_POOL_NAME - ); - modelLoadingService.getModelForInternalInference(modelId, localModelPlainActionFuture); + SubscribableListener.newForked(l -> modelLoadingService.getModelForInternalInference(modelId, l)) + .andThen(threadPool.executor(MachineLearning.UTILITY_THREAD_POOL_NAME), threadPool.getThreadContext(), this::handleLocalModel) + .addListener(listener.delegateResponse((delegate, e) -> delegate.onFailure(handleException(modelId, e)))); + } + + private void handleLocalModel(ActionListener listener, LocalModel localModel) { + try (localModel) { InferenceState inferenceState = restoreInferenceState(); dataCountsTracker.setTestDocsCount(inferenceState.processedTestDocsCount); - TestDocsIterator testDocsIterator = testDocsIteratorFactory.apply(inferenceState.lastIncrementalId); - try (LocalModel localModel = localModelPlainActionFuture.actionGet()) { - LOGGER.debug("Loaded inference model [{}]", localModel); - inferTestDocs(localModel, testDocsIterator, inferenceState.processedTestDocsCount); - } - } catch (Exception e) { - LOGGER.error(() -> format("[%s] Error running inference on model [%s]", config.getId(), modelId), e); - if (e instanceof ElasticsearchException elasticsearchException) { - throw new ElasticsearchStatusException( - "[{}] failed running inference on model [{}]; cause 
was [{}]", - elasticsearchException.status(), - elasticsearchException.getRootCause(), - config.getId(), - modelId, - elasticsearchException.getRootCause().getMessage() - ); - } - throw ExceptionsHelper.serverError( + var testDocsIterator = testDocsIteratorFactory.apply(inferenceState.lastIncrementalId); + LOGGER.debug("Loaded inference model [{}]", localModel); + inferTestDocs(localModel, testDocsIterator, inferenceState.processedTestDocsCount); + listener.onResponse(null); // void + } + } + + private Exception handleException(String modelId, Exception e) { + LOGGER.error(() -> format("[%s] Error running inference on model [%s]", config.getId(), modelId), e); + if (e instanceof ElasticsearchException elasticsearchException) { + return new ElasticsearchStatusException( + "[{}] failed running inference on model [{}]; cause was [{}]", + elasticsearchException.status(), + elasticsearchException.getRootCause(), + config.getId(), + modelId, + elasticsearchException.getRootCause().getMessage() + ); + } else { + return ExceptionsHelper.serverError( "[{}] failed running inference on model [{}]; cause was [{}]", e, config.getId(), @@ -179,6 +188,11 @@ private InferenceState restoreInferenceState() { } private void inferTestDocs(LocalModel model, TestDocsIterator testDocsIterator, long processedTestDocsCount) { + assert ThreadPool.assertCurrentThreadPool(MachineLearning.UTILITY_THREAD_POOL_NAME) + : format( + "inferTestDocs must execute from [MachineLearning.UTILITY_THREAD_POOL_NAME] but thread is [%s]", + Thread.currentThread().getName() + ); long totalDocCount = 0; long processedDocCount = processedTestDocsCount; @@ -255,7 +269,8 @@ public static InferenceRunner create( DataFrameAnalyticsConfig config, ExtractedFields extractedFields, ProgressTracker progressTracker, - DataCountsTracker dataCountsTracker + DataCountsTracker dataCountsTracker, + ThreadPool threadPool ) { return new InferenceRunner( settings, @@ -272,7 +287,8 @@ public static InferenceRunner create( config, 
extractedFields, lastIncrementalId - ) + ), + threadPool ); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/steps/InferenceStep.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/steps/InferenceStep.java index 37ad1a5cb8f56..482e82f9ec303 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/steps/InferenceStep.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/steps/InferenceStep.java @@ -11,6 +11,7 @@ import org.apache.logging.log4j.Logger; import org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionRunnable; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.TransportSearchAction; import org.elasticsearch.client.internal.node.NodeClient; @@ -87,18 +88,15 @@ protected void doExecute(ActionListener listener) { } private void runInference(String modelId, ActionListener listener) { - threadPool.executor(MachineLearning.UTILITY_THREAD_POOL_NAME).execute(() -> { - try { - inferenceRunner.run(modelId); - listener.onResponse(new StepResponse(isTaskStopping())); - } catch (Exception e) { + threadPool.executor(MachineLearning.UTILITY_THREAD_POOL_NAME).execute(ActionRunnable.wrap(listener, delegate -> { + inferenceRunner.run(modelId, ActionListener.wrap(aVoid -> delegate.onResponse(new StepResponse(isTaskStopping())), e -> { if (task.isStopping()) { - listener.onResponse(new StepResponse(false)); + delegate.onResponse(new StepResponse(false)); } else { - listener.onFailure(e); + delegate.onFailure(e); } - } - }); + })); + })); } private void searchIfTestDocsExist(ActionListener listener) { diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/inference/InferenceRunnerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/inference/InferenceRunnerTests.java index ad6b68e1051ff..c86596f237227 
100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/inference/InferenceRunnerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/dataframe/inference/InferenceRunnerTests.java @@ -20,6 +20,7 @@ import org.elasticsearch.client.internal.Client; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.concurrent.EsExecutors; import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.inference.InferenceResults; import org.elasticsearch.search.DocValueFormat; @@ -61,13 +62,18 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.Supplier; import java.util.stream.Collectors; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.is; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -115,7 +121,7 @@ public void testInferTestDocs() { return null; }).when(modelLoadingService).getModelForInternalInference(anyString(), any()); - createInferenceRunner(extractedFields, testDocsIterator).run("model id"); + run(createInferenceRunner(extractedFields, testDocsIterator)).assertSuccess(); var argumentCaptor = ArgumentCaptor.forClass(BulkRequest.class); @@ -146,8 +152,7 @@ public void testInferTestDocs_GivenCancelWasCalled() { InferenceRunner inferenceRunner = createInferenceRunner(extractedFields, infiniteDocsIterator); inferenceRunner.cancel(); - - inferenceRunner.run("model id"); + run(inferenceRunner).assertSuccess(); 
Mockito.verifyNoMoreInteractions(localModel, resultsPersisterService); assertThat(progressTracker.getInferenceProgressPercent(), equalTo(0)); @@ -178,7 +183,14 @@ private LocalModel localModelInferences(InferenceResults first, InferenceResults return localModel; } + private InferenceRunner createInferenceRunner(ExtractedFields extractedFields) { + return createInferenceRunner(extractedFields, mock(TestDocsIterator.class)); + } + private InferenceRunner createInferenceRunner(ExtractedFields extractedFields, TestDocsIterator testDocsIterator) { + var threadpool = mock(ThreadPool.class); + when(threadpool.executor(any())).thenReturn(EsExecutors.DIRECT_EXECUTOR_SERVICE); + when(threadpool.getThreadContext()).thenReturn(new ThreadContext(Settings.EMPTY)); return new InferenceRunner( Settings.EMPTY, client, @@ -189,10 +201,52 @@ private InferenceRunner createInferenceRunner(ExtractedFields extractedFields, T extractedFields, progressTracker, new DataCountsTracker(new DataCounts(config.getId())), - id -> testDocsIterator + id -> testDocsIterator, + threadpool ); } + private TestListener run(InferenceRunner inferenceRunner) { + var listener = new TestListener(); + inferenceRunner.run("id", listener); + return listener; + } + + /** + * When an exception is returned in a chained listener's onFailure call + * Then InferenceRunner should wrap it in an ElasticsearchException + */ + public void testModelLoadingServiceResponseWithAnException() { + var expectedCause = new IllegalArgumentException("this is a test"); + doAnswer(ans -> { + ActionListener responseListener = ans.getArgument(1); + responseListener.onFailure(expectedCause); + return null; + }).when(modelLoadingService).getModelForInternalInference(anyString(), any()); + + var actualException = run(createInferenceRunner(mock(ExtractedFields.class))).assertFailure(); + inferenceRunnerHandledException(actualException, expectedCause); + } + + /** + * When an exception is thrown within InferenceRunner + * Then InferenceRunner 
should wrap it in an ElasticsearchException + */ + public void testExceptionCallingModelLoadingService() { + var expectedCause = new IllegalArgumentException("this is a test"); + + doThrow(expectedCause).when(modelLoadingService).getModelForInternalInference(anyString(), any()); + + var actualException = run(createInferenceRunner(mock(ExtractedFields.class))).assertFailure(); + inferenceRunnerHandledException(actualException, expectedCause); + } + + private void inferenceRunnerHandledException(Exception actual, Exception expectedCause) { + assertThat(actual, instanceOf(ElasticsearchException.class)); + assertThat(actual.getCause(), is(expectedCause)); + assertThat(actual.getMessage(), equalTo("[test] failed running inference on model [id]; cause was [this is a test]")); + } + private Client mockClient() { var client = mock(Client.class); var threadpool = mock(ThreadPool.class); @@ -246,4 +300,28 @@ public SearchResponse actionGet() { } }; } + + private static class TestListener implements ActionListener { + private final AtomicBoolean success = new AtomicBoolean(false); + private final AtomicReference failure = new AtomicReference<>(); + + @Override + public void onResponse(Void t) { + success.set(true); + } + + @Override + public void onFailure(Exception e) { + failure.set(e); + } + + public void assertSuccess() { + assertTrue(success.get()); + } + + public Exception assertFailure() { + assertNotNull(failure.get()); + return failure.get(); + } + } } From e66cb6c632dd164a3810154ce00c8b3a3c0e8b5e Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 18 Jun 2024 13:30:27 +0100 Subject: [PATCH 22/26] AwaitsFix for #109852 --- .../test/java/org/elasticsearch/packaging/test/PackageTests.java | 1 + 1 file changed, 1 insertion(+) diff --git a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/PackageTests.java b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/PackageTests.java index 5c38fa36a6640..d3565dfc849be 100644 --- 
a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/PackageTests.java +++ b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/PackageTests.java @@ -210,6 +210,7 @@ public void test50Remove() throws Exception { assertThat(SYSTEMD_SERVICE, fileDoesNotExist()); } + @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/109852") public void test60Reinstall() throws Exception { try { install(); From 55431dd07bbaed86553fc1824c89f68d432ca75e Mon Sep 17 00:00:00 2001 From: Niels Bauman <33722607+nielsbauman@users.noreply.github.com> Date: Tue, 18 Jun 2024 15:29:49 +0200 Subject: [PATCH 23/26] Fix shards_capacity indicator assertion in Health API YAML test (#109808) This indicator is dependent on `HealthMetadata` being present in the cluster state, which we can't guarantee in this test, potentially resulting in an `unknown` status. --- .../resources/rest-api-spec/test/health/10_basic.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/health/10_basic.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/health/10_basic.yml index 5d5110fb54e45..46aa0862b7d9b 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/health/10_basic.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/health/10_basic.yml @@ -36,10 +36,9 @@ - exists: indicators.shards_availability.details.started_primaries - exists: indicators.shards_availability.details.unassigned_replicas - - match: { indicators.shards_capacity.status: "green" } - - match: { indicators.shards_capacity.symptom: "The cluster has enough room to add new shards." } - - exists: indicators.shards_capacity.details.data.max_shards_in_cluster - - exists: indicators.shards_capacity.details.frozen.max_shards_in_cluster + # The shards_availability indicator is dependent on HealthMetadata being present in the cluster state, which we can't guarantee. 
+ - is_true: indicators.shards_capacity.status + - is_true: indicators.shards_capacity.symptom - is_true: indicators.data_stream_lifecycle.status - is_true: indicators.data_stream_lifecycle.symptom From 3a41d8c804cf58496ad1a561b1639585ed835023 Mon Sep 17 00:00:00 2001 From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com> Date: Tue, 18 Jun 2024 15:03:32 +0100 Subject: [PATCH 24/26] ES|QL Add serialization to fulltext predicate types (#109819) This commit adds serialization to fulltext predicate types, so that queries build atop these types can be sent to data nodes. --- .../predicate/fulltext/FullTextPredicate.java | 30 +++++++++- .../fulltext/MatchQueryPredicate.java | 20 +++++++ .../fulltext/MultiMatchQueryPredicate.java | 29 ++++++++++ .../fulltext/StringQueryPredicate.java | 20 +++++++ .../xpack/esql/io/stream/PlanNamedTypes.java | 4 ++ .../AbstractFulltextSerializationTests.java | 44 ++++++++++++++ .../MatchQuerySerializationTests.java | 34 +++++++++++ .../MultiMatchQuerySerializationTests.java | 58 +++++++++++++++++++ .../StringQuerySerializationTests.java | 34 +++++++++++ 9 files changed, 272 insertions(+), 1 deletion(-) create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/fulltext/AbstractFulltextSerializationTests.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/fulltext/MatchQuerySerializationTests.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/fulltext/MultiMatchQuerySerializationTests.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/fulltext/StringQuerySerializationTests.java diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/fulltext/FullTextPredicate.java 
b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/fulltext/FullTextPredicate.java index 8da858865ed3f..e8ca84bc72988 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/fulltext/FullTextPredicate.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/fulltext/FullTextPredicate.java @@ -6,17 +6,28 @@ */ package org.elasticsearch.xpack.esql.core.expression.predicate.fulltext; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.Nullable; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Nullability; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.core.util.PlanStreamInput; +import org.elasticsearch.xpack.esql.core.util.PlanStreamOutput; +import java.io.IOException; import java.util.List; import java.util.Map; import java.util.Objects; public abstract class FullTextPredicate extends Expression { + public static List getNamedWriteables() { + return List.of(MatchQueryPredicate.ENTRY, MultiMatchQueryPredicate.ENTRY, StringQueryPredicate.ENTRY); + } + public enum Operator { AND, OR; @@ -32,7 +43,7 @@ public org.elasticsearch.index.query.Operator toEs() { // common properties private final String analyzer; - FullTextPredicate(Source source, String query, String options, List children) { + FullTextPredicate(Source source, String query, @Nullable String options, List children) { super(source, children); this.query = query; this.options = options; @@ -41,6 +52,15 @@ public org.elasticsearch.index.query.Operator toEs() { this.analyzer = optionMap.get("analyzer"); } + protected FullTextPredicate(StreamInput in) 
throws IOException { + this( + Source.readFrom((StreamInput & PlanStreamInput) in), + in.readString(), + in.readOptionalString(), + in.readCollectionAsList(input -> ((PlanStreamInput) in).readExpression()) + ); + } + public String query() { return query; } @@ -67,6 +87,14 @@ public DataType dataType() { return DataType.BOOLEAN; } + @Override + public void writeTo(StreamOutput out) throws IOException { + source().writeTo(out); + out.writeString(query); + out.writeOptionalString(options); + out.writeCollection(children(), (o, v) -> ((PlanStreamOutput) o).writeExpression(v)); + } + @Override public int hashCode() { return Objects.hash(query, options); diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/fulltext/MatchQueryPredicate.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/fulltext/MatchQueryPredicate.java index fc5bd6320e445..f2e6088167ba5 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/fulltext/MatchQueryPredicate.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/fulltext/MatchQueryPredicate.java @@ -6,10 +6,13 @@ */ package org.elasticsearch.xpack.esql.core.expression.predicate.fulltext; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; +import java.io.IOException; import java.util.List; import java.util.Objects; @@ -17,6 +20,12 @@ public class MatchQueryPredicate extends FullTextPredicate { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( + Expression.class, + "MatchQueryPredicate", + MatchQueryPredicate::new + ); + private final Expression field; public 
MatchQueryPredicate(Source source, Expression field, String query, String options) { @@ -24,6 +33,12 @@ public MatchQueryPredicate(Source source, Expression field, String query, String this.field = field; } + MatchQueryPredicate(StreamInput in) throws IOException { + super(in); + assert super.children().size() == 1; + field = super.children().get(0); + } + @Override protected NodeInfo info() { return NodeInfo.create(this, MatchQueryPredicate::new, field, query(), options()); @@ -51,4 +66,9 @@ public boolean equals(Object obj) { } return false; } + + @Override + public String getWriteableName() { + return ENTRY.name; + } } diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/fulltext/MultiMatchQueryPredicate.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/fulltext/MultiMatchQueryPredicate.java index 9e9d55ab4759a..2d66023a1407d 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/fulltext/MultiMatchQueryPredicate.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/fulltext/MultiMatchQueryPredicate.java @@ -6,10 +6,14 @@ */ package org.elasticsearch.xpack.esql.core.expression.predicate.fulltext; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; +import java.io.IOException; import java.util.List; import java.util.Map; import java.util.Objects; @@ -18,6 +22,12 @@ public class MultiMatchQueryPredicate extends FullTextPredicate { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( + Expression.class, + 
"MultiMatchQueryPredicate", + MultiMatchQueryPredicate::new + ); + private final String fieldString; private final Map fields; @@ -28,6 +38,14 @@ public MultiMatchQueryPredicate(Source source, String fieldString, String query, this.fields = FullTextUtils.parseFields(fieldString, source); } + MultiMatchQueryPredicate(StreamInput in) throws IOException { + super(in); + assert super.children().isEmpty(); + fieldString = in.readString(); + // inferred + this.fields = FullTextUtils.parseFields(fieldString, source()); + } + @Override protected NodeInfo info() { return NodeInfo.create(this, MultiMatchQueryPredicate::new, fieldString, query(), options()); @@ -46,6 +64,12 @@ public Map fields() { return fields; } + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeString(fieldString); + } + @Override public int hashCode() { return Objects.hash(fieldString, super.hashCode()); @@ -59,4 +83,9 @@ public boolean equals(Object obj) { } return false; } + + @Override + public String getWriteableName() { + return ENTRY.name; + } } diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/fulltext/StringQueryPredicate.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/fulltext/StringQueryPredicate.java index 17b673cb0da4e..95000a5364e12 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/fulltext/StringQueryPredicate.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/predicate/fulltext/StringQueryPredicate.java @@ -6,10 +6,13 @@ */ package org.elasticsearch.xpack.esql.core.expression.predicate.fulltext; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.xpack.esql.core.expression.Expression; import 
org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; +import java.io.IOException; import java.util.List; import java.util.Map; @@ -17,6 +20,12 @@ public final class StringQueryPredicate extends FullTextPredicate { + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( + Expression.class, + "StringQueryPredicate", + StringQueryPredicate::new + ); + private final Map fields; public StringQueryPredicate(Source source, String query, String options) { @@ -26,6 +35,12 @@ public StringQueryPredicate(Source source, String query, String options) { this.fields = FullTextUtils.parseFields(optionMap(), source); } + StringQueryPredicate(StreamInput in) throws IOException { + super(in); + assert super.children().isEmpty(); + this.fields = FullTextUtils.parseFields(optionMap(), source()); + } + @Override protected NodeInfo info() { return NodeInfo.create(this, StringQueryPredicate::new, query(), options()); @@ -39,4 +54,9 @@ public Expression replaceChildren(List newChildren) { public Map fields() { return fields; } + + @Override + public String getWriteableName() { + return ENTRY.name; + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypes.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypes.java index 52989d2d2a277..0629af2c17980 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypes.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/io/stream/PlanNamedTypes.java @@ -29,6 +29,7 @@ import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.expression.Order; import org.elasticsearch.xpack.esql.core.expression.function.scalar.ScalarFunction; +import org.elasticsearch.xpack.esql.core.expression.predicate.fulltext.FullTextPredicate; import 
org.elasticsearch.xpack.esql.core.expression.predicate.logical.And; import org.elasticsearch.xpack.esql.core.expression.predicate.logical.BinaryLogic; import org.elasticsearch.xpack.esql.core.expression.predicate.logical.Not; @@ -368,6 +369,9 @@ public static List namedTypeEntries() { for (NamedWriteableRegistry.Entry e : NamedExpression.getNamedWriteables()) { entries.add(of(Expression.class, e)); } + for (NamedWriteableRegistry.Entry e : FullTextPredicate.getNamedWriteables()) { + entries.add(of(Expression.class, e)); + } entries.add(of(Expression.class, UnsupportedAttribute.ENTRY)); entries.add(of(Expression.class, Literal.ENTRY)); entries.add(of(Expression.class, org.elasticsearch.xpack.esql.expression.Order.ENTRY)); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/fulltext/AbstractFulltextSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/fulltext/AbstractFulltextSerializationTests.java new file mode 100644 index 0000000000000..88f88436f8a04 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/fulltext/AbstractFulltextSerializationTests.java @@ -0,0 +1,44 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.expression.predicate.operator.fulltext; + +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.xpack.esql.core.expression.predicate.fulltext.FullTextPredicate; +import org.elasticsearch.xpack.esql.expression.AbstractExpressionSerializationTests; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +public abstract class AbstractFulltextSerializationTests extends AbstractExpressionSerializationTests { + + static final String OPTION_DELIMITER = ";"; + + @Override + protected List getNamedWriteables() { + return FullTextPredicate.getNamedWriteables(); + } + + String randomOptionOrNull() { + if (randomBoolean()) { + return null; + } + HashMap options = new HashMap<>(); + int maxOptions = randomInt(8); + for (int i = 0; i < maxOptions; i++) { + var opt = randomIndividualOption(); + options.computeIfAbsent(opt.getKey(), k -> opt.getValue()); // no duplicate options + } + return options.entrySet().stream().map(e -> e.getKey() + "=" + e.getValue()).collect(Collectors.joining(OPTION_DELIMITER)); + } + + Map.Entry randomIndividualOption() { + return Map.entry(randomAlphaOfLength(randomIntBetween(1, 4)), randomAlphaOfLength(randomIntBetween(1, 4))); + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/fulltext/MatchQuerySerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/fulltext/MatchQuerySerializationTests.java new file mode 100644 index 0000000000000..80a538cf84baa --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/fulltext/MatchQuerySerializationTests.java @@ -0,0 +1,34 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.predicate.operator.fulltext; + +import org.elasticsearch.xpack.esql.core.expression.predicate.fulltext.MatchQueryPredicate; +import org.elasticsearch.xpack.esql.expression.AbstractExpressionSerializationTests; + +import java.io.IOException; + +public class MatchQuerySerializationTests extends AbstractFulltextSerializationTests { + + @Override + protected final MatchQueryPredicate createTestInstance() { + return new MatchQueryPredicate(randomSource(), randomChild(), randomAlphaOfLength(randomIntBetween(1, 16)), randomOptionOrNull()); + } + + @Override + protected MatchQueryPredicate mutateInstance(MatchQueryPredicate instance) throws IOException { + var field = instance.field(); + var query = instance.query(); + var options = instance.options(); + switch (between(0, 2)) { + case 0 -> field = randomValueOtherThan(field, AbstractExpressionSerializationTests::randomChild); + case 1 -> query = randomValueOtherThan(query, () -> randomAlphaOfLength(randomIntBetween(1, 16))); + case 2 -> options = randomValueOtherThan(options, this::randomOptionOrNull); + } + return new MatchQueryPredicate(instance.source(), field, query, options); + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/fulltext/MultiMatchQuerySerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/fulltext/MultiMatchQuerySerializationTests.java new file mode 100644 index 0000000000000..d4d0f2edc11b1 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/fulltext/MultiMatchQuerySerializationTests.java @@ -0,0 +1,58 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.predicate.operator.fulltext; + +import org.elasticsearch.xpack.esql.core.expression.predicate.fulltext.MultiMatchQueryPredicate; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.stream.Collectors; + +public class MultiMatchQuerySerializationTests extends AbstractFulltextSerializationTests { + + @Override + protected final MultiMatchQueryPredicate createTestInstance() { + return new MultiMatchQueryPredicate( + randomSource(), + randomFieldString(), + randomAlphaOfLength(randomIntBetween(1, 16)), + randomOptionOrNull() + ); + } + + @Override + protected MultiMatchQueryPredicate mutateInstance(MultiMatchQueryPredicate instance) throws IOException { + var fieldString = instance.fieldString(); + var query = instance.query(); + var options = instance.options(); + switch (between(0, 2)) { + case 0 -> fieldString = randomValueOtherThan(fieldString, this::randomFieldString); + case 1 -> query = randomValueOtherThan(query, () -> randomAlphaOfLength(randomIntBetween(1, 16))); + case 2 -> options = randomValueOtherThan(options, this::randomOptionOrNull); + } + return new MultiMatchQueryPredicate(instance.source(), fieldString, query, options); + } + + String randomFieldString() { + if (randomBoolean()) { + return ""; // empty, no fields + } + HashMap fields = new HashMap<>(); + int maxOptions = randomInt(4); + for (int i = 0; i < maxOptions; i++) { + var opt = randomIndividualField(); + fields.computeIfAbsent(opt.getKey(), k -> opt.getValue()); // no duplicate fields + } + return fields.entrySet().stream().map(e -> e.getKey() + "^" + e.getValue()).collect(Collectors.joining(",")); + } + + Map.Entry randomIndividualField() { + return Map.entry(randomAlphaOfLength(randomIntBetween(1, 4)), randomFloat()); + } +} diff --git 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/fulltext/StringQuerySerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/fulltext/StringQuerySerializationTests.java new file mode 100644 index 0000000000000..ff00a161e1bb1 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/fulltext/StringQuerySerializationTests.java @@ -0,0 +1,34 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.predicate.operator.fulltext; + +import org.elasticsearch.xpack.esql.core.expression.predicate.fulltext.StringQueryPredicate; + +import java.io.IOException; + +public class StringQuerySerializationTests extends AbstractFulltextSerializationTests { + + private static final String COMMA = ","; + + @Override + protected final StringQueryPredicate createTestInstance() { + return new StringQueryPredicate(randomSource(), randomAlphaOfLength(randomIntBetween(1, 16)), randomOptionOrNull()); + } + + @Override + protected StringQueryPredicate mutateInstance(StringQueryPredicate instance) throws IOException { + var query = instance.query(); + var options = instance.options(); + if (randomBoolean()) { + query = randomValueOtherThan(query, () -> randomAlphaOfLength(randomIntBetween(1, 16))); + } else { + options = randomValueOtherThan(options, this::randomOptionOrNull); + } + return new StringQueryPredicate(instance.source(), query, options); + } +} From 429c970dd32e7e2a6fb3fa49cc24eb5ab0f5f47c Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Tue, 18 Jun 2024 10:06:01 -0400 Subject: [PATCH 25/26] Muting test70RestartServer for issue #109852 (#109860) --- 
.../test/java/org/elasticsearch/packaging/test/PackageTests.java | 1 + 1 file changed, 1 insertion(+) diff --git a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/PackageTests.java b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/PackageTests.java index d3565dfc849be..651bb6161017d 100644 --- a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/PackageTests.java +++ b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/PackageTests.java @@ -224,6 +224,7 @@ public void test60Reinstall() throws Exception { } } + @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/109852") public void test70RestartServer() throws Exception { try { install(); From 86b80b655197505e56c5b2ec5c036a019bc3bf66 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Tue, 18 Jun 2024 07:24:27 -0700 Subject: [PATCH 26/26] Remove legacy IndexResolver (#109821) Currently, we are using the legacy IndexResolver alongside the updated version to compare their outputs. While this approach helps in bug detection, it adds to the maintenance burden, particularly when adding new features to the field-caps API or the index resolver. This change removes the legacy IndexResolver from the ESQL codebase. 
--- .../esql/core/index/IndexCompatibility.java | 49 - .../xpack/esql/core/index/IndexResolver.java | 1046 ----------------- .../core/index/RemoteClusterResolver.java | 40 - .../index/VersionCompatibilityChecks.java | 63 - .../esql/enrich/EnrichPolicyResolver.java | 42 +- .../xpack/esql/execution/PlanExecutor.java | 8 +- .../xpack/esql/plugin/EsqlPlugin.java | 14 +- .../xpack/esql/session/EsqlSession.java | 150 +-- ...lIndexResolver.java => IndexResolver.java} | 27 +- .../xpack/esql/analysis/AnalyzerTests.java | 4 +- .../enrich/EnrichPolicyResolverTests.java | 35 +- .../session/IndexResolverFieldNamesTests.java | 4 +- .../esql/stats/PlanExecutorMetricsTests.java | 10 +- .../esql/type/EsqlDataTypeRegistryTests.java | 4 +- 14 files changed, 70 insertions(+), 1426 deletions(-) delete mode 100644 x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/index/IndexCompatibility.java delete mode 100644 x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/index/IndexResolver.java delete mode 100644 x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/index/RemoteClusterResolver.java delete mode 100644 x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/index/VersionCompatibilityChecks.java rename x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/{EsqlIndexResolver.java => IndexResolver.java} (91%) diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/index/IndexCompatibility.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/index/IndexCompatibility.java deleted file mode 100644 index 6cc0816661f01..0000000000000 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/index/IndexCompatibility.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. 
Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.esql.core.index; - -import org.elasticsearch.Version; -import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.core.type.EsField; -import org.elasticsearch.xpack.esql.core.type.UnsupportedEsField; - -import java.util.Map; - -import static org.elasticsearch.xpack.esql.core.index.VersionCompatibilityChecks.isTypeSupportedInVersion; -import static org.elasticsearch.xpack.esql.core.type.DataType.isPrimitive; -import static org.elasticsearch.xpack.esql.core.type.Types.propagateUnsupportedType; - -public final class IndexCompatibility { - - public static Map compatible(Map mapping, Version version) { - for (Map.Entry entry : mapping.entrySet()) { - EsField esField = entry.getValue(); - DataType dataType = esField.getDataType(); - if (isPrimitive(dataType) == false) { - compatible(esField.getProperties(), version); - } else if (isTypeSupportedInVersion(dataType, version) == false) { - EsField field = new UnsupportedEsField(entry.getKey(), dataType.nameUpper(), null, esField.getProperties()); - entry.setValue(field); - propagateUnsupportedType(entry.getKey(), dataType.nameUpper(), esField.getProperties()); - } - } - return mapping; - } - - public static EsIndex compatible(EsIndex esIndex, Version version) { - compatible(esIndex.mapping(), version); - return esIndex; - } - - public static IndexResolution compatible(IndexResolution indexResolution, Version version) { - if (indexResolution.isValid()) { - compatible(indexResolution.get(), version); - } - return indexResolution; - } -} diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/index/IndexResolver.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/index/IndexResolver.java deleted file mode 100644 index 63467eaadd8df..0000000000000 --- 
a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/index/IndexResolver.java +++ /dev/null @@ -1,1046 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ -package org.elasticsearch.xpack.esql.core.index; - -import org.elasticsearch.ElasticsearchSecurityException; -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest; -import org.elasticsearch.action.admin.indices.get.GetIndexRequest; -import org.elasticsearch.action.admin.indices.get.GetIndexRequest.Feature; -import org.elasticsearch.action.admin.indices.resolve.ResolveIndexAction; -import org.elasticsearch.action.fieldcaps.FieldCapabilities; -import org.elasticsearch.action.fieldcaps.FieldCapabilitiesRequest; -import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse; -import org.elasticsearch.action.support.IndicesOptions; -import org.elasticsearch.client.internal.Client; -import org.elasticsearch.cluster.metadata.AliasMetadata; -import org.elasticsearch.common.Strings; -import org.elasticsearch.common.util.Maps; -import org.elasticsearch.core.Tuple; -import org.elasticsearch.index.IndexNotFoundException; -import org.elasticsearch.index.mapper.TimeSeriesParams; -import org.elasticsearch.transport.NoSuchRemoteClusterException; -import org.elasticsearch.xpack.esql.core.QlIllegalArgumentException; -import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.core.type.DataTypeRegistry; -import org.elasticsearch.xpack.esql.core.type.DateEsField; -import org.elasticsearch.xpack.esql.core.type.EsField; -import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; -import org.elasticsearch.xpack.esql.core.type.KeywordEsField; -import 
org.elasticsearch.xpack.esql.core.type.TextEsField; -import org.elasticsearch.xpack.esql.core.type.UnsupportedEsField; -import org.elasticsearch.xpack.esql.core.util.CollectionUtils; -import org.elasticsearch.xpack.esql.core.util.Holder; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Comparator; -import java.util.EnumSet; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Objects; -import java.util.Set; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.function.BiConsumer; -import java.util.function.BiFunction; -import java.util.function.Function; -import java.util.function.Supplier; -import java.util.regex.Pattern; - -import static java.util.Collections.emptyList; -import static java.util.Collections.emptyMap; -import static org.elasticsearch.action.ActionListener.wrap; -import static org.elasticsearch.common.Strings.hasText; -import static org.elasticsearch.common.regex.Regex.simpleMatch; -import static org.elasticsearch.transport.RemoteClusterAware.buildRemoteIndexName; -import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; -import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; -import static org.elasticsearch.xpack.esql.core.type.DataType.OBJECT; -import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; -import static org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED; -import static org.elasticsearch.xpack.esql.core.util.StringUtils.qualifyAndJoinIndices; -import static org.elasticsearch.xpack.esql.core.util.StringUtils.splitQualifiedIndex; - -public class IndexResolver { - - public enum IndexType { - STANDARD_INDEX(SQL_TABLE, "INDEX"), - ALIAS(SQL_VIEW, "ALIAS"), - FROZEN_INDEX(SQL_TABLE, "FROZEN INDEX"), - // value for user types 
unrecognized - UNKNOWN("UNKNOWN", "UNKNOWN"); - - public static final EnumSet VALID_INCLUDE_FROZEN = EnumSet.of(STANDARD_INDEX, ALIAS, FROZEN_INDEX); - public static final EnumSet VALID_REGULAR = EnumSet.of(STANDARD_INDEX, ALIAS); - - private final String toSql; - private final String toNative; - - IndexType(String sql, String toNative) { - this.toSql = sql; - this.toNative = toNative; - } - - public String toSql() { - return toSql; - } - - public String toNative() { - return toNative; - } - } - - public record IndexInfo(String cluster, String name, IndexType type) { - - @Override - public String toString() { - return buildRemoteIndexName(cluster, name); - } - - } - - public static final String SQL_TABLE = "TABLE"; - public static final String SQL_VIEW = "VIEW"; - - private static final IndicesOptions INDICES_ONLY_OPTIONS = IndicesOptions.builder() - .concreteTargetOptions(IndicesOptions.ConcreteTargetOptions.ALLOW_UNAVAILABLE_TARGETS) - .wildcardOptions( - IndicesOptions.WildcardOptions.builder() - .matchOpen(true) - .matchClosed(false) - .includeHidden(false) - .allowEmptyExpressions(true) - .resolveAliases(false) - ) - .gatekeeperOptions( - IndicesOptions.GatekeeperOptions.builder().ignoreThrottled(true).allowClosedIndices(true).allowAliasToMultipleIndices(true) - ) - .build(); - private static final IndicesOptions FROZEN_INDICES_OPTIONS = IndicesOptions.builder() - .concreteTargetOptions(IndicesOptions.ConcreteTargetOptions.ALLOW_UNAVAILABLE_TARGETS) - .wildcardOptions( - IndicesOptions.WildcardOptions.builder() - .matchOpen(true) - .matchClosed(false) - .includeHidden(false) - .allowEmptyExpressions(true) - .resolveAliases(false) - ) - .gatekeeperOptions( - IndicesOptions.GatekeeperOptions.builder().ignoreThrottled(false).allowClosedIndices(true).allowAliasToMultipleIndices(true) - ) - .build(); - - public static final IndicesOptions FIELD_CAPS_INDICES_OPTIONS = IndicesOptions.builder() - 
.concreteTargetOptions(IndicesOptions.ConcreteTargetOptions.ALLOW_UNAVAILABLE_TARGETS) - .wildcardOptions( - IndicesOptions.WildcardOptions.builder() - .matchOpen(true) - .matchClosed(false) - .includeHidden(false) - .allowEmptyExpressions(true) - .resolveAliases(true) - ) - .gatekeeperOptions( - IndicesOptions.GatekeeperOptions.builder().ignoreThrottled(true).allowClosedIndices(true).allowAliasToMultipleIndices(true) - ) - .build(); - public static final IndicesOptions FIELD_CAPS_FROZEN_INDICES_OPTIONS = IndicesOptions.builder() - .concreteTargetOptions(IndicesOptions.ConcreteTargetOptions.ALLOW_UNAVAILABLE_TARGETS) - .wildcardOptions( - IndicesOptions.WildcardOptions.builder() - .matchOpen(true) - .matchClosed(false) - .includeHidden(false) - .allowEmptyExpressions(true) - .resolveAliases(true) - ) - .gatekeeperOptions( - IndicesOptions.GatekeeperOptions.builder().ignoreThrottled(false).allowClosedIndices(true).allowAliasToMultipleIndices(true) - ) - .build(); - - public static final Set ALL_FIELDS = Set.of("*"); - public static final Set INDEX_METADATA_FIELD = Set.of("_index"); - public static final String UNMAPPED = "unmapped"; - - private final Client client; - private final String clusterName; - private final DataTypeRegistry typeRegistry; - - private final Supplier> remoteClusters; - - public IndexResolver(Client client, String clusterName, DataTypeRegistry typeRegistry, Supplier> remoteClusters) { - this.client = client; - this.clusterName = clusterName; - this.typeRegistry = typeRegistry; - this.remoteClusters = remoteClusters; - } - - public String clusterName() { - return clusterName; - } - - public Set remoteClusters() { - return remoteClusters.get(); - } - - /** - * Resolves only the names, differentiating between indices and aliases. - * This method is required since the other methods rely on mapping which is tied to an index (not an alias). 
- */ - public void resolveNames( - String clusterWildcard, - String indexWildcard, - String javaRegex, - EnumSet types, - ActionListener> listener - ) { - - // first get aliases (if specified) - boolean retrieveAliases = CollectionUtils.isEmpty(types) || types.contains(IndexType.ALIAS); - boolean retrieveIndices = CollectionUtils.isEmpty(types) || types.contains(IndexType.STANDARD_INDEX); - boolean retrieveFrozenIndices = CollectionUtils.isEmpty(types) || types.contains(IndexType.FROZEN_INDEX); - - String[] indexWildcards = Strings.commaDelimitedListToStringArray(indexWildcard); - Set indexInfos = new HashSet<>(); - if (retrieveAliases && clusterIsLocal(clusterWildcard)) { - ResolveIndexAction.Request resolveRequest = new ResolveIndexAction.Request(indexWildcards, IndicesOptions.lenientExpandOpen()); - client.admin().indices().resolveIndex(resolveRequest, wrap(response -> { - for (ResolveIndexAction.ResolvedAlias alias : response.getAliases()) { - indexInfos.add(new IndexInfo(clusterName, alias.getName(), IndexType.ALIAS)); - } - for (ResolveIndexAction.ResolvedDataStream dataStream : response.getDataStreams()) { - indexInfos.add(new IndexInfo(clusterName, dataStream.getName(), IndexType.ALIAS)); - } - resolveIndices(clusterWildcard, indexWildcards, javaRegex, retrieveIndices, retrieveFrozenIndices, indexInfos, listener); - }, ex -> { - // with security, two exception can be thrown: - // INFE - if no alias matches - // security exception is the user cannot access aliases - - // in both cases, that is allowed and we continue with the indices request - if (ex instanceof IndexNotFoundException || ex instanceof ElasticsearchSecurityException) { - resolveIndices( - clusterWildcard, - indexWildcards, - javaRegex, - retrieveIndices, - retrieveFrozenIndices, - indexInfos, - listener - ); - } else { - listener.onFailure(ex); - } - })); - } else { - resolveIndices(clusterWildcard, indexWildcards, javaRegex, retrieveIndices, retrieveFrozenIndices, indexInfos, listener); - } - 
} - - private void resolveIndices( - String clusterWildcard, - String[] indexWildcards, - String javaRegex, - boolean retrieveIndices, - boolean retrieveFrozenIndices, - Set indexInfos, - ActionListener> listener - ) { - if (retrieveIndices || retrieveFrozenIndices) { - if (clusterIsLocal(clusterWildcard)) { // resolve local indices - GetIndexRequest indexRequest = new GetIndexRequest().local(true) - .indices(indexWildcards) - .features(Feature.SETTINGS) - .includeDefaults(false) - .indicesOptions(INDICES_ONLY_OPTIONS); - - // if frozen indices are requested, make sure to update the request accordingly - if (retrieveFrozenIndices) { - indexRequest.indicesOptions(FROZEN_INDICES_OPTIONS); - } - - client.admin().indices().getIndex(indexRequest, listener.delegateFailureAndWrap((delegate, indices) -> { - if (indices != null) { - for (String indexName : indices.getIndices()) { - boolean isFrozen = retrieveFrozenIndices - && indices.getSettings().get(indexName).getAsBoolean("index.frozen", false); - indexInfos.add( - new IndexInfo(clusterName, indexName, isFrozen ? IndexType.FROZEN_INDEX : IndexType.STANDARD_INDEX) - ); - } - } - resolveRemoteIndices(clusterWildcard, indexWildcards, javaRegex, retrieveFrozenIndices, indexInfos, delegate); - })); - } else { - resolveRemoteIndices(clusterWildcard, indexWildcards, javaRegex, retrieveFrozenIndices, indexInfos, listener); - } - } else { - filterResults(javaRegex, indexInfos, listener); - } - } - - private void resolveRemoteIndices( - String clusterWildcard, - String[] indexWildcards, - String javaRegex, - boolean retrieveFrozenIndices, - Set indexInfos, - ActionListener> listener - ) { - if (hasText(clusterWildcard)) { - IndicesOptions indicesOptions = retrieveFrozenIndices ? 
FIELD_CAPS_FROZEN_INDICES_OPTIONS : FIELD_CAPS_INDICES_OPTIONS; - FieldCapabilitiesRequest fieldRequest = createFieldCapsRequest( - qualifyAndJoinIndices(clusterWildcard, indexWildcards), - ALL_FIELDS, - indicesOptions, - emptyMap() - ); - client.fieldCaps(fieldRequest, wrap(response -> { - String[] indices = response.getIndices(); - if (indices != null) { - for (String indexName : indices) { - // TODO: perform two requests w/ & w/o frozen option to retrieve (by diff) the throttling status? - Tuple splitRef = splitQualifiedIndex(indexName); - // Field caps on "remote:foo" should always return either empty or remote indices. But in case cluster's - // detail is missing, it's going to be a local index. TODO: why would this happen? - String cluster = splitRef.v1() == null ? clusterName : splitRef.v1(); - indexInfos.add(new IndexInfo(cluster, splitRef.v2(), IndexType.STANDARD_INDEX)); - } - } - filterResults(javaRegex, indexInfos, listener); - }, ex -> { - // see comment in resolveNames() - if (ex instanceof NoSuchRemoteClusterException || ex instanceof ElasticsearchSecurityException) { - filterResults(javaRegex, indexInfos, listener); - } else { - listener.onFailure(ex); - } - })); - } else { - filterResults(javaRegex, indexInfos, listener); - } - } - - private static void filterResults(String javaRegex, Set indexInfos, ActionListener> listener) { - - // since the index name does not support ?, filter the results manually - Pattern pattern = javaRegex != null ? 
Pattern.compile(javaRegex) : null; - - Set result = new TreeSet<>(Comparator.comparing(IndexInfo::cluster).thenComparing(IndexInfo::name)); - for (IndexInfo indexInfo : indexInfos) { - if (pattern == null || pattern.matcher(indexInfo.name()).matches()) { - result.add(indexInfo); - } - } - listener.onResponse(result); - } - - private boolean clusterIsLocal(String clusterWildcard) { - return clusterWildcard == null || simpleMatch(clusterWildcard, clusterName); - } - - /** - * Resolves a pattern to one (potentially compound meaning that spawns multiple indices) mapping. - */ - public void resolveAsMergedMapping( - String indexWildcard, - Set fieldNames, - IndicesOptions indicesOptions, - Map runtimeMappings, - ActionListener listener - ) { - FieldCapabilitiesRequest fieldRequest = createFieldCapsRequest(indexWildcard, fieldNames, indicesOptions, runtimeMappings); - client.fieldCaps( - fieldRequest, - listener.delegateFailureAndWrap((l, response) -> l.onResponse(mergedMappings(typeRegistry, indexWildcard, response))) - ); - } - - /** - * Resolves a pattern to one (potentially compound meaning that spawns multiple indices) mapping. - */ - public void resolveAsMergedMapping( - String indexWildcard, - Set fieldNames, - boolean includeFrozen, - Map runtimeMappings, - ActionListener listener - ) { - resolveAsMergedMapping(indexWildcard, fieldNames, includeFrozen, runtimeMappings, listener, (fieldName, types) -> null); - } - - /** - * Resolves a pattern to one (potentially compound meaning that spawns multiple indices) mapping. 
- */ - public void resolveAsMergedMapping( - String indexWildcard, - Set fieldNames, - boolean includeFrozen, - Map runtimeMappings, - ActionListener listener, - BiFunction, InvalidMappedField> specificValidityVerifier - ) { - FieldCapabilitiesRequest fieldRequest = createFieldCapsRequest(indexWildcard, fieldNames, includeFrozen, runtimeMappings); - client.fieldCaps( - fieldRequest, - listener.delegateFailureAndWrap( - (l, response) -> l.onResponse(mergedMappings(typeRegistry, indexWildcard, response, specificValidityVerifier, null, null)) - ) - ); - } - - public void resolveAsMergedMapping( - String indexWildcard, - Set fieldNames, - boolean includeFrozen, - Map runtimeMappings, - ActionListener listener, - BiFunction, InvalidMappedField> specificValidityVerifier, - BiConsumer fieldUpdater, - Set allowedMetadataFields - ) { - FieldCapabilitiesRequest fieldRequest = createFieldCapsRequest(indexWildcard, fieldNames, includeFrozen, runtimeMappings); - client.fieldCaps( - fieldRequest, - listener.delegateFailureAndWrap( - (l, response) -> l.onResponse( - mergedMappings(typeRegistry, indexWildcard, response, specificValidityVerifier, fieldUpdater, allowedMetadataFields) - ) - ) - ); - } - - public static IndexResolution mergedMappings( - DataTypeRegistry typeRegistry, - String indexPattern, - FieldCapabilitiesResponse fieldCapsResponse, - BiFunction, InvalidMappedField> specificValidityVerifier - ) { - return mergedMappings(typeRegistry, indexPattern, fieldCapsResponse, specificValidityVerifier, null, null); - } - - public static IndexResolution mergedMappings( - DataTypeRegistry typeRegistry, - String indexPattern, - FieldCapabilitiesResponse fieldCapsResponse, - BiFunction, InvalidMappedField> specificValidityVerifier, - BiConsumer fieldUpdater, - Set allowedMetadataFields - ) { - - if (fieldCapsResponse.getIndices().length == 0) { - return IndexResolution.notFound(indexPattern); - } - - BiFunction, InvalidMappedField> validityVerifier = (fieldName, types) -> { - 
InvalidMappedField f = specificValidityVerifier.apply(fieldName, types); - if (f != null) { - return f; - } - - StringBuilder errorMessage = new StringBuilder(); - boolean hasUnmapped = types.containsKey(UNMAPPED); - - if (types.size() > (hasUnmapped ? 2 : 1)) { - // build the error message - // and create a MultiTypeField - - for (Entry type : types.entrySet()) { - // skip unmapped - if (UNMAPPED.equals(type.getKey())) { - continue; - } - - if (errorMessage.length() > 0) { - errorMessage.append(", "); - } - errorMessage.append("["); - errorMessage.append(type.getKey()); - errorMessage.append("] in "); - errorMessage.append(Arrays.toString(type.getValue().indices())); - } - - errorMessage.insert(0, "mapped as [" + (types.size() - (hasUnmapped ? 1 : 0)) + "] incompatible types: "); - - return new InvalidMappedField(fieldName, errorMessage.toString()); - } - // type is okay, check aggregation - else { - FieldCapabilities fieldCap = types.values().iterator().next(); - - // validate search/agg-able - if (fieldCap.isAggregatable() && fieldCap.nonAggregatableIndices() != null) { - errorMessage.append("mapped as aggregatable except in "); - errorMessage.append(Arrays.toString(fieldCap.nonAggregatableIndices())); - } - if (fieldCap.isSearchable() && fieldCap.nonSearchableIndices() != null) { - if (errorMessage.length() > 0) { - errorMessage.append(","); - } - errorMessage.append("mapped as searchable except in "); - errorMessage.append(Arrays.toString(fieldCap.nonSearchableIndices())); - } - - if (errorMessage.length() > 0) { - return new InvalidMappedField(fieldName, errorMessage.toString()); - } - } - - // everything checks - return null; - }; - - // merge all indices onto the same one - List indices = buildIndices( - typeRegistry, - null, - fieldCapsResponse, - null, - i -> indexPattern, - validityVerifier, - fieldUpdater, - allowedMetadataFields - ); - - if (indices.size() > 1) { - throw new QlIllegalArgumentException( - "Incorrect merging of mappings (likely due to a 
bug) - expect at most one but found [{}]", - indices.size() - ); - } - - String[] indexNames = fieldCapsResponse.getIndices(); - if (indices.isEmpty()) { - return IndexResolution.valid(new EsIndex(indexNames[0], emptyMap(), Set.of())); - } else { - EsIndex idx = indices.get(0); - return IndexResolution.valid(new EsIndex(idx.name(), idx.mapping(), Set.of(indexNames))); - } - } - - public static IndexResolution mergedMappings( - DataTypeRegistry typeRegistry, - String indexPattern, - FieldCapabilitiesResponse fieldCapsResponse - ) { - return mergedMappings(typeRegistry, indexPattern, fieldCapsResponse, (fieldName, types) -> null, null, null); - } - - private static EsField createField( - DataTypeRegistry typeRegistry, - String fieldName, - Map> globalCaps, - Map hierarchicalMapping, - Map flattedMapping, - Function field - ) { - - Map parentProps = hierarchicalMapping; - - int dot = fieldName.lastIndexOf('.'); - String fullFieldName = fieldName; - EsField parent = null; - - if (dot >= 0) { - String parentName = fieldName.substring(0, dot); - fieldName = fieldName.substring(dot + 1); - parent = flattedMapping.get(parentName); - if (parent == null) { - Map map = globalCaps.get(parentName); - Function fieldFunction; - - // lack of parent implies the field is an alias - if (map == null) { - // as such, create the field manually, marking the field to also be an alias - fieldFunction = s -> createField(typeRegistry, s, OBJECT.esType(), null, new TreeMap<>(), false, true); - } else { - Iterator iterator = map.values().iterator(); - FieldCapabilities parentCap = iterator.next(); - if (iterator.hasNext() && UNMAPPED.equals(parentCap.getType())) { - parentCap = iterator.next(); - } - final FieldCapabilities parentC = parentCap; - fieldFunction = s -> createField( - typeRegistry, - s, - parentC.getType(), - parentC.getMetricType(), - new TreeMap<>(), - parentC.isAggregatable(), - false - ); - } - - parent = createField(typeRegistry, parentName, globalCaps, hierarchicalMapping, 
flattedMapping, fieldFunction); - } - parentProps = parent.getProperties(); - } - - EsField esField = field.apply(fieldName); - - if (parent instanceof UnsupportedEsField unsupportedParent) { - String inherited = unsupportedParent.getInherited(); - String type = unsupportedParent.getOriginalType(); - - if (inherited == null) { - // mark the sub-field as unsupported, just like its parent, setting the first unsupported parent as the current one - esField = new UnsupportedEsField(esField.getName(), type, unsupportedParent.getName(), esField.getProperties()); - } else { - // mark the sub-field as unsupported, just like its parent, but setting the first unsupported parent - // as the parent's first unsupported grandparent - esField = new UnsupportedEsField(esField.getName(), type, inherited, esField.getProperties()); - } - } - - parentProps.put(fieldName, esField); - flattedMapping.put(fullFieldName, esField); - - return esField; - } - - private static EsField createField( - DataTypeRegistry typeRegistry, - String fieldName, - String typeName, - TimeSeriesParams.MetricType metricType, - Map props, - boolean isAggregateable, - boolean isAlias - ) { - DataType esType = typeRegistry.fromEs(typeName, metricType); - - if (esType == TEXT) { - return new TextEsField(fieldName, props, false, isAlias); - } - if (esType == KEYWORD) { - int length = Short.MAX_VALUE; - // TODO: to check whether isSearchable/isAggregateable takes into account the presence of the normalizer - boolean normalized = false; - return new KeywordEsField(fieldName, props, isAggregateable, length, normalized, isAlias); - } - if (esType == DATETIME) { - return DateEsField.dateEsField(fieldName, props, isAggregateable); - } - if (esType == UNSUPPORTED) { - String originalType = metricType == TimeSeriesParams.MetricType.COUNTER ? 
"counter" : typeName; - return new UnsupportedEsField(fieldName, originalType, null, props); - } - - return new EsField(fieldName, esType, props, isAggregateable, isAlias); - } - - private static FieldCapabilitiesRequest createFieldCapsRequest( - String index, - Set fieldNames, - IndicesOptions indicesOptions, - Map runtimeMappings - ) { - return new FieldCapabilitiesRequest().indices(Strings.commaDelimitedListToStringArray(index)) - .fields(fieldNames.toArray(String[]::new)) - .includeUnmapped(true) - .runtimeFields(runtimeMappings) - // lenient because we throw our own errors looking at the response e.g. if something was not resolved - // also because this way security doesn't throw authorization exceptions but rather honors ignore_unavailable - .indicesOptions(indicesOptions); - } - - private static FieldCapabilitiesRequest createFieldCapsRequest( - String index, - Set fieldNames, - boolean includeFrozen, - Map runtimeMappings - ) { - IndicesOptions indicesOptions = includeFrozen ? FIELD_CAPS_FROZEN_INDICES_OPTIONS : FIELD_CAPS_INDICES_OPTIONS; - return createFieldCapsRequest(index, fieldNames, indicesOptions, runtimeMappings); - } - - /** - * Resolves a pattern to multiple, separate indices. Doesn't perform validation. 
- */ - public void resolveAsSeparateMappings( - String indexWildcard, - String javaRegex, - boolean includeFrozen, - Map runtimeMappings, - ActionListener> listener - ) { - FieldCapabilitiesRequest fieldRequest = createFieldCapsRequest(indexWildcard, ALL_FIELDS, includeFrozen, runtimeMappings); - client.fieldCaps(fieldRequest, listener.delegateFailureAndWrap((delegate, response) -> { - client.admin().indices().getAliases(createGetAliasesRequest(response, includeFrozen), wrap(aliases -> { - delegate.onResponse(separateMappings(typeRegistry, javaRegex, response, aliases.getAliases())); - }, ex -> { - if (ex instanceof IndexNotFoundException || ex instanceof ElasticsearchSecurityException) { - delegate.onResponse(separateMappings(typeRegistry, javaRegex, response, null)); - } else { - delegate.onFailure(ex); - } - })); - })); - - } - - private static GetAliasesRequest createGetAliasesRequest(FieldCapabilitiesResponse response, boolean includeFrozen) { - return new GetAliasesRequest().aliases("*") - .indices(response.getIndices()) - .indicesOptions(includeFrozen ? FIELD_CAPS_FROZEN_INDICES_OPTIONS : FIELD_CAPS_INDICES_OPTIONS); - } - - public static List separateMappings( - DataTypeRegistry typeRegistry, - String javaRegex, - FieldCapabilitiesResponse fieldCaps, - Map> aliases - ) { - return buildIndices(typeRegistry, javaRegex, fieldCaps, aliases, Function.identity(), (s, cap) -> null, null, null); - } - - private static class Fields { - final Map hierarchicalMapping = new TreeMap<>(); - final Map flattedMapping = new LinkedHashMap<>(); - } - - /** - * Assemble an index-based mapping from the field caps (which is field based) by looking at the indices associated with - * each field. 
- */ - private static List buildIndices( - DataTypeRegistry typeRegistry, - String javaRegex, - FieldCapabilitiesResponse fieldCapsResponse, - Map> aliases, - Function indexNameProcessor, - BiFunction, InvalidMappedField> validityVerifier, - BiConsumer fieldUpdater, - Set allowedMetadataFields - ) { - - if ((fieldCapsResponse.getIndices() == null || fieldCapsResponse.getIndices().length == 0) - && (aliases == null || aliases.isEmpty())) { - return emptyList(); - } - - Set resolvedAliases = new HashSet<>(); - if (aliases != null) { - for (var aliasList : aliases.values()) { - for (AliasMetadata alias : aliasList) { - resolvedAliases.add(alias.getAlias()); - } - } - } - - Map indices = Maps.newLinkedHashMapWithExpectedSize(fieldCapsResponse.getIndices().length + resolvedAliases.size()); - Pattern pattern = javaRegex != null ? Pattern.compile(javaRegex) : null; - - // sort fields in reverse order to build the field hierarchy - TreeMap> sortedFields = new TreeMap<>(Collections.reverseOrder()); - final Map> fieldCaps = fieldCapsResponse.get(); - for (Entry> entry : fieldCaps.entrySet()) { - String fieldName = entry.getKey(); - // skip specific metadata fields - if ((allowedMetadataFields != null && allowedMetadataFields.contains(fieldName)) - || fieldCapsResponse.isMetadataField(fieldName) == false) { - sortedFields.put(fieldName, entry.getValue()); - } - } - - for (Entry> entry : sortedFields.entrySet()) { - String fieldName = entry.getKey(); - Map types = entry.getValue(); - final InvalidMappedField invalidField = validityVerifier.apply(fieldName, types); - // apply verification for fields belonging to index aliases - Map invalidFieldsForAliases = getInvalidFieldsForAliases(fieldName, types, aliases); - // For ESQL there are scenarios where there is no field asked from field_caps and the field_caps response only contains - // the list of indices. 
To be able to still have an "indices" list properly built (even if empty), the metadata fields are - // accepted but not actually added to each index hierarchy. - boolean isMetadataField = allowedMetadataFields != null && allowedMetadataFields.contains(fieldName); - - // check each type - for (Entry typeEntry : types.entrySet()) { - if (UNMAPPED.equals(typeEntry.getKey())) { - continue; - } - FieldCapabilities typeCap = typeEntry.getValue(); - String[] capIndices = typeCap.indices(); - - // compute the actual indices - if any are specified, take into account the unmapped indices - final String[] concreteIndices; - if (capIndices != null) { - concreteIndices = capIndices; - } else { - concreteIndices = fieldCapsResponse.getIndices(); - } - - Set uniqueAliases = new LinkedHashSet<>(); - // put the field in their respective mappings and collect the aliases names - for (String index : concreteIndices) { - List concreteIndexAliases = aliases != null ? aliases.get(index) : null; - if (concreteIndexAliases != null) { - for (AliasMetadata e : concreteIndexAliases) { - uniqueAliases.add(e.alias()); - } - } - // TODO is split still needed? - if (pattern == null || pattern.matcher(splitQualifiedIndex(index).v2()).matches()) { - String indexName = indexNameProcessor.apply(index); - Fields indexFields = indices.computeIfAbsent(indexName, k -> new Fields()); - EsField field = indexFields.flattedMapping.get(fieldName); - // create field hierarchy or update it in case of an invalid field - if (isMetadataField == false - && (field == null || (invalidField != null && (field instanceof InvalidMappedField) == false))) { - createField(typeRegistry, fieldName, indexFields, fieldCaps, invalidField, typeCap); - - // In evolving mappings, it is possible for a field to be promoted to an object in new indices - // meaning there are subfields associated with this *invalid* field. 
- // index_A: file -> keyword - // index_B: file -> object, file.name = keyword - // - // In the scenario above file is problematic but file.name is not. This scenario is addressed - // below through the dedicated callback - copy the existing properties or drop them all together. - // Note this applies for *invalid* fields (that have conflicts), not *unsupported* (those that cannot be read) - // See https://github.com/elastic/elasticsearch/pull/100875 - - // Postpone the call until is really needed - if (fieldUpdater != null && field != null) { - EsField newField = indexFields.flattedMapping.get(fieldName); - if (newField != field && newField instanceof InvalidMappedField newInvalidField) { - fieldUpdater.accept(field, newInvalidField); - } - } - } - } - } - // put the field in their respective mappings by alias name - for (String index : uniqueAliases) { - Fields indexFields = indices.computeIfAbsent(index, k -> new Fields()); - EsField field = indexFields.flattedMapping.get(fieldName); - if (isMetadataField == false && field == null && invalidFieldsForAliases.get(index) == null) { - createField(typeRegistry, fieldName, indexFields, fieldCaps, invalidField, typeCap); - } - } - } - } - - // return indices in ascending order - List foundIndices = new ArrayList<>(indices.size()); - for (Entry entry : indices.entrySet()) { - foundIndices.add(new EsIndex(entry.getKey(), entry.getValue().hierarchicalMapping, Set.of(entry.getKey()))); - } - foundIndices.sort(Comparator.comparing(EsIndex::name)); - return foundIndices; - } - - private static void createField( - DataTypeRegistry typeRegistry, - String fieldName, - Fields indexFields, - Map> fieldCaps, - InvalidMappedField invalidField, - FieldCapabilities typeCap - ) { - int dot = fieldName.lastIndexOf('.'); - /* - * Looking up the "tree" at the parent fields here to see if the field is an alias. - * When the upper elements of the "tree" have no elements in fieldcaps, then this is an alias field. 
But not - * always: if there are two aliases - a.b.c.alias1 and a.b.c.alias2 - only one of them will be considered alias. - */ - Holder isAliasFieldType = new Holder<>(false); - if (dot >= 0) { - String parentName = fieldName.substring(0, dot); - if (indexFields.flattedMapping.get(parentName) == null) { - // lack of parent implies the field is an alias - if (fieldCaps.get(parentName) == null) { - isAliasFieldType.set(true); - } - } - } - - createField( - typeRegistry, - fieldName, - fieldCaps, - indexFields.hierarchicalMapping, - indexFields.flattedMapping, - s -> invalidField != null - ? invalidField - : createField( - typeRegistry, - s, - typeCap.getType(), - typeCap.getMetricType(), - new TreeMap<>(), - typeCap.isAggregatable(), - isAliasFieldType.get() - ) - ); - } - - /* - * Checks if the field is valid (same type and same capabilities - searchable/aggregatable) across indices belonging to a list - * of aliases. - * A field can look like the example below (generated by field_caps API). 
- * "name": { - * "text": { - * "type": "text", - * "searchable": false, - * "aggregatable": false, - * "indices": [ - * "bar", - * "foo" - * ], - * "non_searchable_indices": [ - * "foo" - * ] - * }, - * "keyword": { - * "type": "keyword", - * "searchable": false, - * "aggregatable": true, - * "non_aggregatable_indices": [ - * "bar", "baz" - * ] - * } - * } - */ - private static Map getInvalidFieldsForAliases( - String fieldName, - Map types, - Map> aliases - ) { - if (aliases == null || aliases.isEmpty()) { - return emptyMap(); - } - Map invalidFields = new HashMap<>(); - Map> typesErrors = new HashMap<>(); // map holding aliases and a list of unique field types across its indices - Map> aliasToIndices = new HashMap<>(); // map with aliases and their list of indices - - for (var entry : aliases.entrySet()) { - for (AliasMetadata aliasMetadata : entry.getValue()) { - String aliasName = aliasMetadata.alias(); - aliasToIndices.putIfAbsent(aliasName, new HashSet<>()); - aliasToIndices.get(aliasName).add(entry.getKey()); - } - } - - // iterate over each type - for (Entry type : types.entrySet()) { - String esFieldType = type.getKey(); - if (Objects.equals(esFieldType, UNMAPPED)) { - continue; - } - String[] indices = type.getValue().indices(); - // if there is a list of indices where this field type is defined - if (indices != null) { - // Look at all these indices' aliases and add the type of the field to a list (Set) with unique elements. - // A valid mapping for a field in an index alias should contain only one type. If it doesn't, this means that field - // is mapped as different types across the indices in this index alias. 
- for (String index : indices) { - List indexAliases = aliases.get(index); - if (indexAliases == null) { - continue; - } - for (AliasMetadata aliasMetadata : indexAliases) { - String aliasName = aliasMetadata.alias(); - if (typesErrors.containsKey(aliasName)) { - typesErrors.get(aliasName).add(esFieldType); - } else { - Set fieldTypes = new HashSet<>(); - fieldTypes.add(esFieldType); - typesErrors.put(aliasName, fieldTypes); - } - } - } - } - } - - for (String aliasName : aliasToIndices.keySet()) { - // if, for the same index alias, there are multiple field types for this fieldName ie the index alias has indices where the same - // field name is of different types - Set esFieldTypes = typesErrors.get(aliasName); - if (esFieldTypes != null && esFieldTypes.size() > 1) { - // consider the field as invalid, for the currently checked index alias - // the error message doesn't actually matter - invalidFields.put(aliasName, new InvalidMappedField(fieldName)); - } else { - // if the field type is the same across all this alias' indices, check the field's capabilities (searchable/aggregatable) - for (Entry type : types.entrySet()) { - if (Objects.equals(type.getKey(), UNMAPPED)) { - continue; - } - FieldCapabilities f = type.getValue(); - - // the existence of a list of non_aggregatable_indices is an indication that not all indices have the same capabilities - // but this list can contain indices belonging to other aliases, so we need to check only for this alias - if (f.nonAggregatableIndices() != null) { - Set aliasIndices = aliasToIndices.get(aliasName); - int nonAggregatableCount = 0; - // either all or none of the non-aggregatable indices belonging to a certain alias should be in this list - for (String nonAggIndex : f.nonAggregatableIndices()) { - if (aliasIndices.contains(nonAggIndex)) { - nonAggregatableCount++; - } - } - if (nonAggregatableCount > 0 && nonAggregatableCount != aliasIndices.size()) { - invalidFields.put(aliasName, new InvalidMappedField(fieldName)); 
- break; - } - } - - // perform the same check for non_searchable_indices list - if (f.nonSearchableIndices() != null) { - Set aliasIndices = aliasToIndices.get(aliasName); - int nonSearchableCount = 0; - // either all or none of the non-searchable indices belonging to a certain alias should be in this list - for (String nonSearchIndex : f.nonSearchableIndices()) { - if (aliasIndices.contains(nonSearchIndex)) { - nonSearchableCount++; - } - } - if (nonSearchableCount > 0 && nonSearchableCount != aliasIndices.size()) { - invalidFields.put(aliasName, new InvalidMappedField(fieldName)); - break; - } - } - } - } - } - - if (invalidFields.size() > 0) { - return invalidFields; - } - // everything checks - return emptyMap(); - } - - /** - * Callback interface used when transitioning an already discovered EsField to an InvalidMapped one. - * By default, this interface is not used, meaning when a field is marked as invalid all its subfields - * are removed (are dropped). - * For cases where this is not desired, a different strategy can be employed such as keeping the properties: - * @see IndexResolver#PRESERVE_PROPERTIES - */ - public interface ExistingFieldInvalidCallback extends BiConsumer {}; - - /** - * Preserve the properties (sub fields) of an existing field even when marking it as invalid. - */ - public static ExistingFieldInvalidCallback PRESERVE_PROPERTIES = (oldField, newField) -> { - var oldProps = oldField.getProperties(); - if (oldProps.size() > 0) { - newField.getProperties().putAll(oldProps); - } - }; -} diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/index/RemoteClusterResolver.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/index/RemoteClusterResolver.java deleted file mode 100644 index e83eddc71000b..0000000000000 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/index/RemoteClusterResolver.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright Elasticsearch B.V. 
and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.esql.core.index; - -import org.elasticsearch.common.settings.ClusterSettings; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.transport.RemoteClusterAware; -import org.elasticsearch.transport.RemoteConnectionStrategy; - -import java.util.Set; -import java.util.TreeSet; -import java.util.concurrent.CopyOnWriteArraySet; - -public final class RemoteClusterResolver extends RemoteClusterAware { - private final CopyOnWriteArraySet clusters; - - public RemoteClusterResolver(Settings settings, ClusterSettings clusterSettings) { - super(settings); - clusters = new CopyOnWriteArraySet<>(getEnabledRemoteClusters(settings)); - listenForUpdates(clusterSettings); - } - - @Override - protected void updateRemoteCluster(String clusterAlias, Settings settings) { - if (RemoteConnectionStrategy.isConnectionEnabled(clusterAlias, settings)) { - clusters.add(clusterAlias); - } else { - clusters.remove(clusterAlias); - } - } - - public Set remoteClusters() { - return new TreeSet<>(clusters); - } -} diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/index/VersionCompatibilityChecks.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/index/VersionCompatibilityChecks.java deleted file mode 100644 index e4ae4f8f0d51f..0000000000000 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/index/VersionCompatibilityChecks.java +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- */ - -package org.elasticsearch.xpack.esql.core.index; - -import org.elasticsearch.TransportVersion; -import org.elasticsearch.TransportVersions; -import org.elasticsearch.Version; -import org.elasticsearch.core.Nullable; -import org.elasticsearch.xpack.esql.core.type.DataType; - -import static org.elasticsearch.Version.V_8_2_0; -import static org.elasticsearch.Version.V_8_4_0; -import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; -import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; - -public final class VersionCompatibilityChecks { - - public static final Version INTRODUCING_UNSIGNED_LONG = V_8_2_0; - public static final TransportVersion INTRODUCING_UNSIGNED_LONG_TRANSPORT = TransportVersions.V_8_2_0; - public static final Version INTRODUCING_VERSION_FIELD_TYPE = V_8_4_0; - - private VersionCompatibilityChecks() {} - - public static boolean isTypeSupportedInVersion(DataType dataType, Version version) { - if (dataType == UNSIGNED_LONG) { - return supportsUnsignedLong(version); - } - if (dataType == VERSION) { - return supportsVersionType(version); - } - return true; - } - - /** - * Does the provided {@code version} support the unsigned_long type (PR#60050)? - */ - public static boolean supportsUnsignedLong(Version version) { - return INTRODUCING_UNSIGNED_LONG.compareTo(version) <= 0; - } - - /** - * Does the provided {@code version} support the version type (PR#85502)? 
- */ - public static boolean supportsVersionType(Version version) { - return INTRODUCING_VERSION_FIELD_TYPE.compareTo(version) <= 0; - } - - public static @Nullable Version versionIntroducingType(DataType dataType) { - if (dataType == UNSIGNED_LONG) { - return INTRODUCING_UNSIGNED_LONG; - } - if (dataType == VERSION) { - return INTRODUCING_VERSION_FIELD_TYPE; - } - - return null; - } -} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java index 2b29d36cdfa1d..82eda9679074d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java @@ -37,11 +37,10 @@ import org.elasticsearch.xpack.core.enrich.EnrichPolicy; import org.elasticsearch.xpack.esql.analysis.EnrichResolution; import org.elasticsearch.xpack.esql.core.index.EsIndex; -import org.elasticsearch.xpack.esql.core.index.IndexResolver; import org.elasticsearch.xpack.esql.core.type.EsField; import org.elasticsearch.xpack.esql.core.util.StringUtils; import org.elasticsearch.xpack.esql.plan.logical.Enrich; -import org.elasticsearch.xpack.esql.session.EsqlSession; +import org.elasticsearch.xpack.esql.session.IndexResolver; import org.elasticsearch.xpack.esql.type.EsqlDataTypes; import java.io.IOException; @@ -359,29 +358,22 @@ public void messageReceived(LookupRequest request, TransportChannel channel, Tas } try (ThreadContext.StoredContext ignored = threadContext.stashWithOrigin(ClientHelper.ENRICH_ORIGIN)) { String indexName = EnrichPolicy.getBaseName(policyName); - indexResolver.resolveAsMergedMapping( - indexName, - IndexResolver.ALL_FIELDS, - false, - Map.of(), - refs.acquire(indexResult -> { - if (indexResult.isValid() && indexResult.get().concreteIndices().size() == 1) { - EsIndex esIndex = indexResult.get(); - var 
concreteIndices = Map.of(request.clusterAlias, Iterables.get(esIndex.concreteIndices(), 0)); - var resolved = new ResolvedEnrichPolicy( - p.getMatchField(), - p.getType(), - p.getEnrichFields(), - concreteIndices, - esIndex.mapping() - ); - resolvedPolices.put(policyName, resolved); - } else { - failures.put(policyName, indexResult.toString()); - } - }), - EsqlSession::specificValidity - ); + indexResolver.resolveAsMergedMapping(indexName, IndexResolver.ALL_FIELDS, refs.acquire(indexResult -> { + if (indexResult.isValid() && indexResult.get().concreteIndices().size() == 1) { + EsIndex esIndex = indexResult.get(); + var concreteIndices = Map.of(request.clusterAlias, Iterables.get(esIndex.concreteIndices(), 0)); + var resolved = new ResolvedEnrichPolicy( + p.getMatchField(), + p.getType(), + p.getEnrichFields(), + concreteIndices, + esIndex.mapping() + ); + resolvedPolices.put(policyName, resolved); + } else { + failures.put(policyName, indexResult.toString()); + } + })); } } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/execution/PlanExecutor.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/execution/PlanExecutor.java index 7af2668e9d74b..f4979fa9928db 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/execution/PlanExecutor.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/execution/PlanExecutor.java @@ -12,7 +12,6 @@ import org.elasticsearch.xpack.esql.analysis.PreAnalyzer; import org.elasticsearch.xpack.esql.analysis.Verifier; import org.elasticsearch.xpack.esql.core.expression.function.FunctionRegistry; -import org.elasticsearch.xpack.esql.core.index.IndexResolver; import org.elasticsearch.xpack.esql.enrich.EnrichPolicyResolver; import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry; import org.elasticsearch.xpack.esql.optimizer.LogicalOptimizerContext; @@ -20,8 +19,8 @@ import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import 
org.elasticsearch.xpack.esql.planner.Mapper; import org.elasticsearch.xpack.esql.session.EsqlConfiguration; -import org.elasticsearch.xpack.esql.session.EsqlIndexResolver; import org.elasticsearch.xpack.esql.session.EsqlSession; +import org.elasticsearch.xpack.esql.session.IndexResolver; import org.elasticsearch.xpack.esql.stats.Metrics; import org.elasticsearch.xpack.esql.stats.QueryMetric; @@ -30,16 +29,14 @@ public class PlanExecutor { private final IndexResolver indexResolver; - private final EsqlIndexResolver esqlIndexResolver; private final PreAnalyzer preAnalyzer; private final FunctionRegistry functionRegistry; private final Mapper mapper; private final Metrics metrics; private final Verifier verifier; - public PlanExecutor(IndexResolver indexResolver, EsqlIndexResolver esqlIndexResolver) { + public PlanExecutor(IndexResolver indexResolver) { this.indexResolver = indexResolver; - this.esqlIndexResolver = esqlIndexResolver; this.preAnalyzer = new PreAnalyzer(); this.functionRegistry = new EsqlFunctionRegistry(); this.mapper = new Mapper(functionRegistry); @@ -58,7 +55,6 @@ public void esql( sessionId, cfg, indexResolver, - esqlIndexResolver, enrichPolicyResolver, preAnalyzer, functionRegistry, diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java index 228ed6c5b4b32..d3b2d5c6e7646 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java @@ -57,13 +57,12 @@ import org.elasticsearch.xpack.esql.action.RestEsqlQueryAction; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; -import org.elasticsearch.xpack.esql.core.index.IndexResolver; import org.elasticsearch.xpack.esql.core.type.EsField; import 
org.elasticsearch.xpack.esql.enrich.EnrichLookupOperator; import org.elasticsearch.xpack.esql.execution.PlanExecutor; import org.elasticsearch.xpack.esql.expression.function.UnsupportedAttribute; import org.elasticsearch.xpack.esql.querydsl.query.SingleValueQuery; -import org.elasticsearch.xpack.esql.session.EsqlIndexResolver; +import org.elasticsearch.xpack.esql.session.IndexResolver; import org.elasticsearch.xpack.esql.type.EsqlDataTypeRegistry; import java.lang.invoke.MethodHandles; @@ -71,7 +70,6 @@ import java.util.Collection; import java.util.List; import java.util.Objects; -import java.util.Set; import java.util.function.Predicate; import java.util.function.Supplier; @@ -110,15 +108,7 @@ public Collection createComponents(PluginServices services) { BlockFactory blockFactory = new BlockFactory(circuitBreaker, bigArrays, maxPrimitiveArrayBlockSize); setupSharedSecrets(); return List.of( - new PlanExecutor( - new IndexResolver( - services.client(), - services.clusterService().getClusterName().value(), - EsqlDataTypeRegistry.INSTANCE, - Set::of - ), - new EsqlIndexResolver(services.client(), EsqlDataTypeRegistry.INSTANCE) - ), + new PlanExecutor(new IndexResolver(services.client(), EsqlDataTypeRegistry.INSTANCE)), new ExchangeService(services.clusterService().getSettings(), services.threadPool(), ThreadPool.Names.SEARCH, blockFactory), blockFactory ); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 1f5374b73466e..0589424b37d1e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -11,7 +11,6 @@ import org.elasticsearch.action.fieldcaps.FieldCapabilities; import org.elasticsearch.common.Strings; import org.elasticsearch.common.regex.Regex; -import 
org.elasticsearch.core.Assertions; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; @@ -31,12 +30,9 @@ import org.elasticsearch.xpack.esql.core.expression.UnresolvedStar; import org.elasticsearch.xpack.esql.core.expression.function.FunctionRegistry; import org.elasticsearch.xpack.esql.core.index.IndexResolution; -import org.elasticsearch.xpack.esql.core.index.IndexResolver; import org.elasticsearch.xpack.esql.core.index.MappingException; import org.elasticsearch.xpack.esql.core.plan.TableIdentifier; import org.elasticsearch.xpack.esql.core.plan.logical.LogicalPlan; -import org.elasticsearch.xpack.esql.core.type.DataType; -import org.elasticsearch.xpack.esql.core.type.EsField; import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.enrich.EnrichPolicyResolver; @@ -59,7 +55,6 @@ import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Set; @@ -68,7 +63,6 @@ import java.util.stream.Collectors; import static org.elasticsearch.index.query.QueryBuilders.boolQuery; -import static org.elasticsearch.xpack.esql.core.index.IndexResolver.UNMAPPED; import static org.elasticsearch.xpack.esql.core.util.ActionListeners.map; import static org.elasticsearch.xpack.esql.core.util.StringUtils.WILDCARD; @@ -79,7 +73,6 @@ public class EsqlSession { private final String sessionId; private final EsqlConfiguration configuration; private final IndexResolver indexResolver; - private final EsqlIndexResolver esqlIndexResolver; private final EnrichPolicyResolver enrichPolicyResolver; private final PreAnalyzer preAnalyzer; @@ -94,7 +87,6 @@ public EsqlSession( String sessionId, EsqlConfiguration configuration, IndexResolver indexResolver, - EsqlIndexResolver esqlIndexResolver, EnrichPolicyResolver enrichPolicyResolver, 
PreAnalyzer preAnalyzer, FunctionRegistry functionRegistry, @@ -105,7 +97,6 @@ public EsqlSession( this.sessionId = sessionId; this.configuration = configuration; this.indexResolver = indexResolver; - this.esqlIndexResolver = esqlIndexResolver; this.enrichPolicyResolver = enrichPolicyResolver; this.preAnalyzer = preAnalyzer; this.verifier = verifier; @@ -207,12 +198,7 @@ private void preAnalyzeIndices(LogicalPlan parsed, ActionListener void preAnalyzeIndices(LogicalPlan parsed, ActionListener fieldNames, - ActionListener listener - ) { - indexResolver.resolveAsMergedMapping(indexWildcard, fieldNames, false, Map.of(), new ActionListener<>() { - @Override - public void onResponse(IndexResolution fromQl) { - esqlIndexResolver.resolveAsMergedMapping(indexWildcard, fieldNames, new ActionListener<>() { - @Override - public void onResponse(IndexResolution fromEsql) { - if (fromQl.isValid() == false) { - if (fromEsql.isValid()) { - throw new IllegalArgumentException( - "ql and esql didn't make the same resolution: validity differs " + fromQl + " != " + fromEsql - ); - } - } else { - assertSameMappings("", fromQl.get().mapping(), fromEsql.get().mapping()); - if (fromQl.get().concreteIndices().equals(fromEsql.get().concreteIndices()) == false) { - throw new IllegalArgumentException( - "ql and esql didn't make the same resolution: concrete indices differ " - + fromQl.get().concreteIndices() - + " != " - + fromEsql.get().concreteIndices() - ); - } - } - listener.onResponse(fromEsql); - } - - private void assertSameMappings(String prefix, Map fromQl, Map fromEsql) { - List qlFields = new ArrayList<>(); - qlFields.addAll(fromQl.keySet()); - Collections.sort(qlFields); - - List esqlFields = new ArrayList<>(); - esqlFields.addAll(fromEsql.keySet()); - Collections.sort(esqlFields); - if (qlFields.equals(esqlFields) == false) { - throw new IllegalArgumentException( - prefix + ": ql and esql didn't make the same resolution: fields differ \n" + qlFields + " !=\n" + esqlFields - ); - } 
- - for (int f = 0; f < qlFields.size(); f++) { - String name = qlFields.get(f); - EsField qlField = fromQl.get(name); - EsField esqlField = fromEsql.get(name); - - if (qlField.getProperties().isEmpty() == false || esqlField.getProperties().isEmpty() == false) { - assertSameMappings( - prefix.equals("") ? name : prefix + "." + name, - qlField.getProperties(), - esqlField.getProperties() - ); - } - - /* - * Check that the field itself is the same, skipping isAlias because - * we don't actually use it in ESQL and the EsqlIndexResolver doesn't - * produce exactly the same result. - */ - if (qlField.getDataType().equals(DataType.UNSUPPORTED) == false - && qlField.getName().equals(esqlField.getName()) == false - // QL uses full paths for unsupported fields. ESQL does not. This particular difference is fine. - ) { - throw new IllegalArgumentException( - prefix - + "." - + name - + ": ql and esql didn't make the same resolution: names differ [" - + qlField.getName() - + "] != [" - + esqlField.getName() - + "]" - ); - } - if (qlField.getDataType() != esqlField.getDataType()) { - throw new IllegalArgumentException( - prefix - + "." - + name - + ": ql and esql didn't make the same resolution: types differ [" - + qlField.getDataType() - + "] != [" - + esqlField.getDataType() - + "]" - ); - } - if (qlField.isAggregatable() != esqlField.isAggregatable()) { - throw new IllegalArgumentException( - prefix - + "." 
- + name - + ": ql and esql didn't make the same resolution: aggregability differ [" - + qlField.isAggregatable() - + "] != [" - + esqlField.isAggregatable() - + "]" - ); - } - } - } - - @Override - public void onFailure(Exception e) { - listener.onFailure(e); - } - }); - } - - @Override - public void onFailure(Exception e) { - listener.onFailure(e); - } - }, - EsqlSession::specificValidity, - IndexResolver.PRESERVE_PROPERTIES, - // TODO no matter what metadata fields are asked in a query, the "allowedMetadataFields" is always _index, does it make - // sense to reflect the actual list of metadata fields instead? - IndexResolver.INDEX_METADATA_FIELD - ); - } - static Set fieldNames(LogicalPlan parsed, Set enrichPolicyMatchFields) { if (false == parsed.anyMatch(plan -> plan instanceof Aggregate || plan instanceof Project)) { // no explicit columns selection, for example "from employees" @@ -476,14 +332,14 @@ public void optimizedPhysicalPlan(LogicalPlan logicalPlan, ActionListener types) { - boolean hasUnmapped = types.containsKey(UNMAPPED); + boolean hasUnmapped = types.containsKey(IndexResolver.UNMAPPED); boolean hasTypeConflicts = types.size() > (hasUnmapped ? 
2 : 1); String metricConflictsTypeName = null; boolean hasMetricConflicts = false; if (hasTypeConflicts == false) { for (Map.Entry type : types.entrySet()) { - if (UNMAPPED.equals(type.getKey())) { + if (IndexResolver.UNMAPPED.equals(type.getKey())) { continue; } if (type.getValue().metricConflictsIndices() != null && type.getValue().metricConflictsIndices().length > 0) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlIndexResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java similarity index 91% rename from x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlIndexResolver.java rename to x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java index f973983e47f39..983a45f36169e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlIndexResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java @@ -11,13 +11,13 @@ import org.elasticsearch.action.fieldcaps.FieldCapabilitiesRequest; import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse; import org.elasticsearch.action.fieldcaps.IndexFieldCapabilities; +import org.elasticsearch.action.support.IndicesOptions; import org.elasticsearch.client.internal.Client; import org.elasticsearch.common.Strings; import org.elasticsearch.index.mapper.TimeSeriesParams; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.xpack.esql.core.index.EsIndex; import org.elasticsearch.xpack.esql.core.index.IndexResolution; -import org.elasticsearch.xpack.esql.core.index.IndexResolver; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.DataTypeRegistry; import org.elasticsearch.xpack.esql.core.type.DateEsField; @@ -43,11 +43,30 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; import static 
org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED; -public class EsqlIndexResolver { +public class IndexResolver { + public static final Set ALL_FIELDS = Set.of("*"); + public static final Set INDEX_METADATA_FIELD = Set.of("_index"); + public static final String UNMAPPED = "unmapped"; + + public static final IndicesOptions FIELD_CAPS_INDICES_OPTIONS = IndicesOptions.builder() + .concreteTargetOptions(IndicesOptions.ConcreteTargetOptions.ALLOW_UNAVAILABLE_TARGETS) + .wildcardOptions( + IndicesOptions.WildcardOptions.builder() + .matchOpen(true) + .matchClosed(false) + .includeHidden(false) + .allowEmptyExpressions(true) + .resolveAliases(true) + ) + .gatekeeperOptions( + IndicesOptions.GatekeeperOptions.builder().ignoreThrottled(true).allowClosedIndices(true).allowAliasToMultipleIndices(true) + ) + .build(); + private final Client client; private final DataTypeRegistry typeRegistry; - public EsqlIndexResolver(Client client, DataTypeRegistry typeRegistry) { + public IndexResolver(Client client, DataTypeRegistry typeRegistry) { this.client = client; this.typeRegistry = typeRegistry; } @@ -245,7 +264,7 @@ private static FieldCapabilitiesRequest createFieldCapsRequest(String index, Set req.includeUnmapped(true); // lenient because we throw our own errors looking at the response e.g. 
if something was not resolved // also because this way security doesn't throw authorization exceptions but rather honors ignore_unavailable - req.indicesOptions(IndexResolver.FIELD_CAPS_INDICES_OPTIONS); + req.indicesOptions(FIELD_CAPS_INDICES_OPTIONS); req.setMergeResults(false); return req; } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 975d8e1c7d7b8..794bdc23f08c5 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -53,7 +53,7 @@ import org.elasticsearch.xpack.esql.plan.logical.Row; import org.elasticsearch.xpack.esql.plan.logical.local.EsqlProject; import org.elasticsearch.xpack.esql.plugin.EsqlPlugin; -import org.elasticsearch.xpack.esql.session.EsqlIndexResolver; +import org.elasticsearch.xpack.esql.session.IndexResolver; import org.elasticsearch.xpack.esql.type.EsqlDataTypeRegistry; import java.io.IOException; @@ -2106,7 +2106,7 @@ protected List filteredWarnings() { private static LogicalPlan analyzeWithEmptyFieldCapsResponse(String query) throws IOException { List idxResponses = List.of(new FieldCapabilitiesIndexResponse("idx", "idx", Map.of(), true)); FieldCapabilitiesResponse caps = new FieldCapabilitiesResponse(idxResponses, List.of()); - IndexResolution resolution = new EsqlIndexResolver(null, EsqlDataTypeRegistry.INSTANCE).mergedMappings("test*", caps); + IndexResolution resolution = new IndexResolver(null, EsqlDataTypeRegistry.INSTANCE).mergedMappings("test*", caps); var analyzer = analyzer(resolution, TEST_VERIFIER, configuration(query)); return analyze(query, analyzer); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolverTests.java 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolverTests.java index 90fca14b7b06d..9f81437bd1b77 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolverTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolverTests.java @@ -11,10 +11,12 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionRequest; import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.action.ActionRunnable; import org.elasticsearch.action.ActionType; -import org.elasticsearch.action.fieldcaps.FieldCapabilities; +import org.elasticsearch.action.fieldcaps.FieldCapabilitiesIndexResponse; import org.elasticsearch.action.fieldcaps.FieldCapabilitiesRequest; import org.elasticsearch.action.fieldcaps.FieldCapabilitiesResponse; +import org.elasticsearch.action.fieldcaps.IndexFieldCapabilities; import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.client.internal.FilterClient; import org.elasticsearch.cluster.ClusterName; @@ -36,8 +38,8 @@ import org.elasticsearch.xpack.core.enrich.EnrichMetadata; import org.elasticsearch.xpack.core.enrich.EnrichPolicy; import org.elasticsearch.xpack.esql.analysis.EnrichResolution; -import org.elasticsearch.xpack.esql.core.index.IndexResolver; import org.elasticsearch.xpack.esql.plan.logical.Enrich; +import org.elasticsearch.xpack.esql.session.IndexResolver; import org.elasticsearch.xpack.esql.type.EsqlDataTypeRegistry; import org.junit.After; import org.junit.Before; @@ -416,7 +418,7 @@ class TestEnrichPolicyResolver extends EnrichPolicyResolver { super( mockClusterService(policies), transports.get(cluster), - new IndexResolver(new FieldCapsClient(threadPool, aliases, mappings), cluster, EsqlDataTypeRegistry.INSTANCE, Set::of) + new IndexResolver(new FieldCapsClient(threadPool, aliases, mappings), EsqlDataTypeRegistry.INSTANCE) ); this.policies = policies; 
this.cluster = cluster; @@ -483,30 +485,19 @@ protected void String alias = aliases.get(r.indices()[0]); assertNotNull(alias); Map mapping = mappings.get(alias); + final FieldCapabilitiesResponse response; if (mapping != null) { - Map> fieldCaps = new HashMap<>(); + Map fieldCaps = new HashMap<>(); for (Map.Entry e : mapping.entrySet()) { - var f = new FieldCapabilities( - e.getKey(), - e.getValue(), - false, - false, - false, - true, - null, - new String[] { alias }, - null, - null, - null, - null, - Map.of() - ); - fieldCaps.put(e.getKey(), Map.of(e.getValue(), f)); + var f = new IndexFieldCapabilities(e.getKey(), e.getValue(), false, false, false, false, null, Map.of()); + fieldCaps.put(e.getKey(), f); } - listener.onResponse((Response) new FieldCapabilitiesResponse(new String[] { alias }, fieldCaps)); + var indexResponse = new FieldCapabilitiesIndexResponse(alias, null, fieldCaps, true); + response = new FieldCapabilitiesResponse(List.of(indexResponse), List.of()); } else { - listener.onResponse((Response) new FieldCapabilitiesResponse(new String[0], Map.of())); + response = new FieldCapabilitiesResponse(List.of(), List.of()); } + threadPool().executor(ThreadPool.Names.SEARCH_COORDINATION).execute(ActionRunnable.supply(listener, () -> (Response) response)); } } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java index 8d1353cbddd42..17dca8096de0f 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java @@ -13,8 +13,8 @@ import java.util.Collections; import java.util.Set; -import static org.elasticsearch.xpack.esql.core.index.IndexResolver.ALL_FIELDS; -import static 
org.elasticsearch.xpack.esql.core.index.IndexResolver.INDEX_METADATA_FIELD; +import static org.elasticsearch.xpack.esql.session.IndexResolver.ALL_FIELDS; +import static org.elasticsearch.xpack.esql.session.IndexResolver.INDEX_METADATA_FIELD; import static org.hamcrest.Matchers.equalTo; public class IndexResolverFieldNamesTests extends ESTestCase { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/PlanExecutorMetricsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/PlanExecutorMetricsTests.java index d3011506bb5ef..5883d41f32125 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/PlanExecutorMetricsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/PlanExecutorMetricsTests.java @@ -20,11 +20,10 @@ import org.elasticsearch.xpack.esql.VerificationException; import org.elasticsearch.xpack.esql.action.EsqlQueryRequest; import org.elasticsearch.xpack.esql.analysis.EnrichResolution; -import org.elasticsearch.xpack.esql.core.index.IndexResolver; import org.elasticsearch.xpack.esql.enrich.EnrichPolicyResolver; import org.elasticsearch.xpack.esql.execution.PlanExecutor; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; -import org.elasticsearch.xpack.esql.session.EsqlIndexResolver; +import org.elasticsearch.xpack.esql.session.IndexResolver; import org.elasticsearch.xpack.esql.type.EsqlDataTypeRegistry; import org.junit.After; import org.junit.Before; @@ -34,7 +33,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Set; import static org.elasticsearch.xpack.esql.EsqlTestUtils.withDefaultLimitWarning; import static org.hamcrest.Matchers.instanceOf; @@ -73,7 +71,7 @@ public void testFailedMetric() { String[] indices = new String[] { "test" }; Client qlClient = mock(Client.class); - IndexResolver idxResolver = new IndexResolver(qlClient, randomAlphaOfLength(10), EsqlDataTypeRegistry.INSTANCE, 
Set::of); + IndexResolver idxResolver = new IndexResolver(qlClient, EsqlDataTypeRegistry.INSTANCE); // simulate a valid field_caps response so we can parse and correctly analyze de query FieldCapabilitiesResponse fieldCapabilitiesResponse = mock(FieldCapabilitiesResponse.class); when(fieldCapabilitiesResponse.getIndices()).thenReturn(indices); @@ -87,7 +85,7 @@ public void testFailedMetric() { }).when(qlClient).fieldCaps(any(), any()); Client esqlClient = mock(Client.class); - EsqlIndexResolver esqlIndexResolver = new EsqlIndexResolver(esqlClient, EsqlDataTypeRegistry.INSTANCE); + IndexResolver indexResolver = new IndexResolver(esqlClient, EsqlDataTypeRegistry.INSTANCE); doAnswer((Answer) invocation -> { @SuppressWarnings("unchecked") ActionListener listener = (ActionListener) invocation.getArguments()[1]; @@ -96,7 +94,7 @@ public void testFailedMetric() { return null; }).when(esqlClient).fieldCaps(any(), any()); - var planExecutor = new PlanExecutor(idxResolver, esqlIndexResolver); + var planExecutor = new PlanExecutor(indexResolver); var enrichResolver = mockEnrichResolver(); var request = new EsqlQueryRequest(); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistryTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistryTests.java index 7dca73219d6a1..ad7be1e38681f 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistryTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeRegistryTests.java @@ -14,7 +14,7 @@ import org.elasticsearch.xpack.esql.core.index.IndexResolution; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.EsField; -import org.elasticsearch.xpack.esql.session.EsqlIndexResolver; +import org.elasticsearch.xpack.esql.session.IndexResolver; import java.util.List; import java.util.Map; @@ -51,7 +51,7 @@ private void 
resolve(String esTypeName, TimeSeriesParams.MetricType metricType, ); FieldCapabilitiesResponse caps = new FieldCapabilitiesResponse(idxResponses, List.of()); - IndexResolution resolution = new EsqlIndexResolver(null, EsqlDataTypeRegistry.INSTANCE).mergedMappings("idx-*", caps); + IndexResolution resolution = new IndexResolver(null, EsqlDataTypeRegistry.INSTANCE).mergedMappings("idx-*", caps); EsField f = resolution.get().mapping().get(field); assertThat(f.getDataType(), equalTo(expected)); }