From 10f4dbfb8d5d9bf07bf6e407c3d11dffdd3080c4 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Fri, 11 Aug 2023 16:31:05 +0000
Subject: [PATCH 001/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-6e262dbb099

---
 build-tools-internal/version.properties |   2 +-
 docs/Versions.asciidoc                  |   4 +-
 gradle/verification-metadata.xml        | 144 ++++++++++++------------
 3 files changed, 75 insertions(+), 75 deletions(-)

diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties
index 603ae345d7824..299faf29b762a 100644
--- a/build-tools-internal/version.properties
+++ b/build-tools-internal/version.properties
@@ -1,5 +1,5 @@
 elasticsearch = 8.10.0
-lucene = 9.7.0
+lucene = 9.8.0-snapshot-6e262dbb099

 bundled_jdk_vendor = openjdk
 bundled_jdk = 20.0.2+9@6e380f22cbe7469fa75fb448bd903d8e

diff --git a/docs/Versions.asciidoc b/docs/Versions.asciidoc
index 466dc74d19e8e..d42e785dc1058 100644
--- a/docs/Versions.asciidoc
+++ b/docs/Versions.asciidoc
@@ -1,8 +1,8 @@
 include::{docs-root}/shared/versions/stack/{source_branch}.asciidoc[]

-:lucene_version:      9.7.0
-:lucene_version_path: 9_7_0
+:lucene_version:      9.8.0
+:lucene_version_path: 9_8_0
 :jdk:                 11.0.2
 :jdk_major:           11
 :build_type:          tar

diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index ce9661d635ccf..22e3dddebe2d2 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -2508,124 +2508,124 @@
 [sha256 checksum entries for the updated Lucene snapshot artifacts; XML element content lost in extraction]

From b96ed35026df7fab00eab3ffa9ada232f64f5108 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Sat, 12 Aug 2023 06:14:32 +0000
Subject: [PATCH 002/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-e888dbea451

---
 build-tools-internal/version.properties |   2 +-
 gradle/verification-metadata.xml        | 144 ++++++++++++------------
 2 files changed, 73 insertions(+), 73 deletions(-)

diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties
index 299faf29b762a..0f3c0f29bda1a 100644
--- a/build-tools-internal/version.properties
+++ b/build-tools-internal/version.properties
@@ -1,5 +1,5 @@
 elasticsearch = 8.10.0
-lucene = 9.8.0-snapshot-6e262dbb099
+lucene = 9.8.0-snapshot-e888dbea451

 bundled_jdk_vendor = openjdk
 bundled_jdk = 20.0.2+9@6e380f22cbe7469fa75fb448bd903d8e

diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index 22e3dddebe2d2..52a182053fa6b 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -2508,124 +2508,124 @@
 [sha256 checksum entries for the updated Lucene snapshot artifacts; XML element content lost in extraction]

From 21bf292a3c884f1e2572c4a0e9feb6affae904f9 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Sun, 13 Aug 2023 06:16:12 +0000
Subject: [PATCH 003/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-e888dbea451

---
 gradle/verification-metadata.xml | 48 ++++++++++++++++----------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index 52a182053fa6b..18624d9110c3d 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -2510,122 +2510,122 @@
 [sha256 checksum entries for the updated Lucene snapshot artifacts; XML element content lost in extraction]

From db1694ef2ca68c2738fcf586fc1fca1b9ceda5a4 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Mon, 14 Aug 2023 06:17:48 +0000
Subject: [PATCH 004/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-e888dbea451

---
 gradle/verification-metadata.xml | 48 ++++++++++++++++----------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index 18624d9110c3d..4b9df75abb374 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -2510,122 +2510,122 @@
 [sha256 checksum entries for the updated Lucene snapshot artifacts; XML element content lost in extraction]

From cd7d15a8462b2852a78f7a33adf1dd15af7f9d7e Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Tue, 15 Aug 2023 06:13:39 +0000
Subject: [PATCH 005/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-14e0947cdb9

---
 build-tools-internal/version.properties |   2 +-
 gradle/verification-metadata.xml        | 144 ++++++++++++------------
 2 files changed, 73 insertions(+), 73 deletions(-)

diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties
index 0f3c0f29bda1a..546d45f682cfb 100644
--- a/build-tools-internal/version.properties
+++ b/build-tools-internal/version.properties
@@ -1,5 +1,5 @@
 elasticsearch = 8.10.0
-lucene = 9.8.0-snapshot-e888dbea451
+lucene = 9.8.0-snapshot-14e0947cdb9

 bundled_jdk_vendor = openjdk
 bundled_jdk = 20.0.2+9@6e380f22cbe7469fa75fb448bd903d8e

diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index 4b9df75abb374..a47bf3cf00631 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -2508,124 +2508,124 @@
 [sha256 checksum entries for the updated Lucene snapshot artifacts; XML element content lost in extraction]

From 6c558a7f0ef6eba9f97d0001facc12188295d6a1 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Wed, 16 Aug 2023 06:15:07 +0000
Subject: [PATCH 006/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-14e0947cdb9

---
 gradle/verification-metadata.xml | 48 ++++++++++++++++----------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index a47bf3cf00631..4444ae0272e2a 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -2510,122 +2510,122 @@
 [sha256 checksum entries for the updated Lucene snapshot artifacts; XML element content lost in extraction]

From 7ffd562958aa8445349f5fe2e5ed89ef0fd0b50d Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Sat, 19 Aug 2023 06:12:50 +0000
Subject: [PATCH 007/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-db02a1ee780

---
 build-tools-internal/version.properties |   2 +-
 gradle/verification-metadata.xml        | 144 ++++++++++++------------
 2 files changed, 73 insertions(+), 73 deletions(-)

diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties
index 546d45f682cfb..673416abcc5b4 100644
--- a/build-tools-internal/version.properties
+++ b/build-tools-internal/version.properties
@@ -1,5 +1,5 @@
 elasticsearch = 8.10.0
-lucene = 9.8.0-snapshot-14e0947cdb9
+lucene = 9.8.0-snapshot-db02a1ee780

 bundled_jdk_vendor = openjdk
 bundled_jdk = 20.0.2+9@6e380f22cbe7469fa75fb448bd903d8e

diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index 4444ae0272e2a..f8d950290cd6e 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -2508,124 +2508,124 @@
 [sha256 checksum entries for the updated Lucene snapshot artifacts; XML element content lost in extraction]

From 2a4c6199ab0c043cbae0a55e2b755cc94d6dfba2 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Sun, 20 Aug 2023 06:14:25 +0000
Subject: [PATCH 008/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-db02a1ee780

---
 gradle/verification-metadata.xml | 48 ++++++++++++++++----------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index f8d950290cd6e..e970060a718e3 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -2510,122 +2510,122 @@
 [sha256 checksum entries for the updated Lucene snapshot artifacts; XML element content lost in extraction]

From e9b34d43632ede7a84b804bca790e683b6c17915 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Mon, 21 Aug 2023 06:12:57 +0000
Subject: [PATCH 009/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-db02a1ee780

---
 gradle/verification-metadata.xml | 48 ++++++++++++++++----------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index e970060a718e3..4c6702061564a 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -2510,122 +2510,122 @@
 [sha256 checksum entries for the updated Lucene snapshot artifacts; XML element content lost in extraction]

From 1c5714341211c131d1385fed9e7c33ebeaf794dc Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Tue, 22 Aug 2023 06:14:21 +0000
Subject: [PATCH 010/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-6a2d2c4057b

---
 build-tools-internal/version.properties |   2 +-
 gradle/verification-metadata.xml        | 144 ++++++++++++------------
 2 files changed, 73 insertions(+), 73 deletions(-)

diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties
index 63b44a542b744..fb40e4e1c97c9 100644
--- a/build-tools-internal/version.properties
+++ b/build-tools-internal/version.properties
@@ -1,5 +1,5 @@
 elasticsearch = 8.11.0
-lucene = 9.8.0-snapshot-db02a1ee780
+lucene = 9.8.0-snapshot-6a2d2c4057b

 bundled_jdk_vendor = openjdk
 bundled_jdk = 20.0.2+9@6e380f22cbe7469fa75fb448bd903d8e

diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index 61a10c489ea35..b4c179fe878ba 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -2513,124 +2513,124 @@
 [sha256 checksum entries for the updated Lucene snapshot artifacts; XML element content lost in extraction]

From 76e5d07827b1330bbdbecf60e5c73d0f10783603 Mon Sep 17 00:00:00 2001
From: Benjamin Trent
Date: Tue, 22 Aug 2023 09:44:22 -0400
Subject: [PATCH 011/136] Fix leaf reader and vector reader overrides (#98688)
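Context for the changes below: Lucene 9.8 makes `searchNearestVectors(field, target, KnnCollector, Bits)` the overridable entry point on `LeafReader` and `KnnVectorsReader`. Results now flow through a caller-supplied collector instead of a returned (possibly null) `TopDocs`, while a `TopDocs`-returning convenience wrapper remains for callers. A minimal sketch of the new call pattern — the reader, field name, and query vector are illustrative, not taken from the diff:

```java
import java.io.IOException;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.search.KnnCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopKnnCollector;
import org.apache.lucene.util.Bits;

class KnnCollectorSketch {
    static TopDocs topK(LeafReader reader, String field, float[] query, int k, Bits liveDocs) throws IOException {
        // TopKnnCollector keeps the k best hits; the second argument caps how many
        // vectors may be visited (the role of the old visitedLimit parameter).
        KnnCollector collector = new TopKnnCollector(k, Integer.MAX_VALUE);
        reader.searchNearestVectors(field, query, collector, liveDocs);
        // An untouched collector yields an empty TopDocs rather than null, which is why
        // patch 012 below switches FieldSubsetReaderTests from assertNull to asserting zero hits.
        return collector.topDocs();
    }
}
```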
* Fix leaf reader and vector reader overrides * Fix 370_profile knn dfs profiling test --- .../rest-api-spec/test/search/370_profile.yml | 2 -- .../cluster/stats/DenseVectorFieldStats.java | 4 +--- .../diskusage/IndexDiskUsageAnalyzer.java | 7 +++++-- .../org/elasticsearch/index/IndexVersion.java | 2 +- .../index/engine/TranslogDirectoryReader.java | 10 +++++----- .../index/mapper/DocumentLeafReader.java | 6 +++--- .../internal/ExitableDirectoryReader.java | 17 +++++++++-------- .../FieldUsageTrackingDirectoryReader.java | 16 +++++++--------- .../frozen/RewriteCachingDirectoryReader.java | 6 +++--- .../authz/accesscontrol/FieldSubsetReader.java | 14 +++++++++----- 10 files changed, 43 insertions(+), 41 deletions(-) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/370_profile.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/370_profile.yml index 0adc7e0caf684..38212ba59a51e 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/370_profile.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/370_profile.yml @@ -220,8 +220,6 @@ dfs knn vector profiling: - gt: { profile.shards.0.dfs.knn.0.query.0.breakdown.score: 0 } - match: { profile.shards.0.dfs.knn.0.query.0.breakdown.compute_max_score_count: 0 } - match: { profile.shards.0.dfs.knn.0.query.0.breakdown.compute_max_score: 0 } - - gt: { profile.shards.0.dfs.knn.0.query.0.breakdown.advance_count: 0 } - - gt: { profile.shards.0.dfs.knn.0.query.0.breakdown.advance: 0 } - gt: { profile.shards.0.dfs.knn.0.query.0.breakdown.build_scorer_count: 0 } - gt: { profile.shards.0.dfs.knn.0.query.0.breakdown.build_scorer: 0 } - gt: { profile.shards.0.dfs.knn.0.query.0.breakdown.create_weight: 0 } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/DenseVectorFieldStats.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/DenseVectorFieldStats.java index c565ac8bbb95b..e27972a60cbc9 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/DenseVectorFieldStats.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/DenseVectorFieldStats.java @@ -15,8 +15,6 @@ import java.io.IOException; import java.util.Objects; -import static org.apache.lucene.index.VectorValues.MAX_DIMENSIONS; - /** * Holds enhanced stats about a dense vector mapped field. 
*/ @@ -28,7 +26,7 @@ public final class DenseVectorFieldStats extends FieldStats { DenseVectorFieldStats(String name) { super(name); indexedVectorCount = 0; - indexedVectorDimMin = MAX_DIMENSIONS; + indexedVectorDimMin = 1024; indexedVectorDimMax = 0; } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzer.java b/server/src/main/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzer.java index 1b5d584914381..f232591a05a68 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzer.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzer.java @@ -41,6 +41,8 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.KnnCollector; +import org.apache.lucene.search.TopKnnCollector; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.store.IOContext; @@ -525,6 +527,7 @@ void analyzeKnnVectors(SegmentReader reader, IndexDiskUsageStats stats) throws I for (FieldInfo field : reader.getFieldInfos()) { cancellationChecker.checkForCancellation(); directory.resetBytesRead(); + final KnnCollector collector = new TopKnnCollector(100, Integer.MAX_VALUE); if (field.getVectorDimension() > 0) { switch (field.getVectorEncoding()) { case BYTE -> { @@ -542,7 +545,7 @@ void analyzeKnnVectors(SegmentReader reader, IndexDiskUsageStats stats) throws I break; } cancellationChecker.checkForCancellation(); - vectorReader.search(field.name, vectorValues.vectorValue(), 100, null, Integer.MAX_VALUE); + vectorReader.search(field.name, vectorValues.vectorValue(), collector, null); } stats.addKnnVectors(field.name, directory.getBytesRead()); } @@ -561,7 +564,7 @@ void analyzeKnnVectors(SegmentReader reader, IndexDiskUsageStats stats) throws I break; } cancellationChecker.checkForCancellation(); - vectorReader.search(field.name, vectorValues.vectorValue(), 100, null, Integer.MAX_VALUE); + vectorReader.search(field.name, vectorValues.vectorValue(), collector, null); } stats.addKnnVectors(field.name, directory.getBytesRead()); } diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersion.java b/server/src/main/java/org/elasticsearch/index/IndexVersion.java index a70bea87850ad..a25a25db38c18 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersion.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersion.java @@ -180,7 +180,7 @@ private static IndexVersion registerIndexVersion(int id, Version luceneVersion, public static final IndexVersion V_8_9_1 = registerIndexVersion(8_09_01_99, Version.LUCENE_9_7_0, "955a80ac-f70c-40a5-9399-1d8a1e5d342d"); public static final IndexVersion V_8_9_2 = registerIndexVersion(8_09_02_99, Version.LUCENE_9_7_0, "14c7d64c-9e25-4265-b4fa-e0c5aca67f14"); public static final IndexVersion V_8_10_0 = registerIndexVersion(8_10_00_99, Version.LUCENE_9_7_0, "2e107286-12ad-4c51-9a6f-f8943663b6e7"); - public static final IndexVersion V_8_11_0 = registerIndexVersion(8_11_00_99, Version.LUCENE_9_7_0, "f08382c0-06ab-41f4-a56a-cf5397275627"); + public static final IndexVersion V_8_11_0 = registerIndexVersion(8_11_00_99, Version.LUCENE_9_8_0, "f08382c0-06ab-41f4-a56a-cf5397275627"); /* * READ THE JAVADOC ABOVE BEFORE ADDING NEW INDEX VERSIONS * Detached index versions added below here. 
diff --git a/server/src/main/java/org/elasticsearch/index/engine/TranslogDirectoryReader.java b/server/src/main/java/org/elasticsearch/index/engine/TranslogDirectoryReader.java index b531be35cda6e..a09810750c66e 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/TranslogDirectoryReader.java +++ b/server/src/main/java/org/elasticsearch/index/engine/TranslogDirectoryReader.java @@ -39,7 +39,7 @@ import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.KnnCollector; import org.apache.lucene.store.ByteBuffersDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; @@ -359,13 +359,13 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException { } @Override - public TopDocs searchNearestVectors(String field, float[] target, int k, Bits acceptDocs, int visitedLimit) throws IOException { - return getDelegate().searchNearestVectors(field, target, k, acceptDocs, visitedLimit); + public void searchNearestVectors(String field, float[] target, KnnCollector collector, Bits acceptDocs) throws IOException { + getDelegate().searchNearestVectors(field, target, collector, acceptDocs); } @Override - public TopDocs searchNearestVectors(String field, byte[] target, int k, Bits acceptDocs, int visitedLimit) throws IOException { - return getDelegate().searchNearestVectors(field, target, k, acceptDocs, visitedLimit); + public void searchNearestVectors(String field, byte[] target, KnnCollector collector, Bits acceptDocs) throws IOException { + getDelegate().searchNearestVectors(field, target, collector, acceptDocs); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentLeafReader.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentLeafReader.java index d812fc325189e..49934776bc4a3 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentLeafReader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentLeafReader.java @@ -33,7 +33,7 @@ import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.index.memory.MemoryIndex; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.KnnCollector; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -208,7 +208,7 @@ public FloatVectorValues getFloatVectorValues(String field) throws IOException { } @Override - public TopDocs searchNearestVectors(String field, float[] target, int k, Bits acceptDocs, int visitedLimit) { + public void searchNearestVectors(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) { throw new UnsupportedOperationException(); } @@ -258,7 +258,7 @@ public ByteVectorValues getByteVectorValues(String field) { } @Override - public TopDocs searchNearestVectors(String field, byte[] target, int k, Bits acceptDocs, int visitedLimit) { + public void searchNearestVectors(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) { throw new UnsupportedOperationException(); } diff --git a/server/src/main/java/org/elasticsearch/search/internal/ExitableDirectoryReader.java b/server/src/main/java/org/elasticsearch/search/internal/ExitableDirectoryReader.java index 6bfba10e93b89..794e429bbc473 100644 --- a/server/src/main/java/org/elasticsearch/search/internal/ExitableDirectoryReader.java +++ 
b/server/src/main/java/org/elasticsearch/search/internal/ExitableDirectoryReader.java @@ -22,7 +22,7 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.KnnCollector; import org.apache.lucene.search.suggest.document.CompletionTerms; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -137,9 +137,10 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException { } @Override - public TopDocs searchNearestVectors(String field, byte[] target, int k, Bits acceptDocs, int visitedLimit) throws IOException { + public void searchNearestVectors(String field, byte[] target, KnnCollector collector, Bits acceptDocs) throws IOException { if (queryCancellation.isEnabled() == false) { - return in.searchNearestVectors(field, target, k, acceptDocs, visitedLimit); + in.searchNearestVectors(field, target, collector, acceptDocs); + return; } // when acceptDocs is null due to no doc deleted, we will instantiate a new one that would // match all docs to allow timeout checking. @@ -162,8 +163,7 @@ public int length() { return updatedAcceptDocs.length(); } }; - - return in.searchNearestVectors(field, target, k, timeoutCheckingAcceptDocs, visitedLimit); + in.searchNearestVectors(field, target, collector, timeoutCheckingAcceptDocs); } @Override @@ -176,9 +176,10 @@ public FloatVectorValues getFloatVectorValues(String field) throws IOException { } @Override - public TopDocs searchNearestVectors(String field, float[] target, int k, Bits acceptDocs, int visitedLimit) throws IOException { + public void searchNearestVectors(String field, float[] target, KnnCollector collector, Bits acceptDocs) throws IOException { if (queryCancellation.isEnabled() == false) { - return in.searchNearestVectors(field, target, k, acceptDocs, visitedLimit); + in.searchNearestVectors(field, target, collector, acceptDocs); + return; } // when acceptDocs is null due to no doc deleted, we will instantiate a new one that would // match all docs to allow timeout checking. 
@@ -202,7 +203,7 @@ public int length() { } }; - return in.searchNearestVectors(field, target, k, timeoutCheckingAcceptDocs, visitedLimit); + in.searchNearestVectors(field, target, collector, acceptDocs); } } diff --git a/server/src/main/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReader.java b/server/src/main/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReader.java index e3936102d202d..5dc0374b73fc6 100644 --- a/server/src/main/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReader.java +++ b/server/src/main/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReader.java @@ -29,7 +29,7 @@ import org.apache.lucene.index.TermVectors; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.KnnCollector; import org.apache.lucene.search.suggest.document.CompletionTerms; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -234,21 +234,19 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException { } @Override - public TopDocs searchNearestVectors(String field, byte[] target, int k, Bits acceptDocs, int visitedLimit) throws IOException { - TopDocs topDocs = super.searchNearestVectors(field, target, k, acceptDocs, visitedLimit); - if (topDocs != null) { + public void searchNearestVectors(String field, byte[] target, KnnCollector collector, Bits acceptDocs) throws IOException { + super.searchNearestVectors(field, target, collector, acceptDocs); + if (collector.visitedCount() > 0) { notifier.onKnnVectorsUsed(field); } - return topDocs; } @Override - public TopDocs searchNearestVectors(String field, float[] target, int k, Bits acceptDocs, int visitedLimit) throws IOException { - TopDocs topDocs = super.searchNearestVectors(field, target, k, acceptDocs, visitedLimit); - if (topDocs != null) { + public void searchNearestVectors(String field, float[] target, KnnCollector collector, Bits acceptDocs) throws IOException { + super.searchNearestVectors(field, target, collector, acceptDocs); + if (collector.visitedCount() > 0) { notifier.onKnnVectorsUsed(field); } - return topDocs; } @Override diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/index/engine/frozen/RewriteCachingDirectoryReader.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/index/engine/frozen/RewriteCachingDirectoryReader.java index 768fea6218ea2..e66d41d089437 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/index/engine/frozen/RewriteCachingDirectoryReader.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/index/engine/frozen/RewriteCachingDirectoryReader.java @@ -27,7 +27,7 @@ import org.apache.lucene.index.StoredFields; import org.apache.lucene.index.TermVectors; import org.apache.lucene.index.Terms; -import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.KnnCollector; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; @@ -225,12 +225,12 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException { } @Override - public TopDocs searchNearestVectors(String field, float[] target, int k, Bits acceptDocs, int visitedLimit) throws IOException { + public void searchNearestVectors(String field, float[] target, KnnCollector collector, Bits acceptDocs) throws IOException { throw new UnsupportedOperationException(); } @Override - public TopDocs searchNearestVectors(String field, byte[] target, int k, Bits acceptDocs, 
int visitedLimit) throws IOException { + public void searchNearestVectors(String field, byte[] target, KnnCollector collector, Bits acceptDocs) throws IOException { throw new UnsupportedOperationException(); } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReader.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReader.java index 84e346611910f..046ccc3037a05 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReader.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReader.java @@ -28,7 +28,7 @@ import org.apache.lucene.index.TermVectors; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.KnnCollector; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FilterIterator; @@ -314,8 +314,10 @@ public FloatVectorValues getFloatVectorValues(String field) throws IOException { } @Override - public TopDocs searchNearestVectors(String field, float[] target, int k, Bits acceptDocs, int visitedLimit) throws IOException { - return hasField(field) ? super.searchNearestVectors(field, target, k, acceptDocs, visitedLimit) : null; + public void searchNearestVectors(String field, float[] target, KnnCollector collector, Bits acceptDocs) throws IOException { + if (hasField(field)) { + super.searchNearestVectors(field, target, collector, acceptDocs); + } } @Override @@ -324,8 +326,10 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException { } @Override - public TopDocs searchNearestVectors(String field, byte[] target, int k, Bits acceptDocs, int visitedLimit) throws IOException { - return hasField(field) ? super.searchNearestVectors(field, target, k, acceptDocs, visitedLimit) : null; + public void searchNearestVectors(String field, byte[] target, KnnCollector collector, Bits acceptDocs) throws IOException { + if (hasField(field)) { + super.searchNearestVectors(field, target, collector, acceptDocs); + } } // we share core cache keys (for e.g. 
fielddata)

From 743e18f81a7514b231a32cf5fb8960da0593c8e8 Mon Sep 17 00:00:00 2001
From: Benjamin Trent
Date: Tue, 22 Aug 2023 12:36:34 -0400
Subject: [PATCH 012/136] Fix FieldSubsetReaderTests assumptions for searching knnVector values (#98736)

* Fix FieldSubsetReaderTests assumptions for searching knnVector values

* Use `finish()` in completionSuggestor
---
 .../search/suggest/completion/CompletionSuggester.java | 1 +
 .../authz/accesscontrol/FieldSubsetReaderTests.java    | 8 ++++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/server/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java b/server/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java
index c967d7683f0b2..979aac5a2349c 100644
--- a/server/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java
+++ b/server/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java
@@ -87,6 +87,7 @@ private static void suggest(IndexSearcher searcher, CompletionQuery query, TopSu
                 }
             }
         }
+        collector.finish();
     }

     @Override
diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReaderTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReaderTests.java
index 0f8003b107e6a..c2d51680c3146 100644
--- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReaderTests.java
+++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReaderTests.java
@@ -209,7 +209,9 @@ public void testKnnVectors() throws Exception {

         // Check that we can't see fieldB
         assertNull(leafReader.getFloatVectorValues("fieldB"));
-        assertNull(leafReader.searchNearestVectors("fieldB", new float[] { 1.0f, 1.0f, 1.0f }, 5, null, Integer.MAX_VALUE));
+        topDocs = leafReader.searchNearestVectors("fieldB", new float[] { 1.0f, 1.0f, 1.0f }, 5, null, Integer.MAX_VALUE);
+        assertEquals(0, topDocs.totalHits.value);
+        assertEquals(0, topDocs.scoreDocs.length);

         TestUtil.checkReader(ir);
         IOUtils.close(ir, iw, dir);
@@ -241,7 +243,9 @@ public void testKnnByteVectors() throws Exception {

         // Check that we can't see fieldB
         assertNull(leafReader.getByteVectorValues("fieldB"));
-        assertNull(leafReader.searchNearestVectors("fieldB", new byte[] { 1, 1, 1 }, 5, null, Integer.MAX_VALUE));
+        topDocs = leafReader.searchNearestVectors("fieldB", new byte[] { 1, 1, 1 }, 5, null, Integer.MAX_VALUE);
+        assertEquals(0, topDocs.totalHits.value);
+        assertEquals(0, topDocs.scoreDocs.length);

         TestUtil.checkReader(ir);
         IOUtils.close(ir, iw, dir);

From ca6ee795046aed0a1e14f444301440c4abf0b596 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Thu, 24 Aug 2023 06:14:31 +0000
Subject: [PATCH 013/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-fb9699993c3

---
 build-tools-internal/version.properties |   2 +-
 gradle/verification-metadata.xml        | 144 ++++++++++++------------
 2 files changed, 73 insertions(+), 73 deletions(-)

diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties
index 1b72694f02764..8d69f0958800e 100644
--- a/build-tools-internal/version.properties
+++ b/build-tools-internal/version.properties
@@ -1,5 +1,5 @@
 elasticsearch = 8.11.0
-lucene = 9.8.0-snapshot-6a2d2c4057b
+lucene = 9.8.0-snapshot-fb9699993c3

 bundled_jdk_vendor = openjdk
 bundled_jdk = 20.0.2+9@6e380f22cbe7469fa75fb448bd903d8e

diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index 7af49d84ca778..92c4b7c954916 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -2518,124 +2518,124 @@
 [sha256 checksum entries for the updated Lucene snapshot artifacts; XML element content lost in extraction]

From d08fff51faf83f167ef387952e1c16f666d9538d Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Fri, 25 Aug 2023 06:15:00 +0000
Subject: [PATCH 014/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-fb9699993c3

---
 gradle/verification-metadata.xml | 48 ++++++++++++++++----------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index 92c4b7c954916..cd3d854ae15fe 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -2520,122 +2520,122 @@
 [sha256 checksum entries for the updated Lucene snapshot artifacts; XML element content lost in extraction]

From 5e0113c6f95496c77d4f3b55649298aeac6f1378 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Sun, 27 Aug 2023 06:13:24 +0000
Subject: [PATCH 015/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-fb9699993c3

---
 gradle/verification-metadata.xml | 48 ++++++++++++++++----------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index 8b49111586e4b..0e0a939d9b57f 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -2525,122 +2525,122 @@
 [sha256 checksum entries for the updated Lucene snapshot artifacts; XML element content lost in extraction]

From a5c757a748e3a0f574db2538ae65e543ba61aa24 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Tue, 29 Aug 2023 06:14:32 +0000
Subject: [PATCH 016/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-fb9699993c3

---
 gradle/verification-metadata.xml | 48 ++++++++++++++++----------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index 99b9293fcf071..88ceb667b3a97 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -2525,122 +2525,122 @@
 [sha256 checksum entries for the updated Lucene snapshot artifacts; XML element content lost in extraction]

From e39310cefd75230658aebf977609817b0eeb29df Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Thu, 31 Aug 2023 06:15:14 +0000
Subject: [PATCH 017/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-bb7e43be5a2

---
 build-tools-internal/version.properties |   2 +-
 gradle/verification-metadata.xml        | 144 ++++++++++++------------
 2 files changed, 73 insertions(+), 73 deletions(-)

diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties
index 8d69f0958800e..593b390499c1b 100644
--- a/build-tools-internal/version.properties
+++ b/build-tools-internal/version.properties
@@ -1,5 +1,5 @@
 elasticsearch = 8.11.0
-lucene = 9.8.0-snapshot-fb9699993c3
+lucene = 9.8.0-snapshot-bb7e43be5a2

 bundled_jdk_vendor = openjdk
 bundled_jdk = 20.0.2+9@6e380f22cbe7469fa75fb448bd903d8e

diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index 88ceb667b3a97..9cd874cdce89b 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -2523,124 +2523,124 @@
 [sha256 checksum entries for the updated Lucene snapshot artifacts; XML element content lost in extraction]

From 3f9e6a466c984b54dce3b31dda6387e09d81a9d5 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Fri, 1 Sep 2023 06:13:53 +0000
Subject: [PATCH 018/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-bb7e43be5a2

---
 gradle/verification-metadata.xml | 48 ++++++++++++++++----------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index 9cd874cdce89b..c19b7a904f6e1 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -2525,122 +2525,122 @@
 [sha256 checksum entries for the updated Lucene snapshot artifacts; XML element content lost in extraction]

From 34255493ea4ad4b8cfabeea9171ea770bf7ebb8c Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Sat, 2 Sep 2023 06:14:19 +0000
Subject: [PATCH 019/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-7fc939753d4

---
 build-tools-internal/version.properties |   2 +-
 gradle/verification-metadata.xml        | 144 ++++++++++++------------
 2 files changed, 73 insertions(+), 73 deletions(-)

diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties
index 593b390499c1b..4f46e0782f522 100644
--- a/build-tools-internal/version.properties
+++ b/build-tools-internal/version.properties
@@ -1,5 +1,5 @@
 elasticsearch = 8.11.0
-lucene = 9.8.0-snapshot-bb7e43be5a2
+lucene = 9.8.0-snapshot-7fc939753d4

 bundled_jdk_vendor = openjdk
 bundled_jdk = 20.0.2+9@6e380f22cbe7469fa75fb448bd903d8e

diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index c19b7a904f6e1..b2eeefaac7fc8 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -2523,124 +2523,124 @@
 [sha256 checksum entries for the updated Lucene snapshot artifacts; XML element content lost in extraction]

From 9bb4188735127331d2efd5c01e0b7084c349a478 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Tue, 5 Sep 2023 06:14:40 +0000
Subject: [PATCH 020/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-7fc939753d4

---
 gradle/verification-metadata.xml | 48 ++++++++++++++++----------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index b2eeefaac7fc8..ee087b446864b 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -2525,122 +2525,122 @@
 [sha256 checksum entries for the updated Lucene snapshot artifacts; XML element content lost in extraction]

From b69e504c6cced1649ec2b518c832606f9d4706d4 Mon Sep 17 00:00:00 2001
From: Adrien Grand
Date: Tue, 5 Sep 2023 12:39:11 +0200
Subject: [PATCH 021/136] Fix Lucene version of current version to be the latest Lucene version.
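The `V_8_500_000` constant changed below is the index version that new indices receive on this branch; once the build runs on a Lucene 9.8 snapshot, the Lucene version it registers has to match, because segment-compatibility checks compare the Lucene version recorded in segments against it. A hedged restatement of the invariant this commit restores — the `current()` and `luceneVersion()` accessor names are assumptions, not verified against this branch:

```java
import org.apache.lucene.util.Version;
import org.elasticsearch.index.IndexVersion;

class IndexVersionInvariantSketch {
    // Hypothetical check (accessor names assumed): the index version handed to new
    // indices must record the same Lucene version that is actually on the classpath.
    static void check() {
        assert IndexVersion.current().luceneVersion().equals(Version.LATEST);
    }
}
```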
---
 server/src/main/java/org/elasticsearch/index/IndexVersion.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersion.java b/server/src/main/java/org/elasticsearch/index/IndexVersion.java
index f7c9264a7b4db..6eb88a5c6c500 100644
--- a/server/src/main/java/org/elasticsearch/index/IndexVersion.java
+++ b/server/src/main/java/org/elasticsearch/index/IndexVersion.java
@@ -121,7 +121,7 @@ private static IndexVersion registerIndexVersion(int id, Version luceneVersion,
      * READ THE COMMENT BELOW THIS BLOCK OF DECLARATIONS BEFORE ADDING NEW INDEX VERSIONS
      * Detached index versions added below here.
      */
-    public static final IndexVersion V_8_500_000 = registerIndexVersion(8_500_000, Version.LUCENE_9_7_0, "bf656f5e-5808-4eee-bf8a-e2bf6736ff55");
+    public static final IndexVersion V_8_500_000 = registerIndexVersion(8_500_000, Version.LUCENE_9_8_0, "bf656f5e-5808-4eee-bf8a-e2bf6736ff55");
     /*
      * STOP! READ THIS FIRST! No, really,
      * ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _

From 6d0aebc6c0f35203d19cd34382af133b05e9416d Mon Sep 17 00:00:00 2001
From: elasticsearchmachine
Date: Wed, 6 Sep 2023 06:16:36 +0000
Subject: [PATCH 022/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-460b27ca9d7

---
 build-tools-internal/version.properties |   2 +-
 gradle/verification-metadata.xml        | 144 ++++++++++++------------
 2 files changed, 73 insertions(+), 73 deletions(-)

diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties
index 4f46e0782f522..cd4b40e618284 100644
--- a/build-tools-internal/version.properties
+++ b/build-tools-internal/version.properties
@@ -1,5 +1,5 @@
 elasticsearch = 8.11.0
-lucene = 9.8.0-snapshot-7fc939753d4
+lucene = 9.8.0-snapshot-460b27ca9d7

 bundled_jdk_vendor = openjdk
 bundled_jdk = 20.0.2+9@6e380f22cbe7469fa75fb448bd903d8e

diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml
index ee087b446864b..f722d516095eb 100644
--- a/gradle/verification-metadata.xml
+++ b/gradle/verification-metadata.xml
@@ -2523,124 +2523,124 @@
 [sha256 checksum entries for the updated Lucene snapshot artifacts; XML element content lost in extraction]

From 1fb5411d8d3b0e8619a471b9560f55dc1e9b5e46 Mon Sep 17 00:00:00 2001
From: Adrien Grand
Date: Wed, 6 Sep 2023 11:52:57 +0200
Subject: [PATCH 023/136] Fix test expectations: all tasks now get offloaded to the executor. (#99227)

See apache/lucene#12515.
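For context: before apache/lucene#12515, `IndexSearcher` ran the last slice of a concurrent operation on the calling thread, so a reader with N single-segment slices produced only N - 1 executor tasks; Lucene 9.8 offloads every slice. A minimal sketch of the behavior the updated test asserts — the reader and pool size are illustrative, and the real test polls the count via `assertBusy`:

```java
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.KnnFloatVectorQuery;

import java.util.concurrent.Executors;
import java.util.concurrent.ThreadPoolExecutor;

class ConcurrentRewriteSketch {
    static long completedTasks(DirectoryReader reader) throws Exception {
        ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(4);
        try {
            IndexSearcher searcher = new IndexSearcher(reader, executor);
            // Rewriting a kNN query visits every slice; with Lucene 9.8 each visit is an executor task.
            new KnnFloatVectorQuery("float_vector", new float[] { 0, 0, 0 }, 10, null).rewrite(searcher);
            // Expect one completed task per slice (previously one fewer, for the caller-thread slice).
            return executor.getCompletedTaskCount();
        } finally {
            executor.shutdown();
        }
    }
}
```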
--- .../search/internal/ContextIndexSearcherTests.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/search/internal/ContextIndexSearcherTests.java b/server/src/test/java/org/elasticsearch/search/internal/ContextIndexSearcherTests.java index 01a60cc5060e9..5dbbe134c1d85 100644 --- a/server/src/test/java/org/elasticsearch/search/internal/ContextIndexSearcherTests.java +++ b/server/src/test/java/org/elasticsearch/search/internal/ContextIndexSearcherTests.java @@ -231,9 +231,7 @@ public void testConcurrentRewrite() throws Exception { assertEquals(numSegments, searcher.slices(directoryReader.getContext().leaves()).length); KnnFloatVectorQuery vectorQuery = new KnnFloatVectorQuery("float_vector", new float[] { 0, 0, 0 }, 10, null); vectorQuery.rewrite(searcher); - // Note: we expect one execute call less than segments since the last is executed on the caller thread, but no additional - // exceptions to the offloading of operations. For details see QueueSizeBasedExecutor#processTask. - assertBusy(() -> assertEquals(numSegments - 1, executor.getCompletedTaskCount())); + assertBusy(() -> assertEquals(numSegments, executor.getCompletedTaskCount())); } } finally { terminate(executor); From e7356a068001af7172379655a8354f7122f0f7ff Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 6 Sep 2023 16:53:12 +0200 Subject: [PATCH 024/136] Remove now useless RegExp wrapper. (#99226) Parsing regexps no longer raises stack overflows thanks to apache/lucene#12462. --- .../forbidden/es-server-signatures.txt | 3 - .../elasticsearch/tracing/apm/APMTracer.java | 2 +- .../elasticsearch/common/lucene/RegExp.java | 135 ------------------ .../indices/AssociatedIndexDescriptor.java | 2 +- .../indices/SystemIndexDescriptor.java | 2 +- .../bucket/terms/IncludeExclude.java | 2 +- .../runtime/StringScriptFieldRegexpQuery.java | 2 +- .../support/IncludeExcludeTests.java | 9 -- .../core/security/support/Automatons.java | 2 +- .../mapper/ConstantKeywordFieldMapper.java | 2 +- .../predicate/regex/RLikePattern.java | 2 +- .../wildcard/mapper/WildcardFieldMapper.java | 26 ++-- 12 files changed, 21 insertions(+), 168 deletions(-) delete mode 100644 server/src/main/java/org/elasticsearch/common/lucene/RegExp.java diff --git a/build-tools-internal/src/main/resources/forbidden/es-server-signatures.txt b/build-tools-internal/src/main/resources/forbidden/es-server-signatures.txt index c22339be5332f..1f6ae19a5e931 100644 --- a/build-tools-internal/src/main/resources/forbidden/es-server-signatures.txt +++ b/build-tools-internal/src/main/resources/forbidden/es-server-signatures.txt @@ -62,9 +62,6 @@ org.apache.lucene.index.NoMergePolicy#INSTANCE @ explicit use of NoMergePolicy r org.apache.lucene.search.TimeLimitingCollector#getGlobalTimerThread() org.apache.lucene.search.TimeLimitingCollector#getGlobalCounter() -@defaultMessage use @org.elasticsearch.common.lucene.RegExp instead to avoid StackOverflowError -org.apache.lucene.util.automaton.RegExp - @defaultMessage Don't interrupt threads use FutureUtils#cancel(Future) instead java.util.concurrent.Future#cancel(boolean) diff --git a/modules/apm/src/main/java/org/elasticsearch/tracing/apm/APMTracer.java b/modules/apm/src/main/java/org/elasticsearch/tracing/apm/APMTracer.java index 09eff0c820745..56b0401660d79 100644 --- a/modules/apm/src/main/java/org/elasticsearch/tracing/apm/APMTracer.java +++ b/modules/apm/src/main/java/org/elasticsearch/tracing/apm/APMTracer.java @@ -24,9 +24,9 @@ import 
org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.apache.lucene.util.automaton.Operations; +import org.apache.lucene.util.automaton.RegExp; import org.elasticsearch.Version; import org.elasticsearch.common.component.AbstractLifecycleComponent; -import org.elasticsearch.common.lucene.RegExp; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.Maps; import org.elasticsearch.common.util.concurrent.ConcurrentCollections; diff --git a/server/src/main/java/org/elasticsearch/common/lucene/RegExp.java b/server/src/main/java/org/elasticsearch/common/lucene/RegExp.java deleted file mode 100644 index 90b32197b00fc..0000000000000 --- a/server/src/main/java/org/elasticsearch/common/lucene/RegExp.java +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the Server Side Public License, v 1; you may not use this file except - * in compliance with, at your election, the Elastic License 2.0 or the Server - * Side Public License, v 1. - */ - -package org.elasticsearch.common.lucene; - -import org.apache.lucene.util.automaton.Automaton; -import org.elasticsearch.core.SuppressForbidden; - -/** - * Simple wrapper for {@link org.apache.lucene.util.automaton.RegExp} that - * avoids throwing {@link StackOverflowError}s when working with regular - * expressions as this will crash an Elasticsearch node. Instead, these - * {@linkplain StackOverflowError}s are turned into - * {@link IllegalArgumentException}s which elasticsearch returns as - * a http 400. - */ -public class RegExp { - - @SuppressForbidden(reason = "this class is the trusted wrapper") - private final org.apache.lucene.util.automaton.RegExp wrapped; - - @SuppressForbidden(reason = "catches StackOverflowError") - public RegExp(String s) { - try { - wrapped = new org.apache.lucene.util.automaton.RegExp(s); - } catch (StackOverflowError e) { - throw new IllegalArgumentException("failed to parse regexp due to stack overflow: " + s); - } - } - - @SuppressForbidden(reason = "catches StackOverflowError") - public RegExp(String s, int syntax_flags) { - try { - wrapped = new org.apache.lucene.util.automaton.RegExp(s, syntax_flags); - } catch (StackOverflowError e) { - throw new IllegalArgumentException("failed to parse regexp due to stack overflow: " + s); - } - } - - @SuppressForbidden(reason = "catches StackOverflowError") - public RegExp(String s, int syntax_flags, int match_flags) { - try { - wrapped = new org.apache.lucene.util.automaton.RegExp(s, syntax_flags, match_flags); - } catch (StackOverflowError e) { - throw new IllegalArgumentException("failed to parse regexp due to stack overflow: " + s); - } - } - - @SuppressForbidden(reason = "we are the trusted wrapper") - private RegExp(org.apache.lucene.util.automaton.RegExp wrapped) { - this.wrapped = wrapped; - } - - @SuppressForbidden(reason = "catches StackOverflowError") - public Automaton toAutomaton() { - try { - return wrapped.toAutomaton(); - } catch (StackOverflowError e) { - throw new IllegalArgumentException("failed to parse regexp due to stack overflow: " + this); - } - } - - @SuppressForbidden(reason = "catches StackOverflowError") - public Automaton toAutomaton(int determinizeWorkLimit) { - try { - return wrapped.toAutomaton(determinizeWorkLimit); - } catch (StackOverflowError e) { - throw new IllegalArgumentException("failed to parse 
regexp due to stack overflow: " + this); - } - } - - @SuppressForbidden(reason = "his class is the trusted wrapper") - public String getOriginalString() { - return wrapped.getOriginalString(); - } - - @Override - public String toString() { - // don't call wrapped.toString() to avoid StackOverflowError - return getOriginalString(); - } - - /** - * The type of expression. - */ - @SuppressForbidden(reason = "we are the trusted wrapper") - public org.apache.lucene.util.automaton.RegExp.Kind kind() { - return wrapped.kind; - } - - /** - * Child expressions held by a container type expression. - */ - @SuppressForbidden(reason = "we are the trusted wrapper") - public RegExp exp1() { - return new RegExp(wrapped.exp1); - } - - /** - * Child expressions held by a container type expression. - */ - @SuppressForbidden(reason = "we are the trusted wrapper") - public RegExp exp2() { - return new RegExp(wrapped.exp2); - } - - /** - * Limits for repeatable type expressions. - */ - @SuppressForbidden(reason = "we are the trusted wrapper") - public int min() { - return wrapped.min; - } - - /** - * String expression. - */ - @SuppressForbidden(reason = "we are the trusted wrapper") - public String s() { - return wrapped.s; - } - - /** - * Character expression. - */ - @SuppressForbidden(reason = "we are the trusted wrapper") - public int c() { - return wrapped.c; - } -} diff --git a/server/src/main/java/org/elasticsearch/indices/AssociatedIndexDescriptor.java b/server/src/main/java/org/elasticsearch/indices/AssociatedIndexDescriptor.java index 8cac174e3ff60..a4944e99f01b7 100644 --- a/server/src/main/java/org/elasticsearch/indices/AssociatedIndexDescriptor.java +++ b/server/src/main/java/org/elasticsearch/indices/AssociatedIndexDescriptor.java @@ -10,8 +10,8 @@ import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.CharacterRunAutomaton; +import org.apache.lucene.util.automaton.RegExp; import org.elasticsearch.cluster.metadata.Metadata; -import org.elasticsearch.common.lucene.RegExp; import java.util.List; import java.util.Objects; diff --git a/server/src/main/java/org/elasticsearch/indices/SystemIndexDescriptor.java b/server/src/main/java/org/elasticsearch/indices/SystemIndexDescriptor.java index 4d37cea8bcf98..7bc7cc9a96ddc 100644 --- a/server/src/main/java/org/elasticsearch/indices/SystemIndexDescriptor.java +++ b/server/src/main/java/org/elasticsearch/indices/SystemIndexDescriptor.java @@ -11,6 +11,7 @@ import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.apache.lucene.util.automaton.Operations; +import org.apache.lucene.util.automaton.RegExp; import org.elasticsearch.Version; import org.elasticsearch.action.admin.indices.create.AutoCreateAction; import org.elasticsearch.action.admin.indices.create.TransportCreateIndexAction; @@ -18,7 +19,6 @@ import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.cluster.metadata.SystemIndexMetadataUpgradeService; import org.elasticsearch.common.Strings; -import org.elasticsearch.common.lucene.RegExp; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.common.xcontent.XContentHelper; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/IncludeExclude.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/IncludeExclude.java index 26468dc8fc54a..be4facd1fdacc 100644 --- 
a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/IncludeExclude.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/IncludeExclude.java @@ -19,13 +19,13 @@ import org.apache.lucene.util.automaton.ByteRunAutomaton; import org.apache.lucene.util.automaton.CompiledAutomaton; import org.apache.lucene.util.automaton.Operations; +import org.apache.lucene.util.automaton.RegExp; import org.apache.lucene.util.hppc.BitMixer; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.TransportVersion; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; -import org.elasticsearch.common.lucene.RegExp; import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.core.Nullable; import org.elasticsearch.search.DocValueFormat; diff --git a/server/src/main/java/org/elasticsearch/search/runtime/StringScriptFieldRegexpQuery.java b/server/src/main/java/org/elasticsearch/search/runtime/StringScriptFieldRegexpQuery.java index 6d1e6ca636c2a..02c9914f2b850 100644 --- a/server/src/main/java/org/elasticsearch/search/runtime/StringScriptFieldRegexpQuery.java +++ b/server/src/main/java/org/elasticsearch/search/runtime/StringScriptFieldRegexpQuery.java @@ -9,7 +9,7 @@ package org.elasticsearch.search.runtime; import org.apache.lucene.util.automaton.ByteRunAutomaton; -import org.elasticsearch.common.lucene.RegExp; +import org.apache.lucene.util.automaton.RegExp; import org.elasticsearch.script.Script; import org.elasticsearch.script.StringFieldScript; diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/support/IncludeExcludeTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/support/IncludeExcludeTests.java index 27314a8112f1c..6dea4233ef281 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/support/IncludeExcludeTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/support/IncludeExcludeTests.java @@ -8,8 +8,6 @@ package org.elasticsearch.search.aggregations.support; -import joptsimple.internal.Strings; - import org.apache.lucene.index.DocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.util.BytesRef; @@ -32,8 +30,6 @@ import java.util.SortedSet; import java.util.TreeSet; -import static org.hamcrest.Matchers.equalTo; - public class IncludeExcludeTests extends ESTestCase { public static IncludeExclude randomIncludeExclude() { @@ -390,9 +386,4 @@ public void testInvalidIncludeExcludeCombination() { expectThrows(IllegalArgumentException.class, () -> new IncludeExclude(null, regex, null, values)); } - public void testLongIncludeExclude() { - String longString = Strings.repeat('a', 100000); - IllegalArgumentException iae = expectThrows(IllegalArgumentException.class, () -> new IncludeExclude(longString, null, null, null)); - assertThat(iae.getMessage(), equalTo("failed to parse regexp due to stack overflow: " + longString)); - } } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/support/Automatons.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/support/Automatons.java index 352625822a8ea..a364b9cdbb227 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/support/Automatons.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/support/Automatons.java @@ -11,9 +11,9 @@ import 
org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.apache.lucene.util.automaton.MinimizationOperations; import org.apache.lucene.util.automaton.Operations; +import org.apache.lucene.util.automaton.RegExp; import org.elasticsearch.common.cache.Cache; import org.elasticsearch.common.cache.CacheBuilder; -import org.elasticsearch.common.lucene.RegExp; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.set.Sets; diff --git a/x-pack/plugin/mapper-constant-keyword/src/main/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapper.java b/x-pack/plugin/mapper-constant-keyword/src/main/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapper.java index a530a8bf46623..ddda0bf7471e0 100644 --- a/x-pack/plugin/mapper-constant-keyword/src/main/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapper.java +++ b/x-pack/plugin/mapper-constant-keyword/src/main/java/org/elasticsearch/xpack/constantkeyword/mapper/ConstantKeywordFieldMapper.java @@ -19,9 +19,9 @@ import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.apache.lucene.util.automaton.LevenshteinAutomata; +import org.apache.lucene.util.automaton.RegExp; import org.elasticsearch.common.geo.ShapeRelation; import org.elasticsearch.common.lucene.BytesRefs; -import org.elasticsearch.common.lucene.RegExp; import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.time.DateMathParser; import org.elasticsearch.common.unit.Fuzziness; diff --git a/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/predicate/regex/RLikePattern.java b/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/predicate/regex/RLikePattern.java index 222bc66ba7911..528872ca9b4cf 100644 --- a/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/predicate/regex/RLikePattern.java +++ b/x-pack/plugin/ql/src/main/java/org/elasticsearch/xpack/ql/expression/predicate/regex/RLikePattern.java @@ -7,7 +7,7 @@ package org.elasticsearch.xpack.ql.expression.predicate.regex; import org.apache.lucene.util.automaton.Automaton; -import org.elasticsearch.common.lucene.RegExp; +import org.apache.lucene.util.automaton.RegExp; import java.util.Objects; diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java index 469e140eb6b76..1febf395c5d5f 100644 --- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java +++ b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java @@ -41,12 +41,12 @@ import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.MinimizationOperations; import org.apache.lucene.util.automaton.Operations; +import org.apache.lucene.util.automaton.RegExp; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.geo.ShapeRelation; import org.elasticsearch.common.io.stream.ByteArrayStreamInput; import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.common.lucene.Lucene; -import org.elasticsearch.common.lucene.RegExp; import org.elasticsearch.common.lucene.search.AutomatonQueries; import org.elasticsearch.common.time.DateMathParser; import 
org.elasticsearch.common.unit.Fuzziness; @@ -402,7 +402,7 @@ public Query regexpQuery( // * Anything else is a concrete query that should be run on the ngram index. public static Query toApproximationQuery(RegExp r) throws IllegalArgumentException { Query result = null; - switch (r.kind()) { + switch (r.kind) { case REGEXP_UNION: result = createUnionQuery(r); break; @@ -410,11 +410,11 @@ public static Query toApproximationQuery(RegExp r) throws IllegalArgumentExcepti result = createConcatenationQuery(r); break; case REGEXP_STRING: - String normalizedString = toLowerCase(r.s()); + String normalizedString = toLowerCase(r.s); result = new TermQuery(new Term("", normalizedString)); break; case REGEXP_CHAR: - String cs = Character.toString(r.c()); + String cs = Character.toString(r.c); String normalizedChar = toLowerCase(cs); result = new TermQuery(new Term("", normalizedChar)); break; @@ -425,8 +425,8 @@ public static Query toApproximationQuery(RegExp r) throws IllegalArgumentExcepti case REGEXP_REPEAT_MIN: case REGEXP_REPEAT_MINMAX: - if (r.min() > 0) { - result = toApproximationQuery(r.exp1()); + if (r.min > 0) { + result = toApproximationQuery(r.exp1); if (result instanceof TermQuery) { // Wrap the repeating expression so that it is not concatenated by a parent which concatenates // plain TermQuery objects together. Boolean queries are interpreted as a black box and not @@ -465,8 +465,8 @@ public static Query toApproximationQuery(RegExp r) throws IllegalArgumentExcepti private static Query createConcatenationQuery(RegExp r) { // Create ANDs of expressions plus collapse consecutive TermQuerys into single longer ones ArrayList queries = new ArrayList<>(); - findLeaves(r.exp1(), org.apache.lucene.util.automaton.RegExp.Kind.REGEXP_CONCATENATION, queries); - findLeaves(r.exp2(), org.apache.lucene.util.automaton.RegExp.Kind.REGEXP_CONCATENATION, queries); + findLeaves(r.exp1, org.apache.lucene.util.automaton.RegExp.Kind.REGEXP_CONCATENATION, queries); + findLeaves(r.exp2, org.apache.lucene.util.automaton.RegExp.Kind.REGEXP_CONCATENATION, queries); BooleanQuery.Builder bAnd = new BooleanQuery.Builder(); StringBuilder sequence = new StringBuilder(); for (Query query : queries) { @@ -495,8 +495,8 @@ private static Query createConcatenationQuery(RegExp r) { private static Query createUnionQuery(RegExp r) { // Create an OR of clauses ArrayList queries = new ArrayList<>(); - findLeaves(r.exp1(), org.apache.lucene.util.automaton.RegExp.Kind.REGEXP_UNION, queries); - findLeaves(r.exp2(), org.apache.lucene.util.automaton.RegExp.Kind.REGEXP_UNION, queries); + findLeaves(r.exp1, org.apache.lucene.util.automaton.RegExp.Kind.REGEXP_UNION, queries); + findLeaves(r.exp2, org.apache.lucene.util.automaton.RegExp.Kind.REGEXP_UNION, queries); BooleanQuery.Builder bOr = new BooleanQuery.Builder(); HashSet uniqueClauses = new HashSet<>(); for (Query query : queries) { @@ -520,9 +520,9 @@ private static Query createUnionQuery(RegExp r) { } private static void findLeaves(RegExp exp, org.apache.lucene.util.automaton.RegExp.Kind kind, List queries) { - if (exp.kind() == kind) { - findLeaves(exp.exp1(), kind, queries); - findLeaves(exp.exp2(), kind, queries); + if (exp.kind == kind) { + findLeaves(exp.exp1, kind, queries); + findLeaves(exp.exp2, kind, queries); } else { queries.add(toApproximationQuery(exp)); } From f4d4d43b0c0a7cf6967a9ec11bacecf35bbf6173 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 6 Sep 2023 21:38:57 +0200 Subject: [PATCH 025/136] Update the way that the maximum vector dimension is 
configured. (#99225) The maximum supported dimension used to be configured on the FieldType, it's now on the codec. --- .../index/codec/PerFieldMapperCodec.java | 5 +- .../vectors/DenseVectorFieldMapper.java | 75 +++++++++---------- .../vectors/DenseVectorFieldMapperTests.java | 2 - .../index/mapper/MapperServiceTestCase.java | 6 +- 4 files changed, 42 insertions(+), 46 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java index d30a91d2ae4d0..9e25c33580212 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java @@ -91,10 +91,7 @@ boolean useBloomFilter(String field) { public KnnVectorsFormat getKnnVectorsFormatForField(String field) { Mapper mapper = mapperService.mappingLookup().getMapper(field); if (mapper instanceof DenseVectorFieldMapper vectorMapper) { - KnnVectorsFormat format = vectorMapper.getKnnVectorsFormatForField(); - if (format != null) { - return format; - } + return vectorMapper.getKnnVectorsFormatForField(super.getKnnVectorsFormatForField(field)); } return super.getKnnVectorsFormatForField(field); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 660ad23483186..4b489bba68f84 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -9,6 +9,8 @@ package org.elasticsearch.index.mapper.vectors; import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.KnnVectorsWriter; import org.apache.lucene.codecs.lucene95.Lucene95HnswVectorsFormat; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Field; @@ -19,6 +21,8 @@ import org.apache.lucene.index.ByteVectorValues; import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.search.FieldExistsQuery; @@ -198,40 +202,6 @@ public DenseVectorFieldMapper build(MapperBuilderContext context) { } } - private static FieldType getDenseVectorFieldType( - int dimension, - VectorEncoding vectorEncoding, - VectorSimilarityFunction similarityFunction - ) { - if (dimension == 0) { - throw new IllegalArgumentException("cannot index an empty vector"); - } - if (dimension > DenseVectorFieldMapper.MAX_DIMS_COUNT) { - throw new IllegalArgumentException("cannot index vectors with dimension greater than " + DenseVectorFieldMapper.MAX_DIMS_COUNT); - } - if (similarityFunction == null) { - throw new IllegalArgumentException("similarity function must not be null"); - } - FieldType fieldType = new FieldType() { - @Override - public int vectorDimension() { - return dimension; - } - - @Override - public VectorEncoding vectorEncoding() { - return vectorEncoding; - } - - @Override - public VectorSimilarityFunction vectorSimilarityFunction() { - return similarityFunction; - } - }; - fieldType.freeze(); - return fieldType; - } - public enum ElementType { BYTE(1) { @@ -256,7 
+226,9 @@ KnnByteVectorField createKnnVectorField(String name, byte[] vector, VectorSimila if (vector == null) { throw new IllegalArgumentException("vector value must not be null"); } - FieldType denseVectorFieldType = getDenseVectorFieldType(vector.length, VectorEncoding.BYTE, function); + FieldType denseVectorFieldType = new FieldType(); + denseVectorFieldType.setVectorAttributes(vector.length, VectorEncoding.BYTE, function); + denseVectorFieldType.freeze(); return new KnnByteVectorField(name, vector, denseVectorFieldType); } @@ -456,7 +428,9 @@ KnnFloatVectorField createKnnVectorField(String name, float[] vector, VectorSimi if (vector == null) { throw new IllegalArgumentException("vector value must not be null"); } - FieldType denseVectorFieldType = getDenseVectorFieldType(vector.length, VectorEncoding.FLOAT32, function); + FieldType denseVectorFieldType = new FieldType(); + denseVectorFieldType.setVectorAttributes(vector.length, VectorEncoding.FLOAT32, function); + denseVectorFieldType.freeze(); return new KnnFloatVectorField(name, vector, denseVectorFieldType); } @@ -1135,13 +1109,36 @@ private static IndexOptions parseIndexOptions(String fieldName, Object propNode) * @return the custom kNN vectors format that is configured for this field or * {@code null} if the default format should be used. */ - public KnnVectorsFormat getKnnVectorsFormatForField() { + public KnnVectorsFormat getKnnVectorsFormatForField(KnnVectorsFormat defaultFormat) { + KnnVectorsFormat format; if (indexOptions == null) { - return null; // use default format + format = defaultFormat; } else { HnswIndexOptions hnswIndexOptions = (HnswIndexOptions) indexOptions; - return new Lucene95HnswVectorsFormat(hnswIndexOptions.m, hnswIndexOptions.efConstruction); + format = new Lucene95HnswVectorsFormat(hnswIndexOptions.m, hnswIndexOptions.efConstruction); } + // It's legal to reuse the same format name as this is the same on-disk format. 
+ return new KnnVectorsFormat(format.getName()) { + @Override + public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { + return format.fieldsWriter(state); + } + + @Override + public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { + return format.fieldsReader(state); + } + + @Override + public int getMaxDimensions(String fieldName) { + return MAX_DIMS_COUNT; + } + + @Override + public String toString() { + return format.toString(); + } + }; } @Override diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index 93b198a7f4099..2c2e4318f6e64 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -12,7 +12,6 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.KnnVectorsFormat; -import org.apache.lucene.codecs.lucene95.Lucene95HnswVectorsFormat; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.KnnByteVectorField; import org.apache.lucene.document.KnnFloatVectorField; @@ -928,7 +927,6 @@ public void testKnnVectorsFormat() throws IOException { Codec codec = codecService.codec("default"); assertThat(codec, instanceOf(PerFieldMapperCodec.class)); KnnVectorsFormat knnVectorsFormat = ((PerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); - assertThat(knnVectorsFormat, instanceOf(Lucene95HnswVectorsFormat.class)); String expectedString = "Lucene95HnswVectorsFormat(name=Lucene95HnswVectorsFormat, maxConn=" + m + ", beamWidth=" diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java index 371ea3f35292b..31b696036bd42 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java @@ -10,6 +10,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.codecs.lucene95.Lucene95Codec; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig; @@ -41,6 +42,7 @@ import org.elasticsearch.index.analysis.NameOrDefinition; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.cache.bitset.BitsetFilterCache; +import org.elasticsearch.index.codec.PerFieldMapperCodec; import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.IndexFieldDataCache; @@ -241,7 +243,9 @@ protected final void withLuceneIndex( CheckedConsumer builder, CheckedConsumer test ) throws IOException { - IndexWriterConfig iwc = new IndexWriterConfig(IndexShard.buildIndexAnalyzer(mapperService)); + IndexWriterConfig iwc = new IndexWriterConfig(IndexShard.buildIndexAnalyzer(mapperService)).setCodec( + new PerFieldMapperCodec(Lucene95Codec.Mode.BEST_SPEED, mapperService, BigArrays.NON_RECYCLING_INSTANCE) + ); try (Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc)) { builder.accept(iw); try (DirectoryReader reader = iw.getReader()) { From 
d9c7f600920b87449c5b72a0ad65400e75520b5c Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 7 Sep 2023 06:15:29 +0000 Subject: [PATCH 026/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-632d8c253bb --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 144 ++++++++++++------------ 2 files changed, 73 insertions(+), 73 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index cd4b40e618284..5275dd639af17 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 8.11.0 -lucene = 9.8.0-snapshot-460b27ca9d7 +lucene = 9.8.0-snapshot-632d8c253bb bundled_jdk_vendor = openjdk bundled_jdk = 20.0.2+9@6e380f22cbe7469fa75fb448bd903d8e diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index f722d516095eb..85bd2cbcc2001 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2523,124 +2523,124 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 34ec2acdeff689726f9550fc9711c7e592171966 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Thu, 7 Sep 2023 11:38:51 +0200 Subject: [PATCH 027/136] Remove temporary executor wrapping in ContextIndexSearcher (#99239) We recently introduced wrapping the executor (see #98408) to work around adaptive offloading of the task to the executor. It was needed only for concurrency query rewrite as we override the search method that deals with slices and have full control over it for now. Lucene has been upgraded to always execute tasks on the executor, hence the wrapping of the executor to "hide" the fact that it's a ThreadPoolExecutor is no longer necessary. --- .../search/internal/ContextIndexSearcher.java | 31 ++----------------- .../internal/ContextIndexSearcherTests.java | 1 - 2 files changed, 2 insertions(+), 30 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java b/server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java index 6bf4267c3a42f..90ddeda87d2d3 100644 --- a/server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java +++ b/server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java @@ -63,7 +63,6 @@ import java.util.concurrent.Future; import java.util.concurrent.FutureTask; import java.util.concurrent.RunnableFuture; -import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.atomic.AtomicInteger; /** @@ -139,11 +138,8 @@ public ContextIndexSearcher( int maximumNumberOfSlices, int minimumDocsPerSlice ) throws IOException { - // we need to pass the executor up so it can potentially be used as a sliceExecutor by knn search - super( - wrapWithExitableDirectoryReader ? new ExitableDirectoryReader((DirectoryReader) reader, cancellable) : reader, - wrapExecutor(executor) - ); + // we need to pass the executor up so it can potentially be used by query rewrite, which does not rely on slicing + super(wrapWithExitableDirectoryReader ? 
new ExitableDirectoryReader((DirectoryReader) reader, cancellable) : reader, executor); setSimilarity(similarity); setQueryCache(queryCache); setQueryCachingPolicy(queryCachingPolicy); @@ -158,18 +154,6 @@ public ContextIndexSearcher( } } - /* - * This is a hack to work around QueueSizeBasedExecutor conditionally executing on the caller thread based on queue size. - * We'd rather simply offload all the tasks to the executor when provided. See https://github.com/apache/lucene/issues/12498 . - * We override all of that already for the collection part, but we can't do that for the query rewrite part that affects knn. - */ - private static Executor wrapExecutor(Executor executor) { - if (executor instanceof ThreadPoolExecutor) { - return executor::execute; - } - return executor; - } - // package private for testing int getMinimumDocsPerSlice() { return minimumDocsPerSlice; @@ -249,17 +233,6 @@ public Weight createWeight(Query query, ScoreMode scoreMode, float boost) throws } } - /** - * Overwrite superclass to force one slice per segment for knn search. - * This is only needed temporarily by knn query rewrite, for the main - * search collection we forked the search method and inject our own slicing logic - * until this is available in Lucene itself - */ - @Override - protected LeafSlice[] slices(List leaves) { - return IndexSearcher.slices(leaves, Math.max(1, leaves.size()), 1); - } - /** * Returns the slices created by this {@link ContextIndexSearcher}, different from those created by the base class and * returned by {@link IndexSearcher#getSlices()}. The former are used for parallelizing the collection, while the latter are used diff --git a/server/src/test/java/org/elasticsearch/search/internal/ContextIndexSearcherTests.java b/server/src/test/java/org/elasticsearch/search/internal/ContextIndexSearcherTests.java index 5dbbe134c1d85..3f7d67d292761 100644 --- a/server/src/test/java/org/elasticsearch/search/internal/ContextIndexSearcherTests.java +++ b/server/src/test/java/org/elasticsearch/search/internal/ContextIndexSearcherTests.java @@ -228,7 +228,6 @@ public void testConcurrentRewrite() throws Exception { 1 ); int numSegments = directoryReader.getContext().leaves().size(); - assertEquals(numSegments, searcher.slices(directoryReader.getContext().leaves()).length); KnnFloatVectorQuery vectorQuery = new KnnFloatVectorQuery("float_vector", new float[] { 0, 0, 0 }, 10, null); vectorQuery.rewrite(searcher); assertBusy(() -> assertEquals(numSegments, executor.getCompletedTaskCount())); From f7c10e9eb734f87c0d05ab1a982ba684fa390063 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 8 Sep 2023 06:14:17 +0000 Subject: [PATCH 028/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-632d8c253bb --- gradle/verification-metadata.xml | 48 ++++++++++++++++---------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 5b06099f553b3..e357205f00c87 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2525,122 +2525,122 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From a932b1899ea25f762584d7797d13a5a2fc2036e8 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Fri, 8 Sep 2023 08:59:46 +0200 Subject: [PATCH 029/136] Close index reader in tests (#99288) Not closing the index reader in unit tests did not cause issues so far, but it becomes a problem since we use newSearcher more often which randomly sets an executor to 
the searcher. This is because the executor is shutdown as part of the index reader closing listener, and if the reader is not closed, the test leaks threads which causes test failures. This is more evident in the lucene_snapshot branch as lucene 9.8 will introduce unconditional offloading of tasks to the executor when provided (including sequential execution). In main, there's many cases where even if we have an executor, it is never used because execution is sequential (single slice) hence it won't leak threads in that case. This commit fixes the unit tests that use LuceneTestCase#newSearcher to create the searcher, which may randomize the executor and need the reader to be closed as that triggers the shutdown of the executor too. --- .../painless/SimilarityScriptTests.java | 116 ++++++------- .../search/MultiPhrasePrefixQueryTests.java | 54 +++--- .../deps/lucene/SimpleLuceneTests.java | 161 +++++++++--------- 3 files changed, 171 insertions(+), 160 deletions(-) diff --git a/modules/lang-painless/src/test/java/org/elasticsearch/painless/SimilarityScriptTests.java b/modules/lang-painless/src/test/java/org/elasticsearch/painless/SimilarityScriptTests.java index bbc24b74513f6..8437d78962c0c 100644 --- a/modules/lang-painless/src/test/java/org/elasticsearch/painless/SimilarityScriptTests.java +++ b/modules/lang-painless/src/test/java/org/elasticsearch/painless/SimilarityScriptTests.java @@ -55,39 +55,39 @@ public void testBasics() throws IOException { Collections.emptyMap() ); ScriptedSimilarity sim = new ScriptedSimilarity("foobar", null, "foobaz", factory::newInstance, true); - Directory dir = new ByteBuffersDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setSimilarity(sim)); + try (Directory dir = new ByteBuffersDirectory()) { + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setSimilarity(sim)); - Document doc = new Document(); - doc.add(new TextField("f", "foo bar", Store.NO)); - doc.add(new StringField("match", "no", Store.NO)); - w.addDocument(doc); + Document doc = new Document(); + doc.add(new TextField("f", "foo bar", Store.NO)); + doc.add(new StringField("match", "no", Store.NO)); + w.addDocument(doc); - doc = new Document(); - doc.add(new TextField("f", "foo foo bar", Store.NO)); - doc.add(new StringField("match", "yes", Store.NO)); - w.addDocument(doc); + doc = new Document(); + doc.add(new TextField("f", "foo foo bar", Store.NO)); + doc.add(new StringField("match", "yes", Store.NO)); + w.addDocument(doc); - doc = new Document(); - doc.add(new TextField("f", "bar", Store.NO)); - doc.add(new StringField("match", "no", Store.NO)); - w.addDocument(doc); + doc = new Document(); + doc.add(new TextField("f", "bar", Store.NO)); + doc.add(new StringField("match", "no", Store.NO)); + w.addDocument(doc); - IndexReader r = DirectoryReader.open(w); - w.close(); - IndexSearcher searcher = newSearcher(r); - searcher.setSimilarity(sim); - Query query = new BoostQuery( - new BooleanQuery.Builder().add(new TermQuery(new Term("f", "foo")), Occur.SHOULD) - .add(new TermQuery(new Term("match", "yes")), Occur.FILTER) - .build(), - 3.2f - ); - TopDocs topDocs = searcher.search(query, 1); - assertEquals(1, topDocs.totalHits.value); - assertEquals((float) (3.2 * 2 / 3), topDocs.scoreDocs[0].score, 0); - w.close(); - dir.close(); + try (IndexReader r = DirectoryReader.open(w)) { + w.close(); + IndexSearcher searcher = newSearcher(r); + searcher.setSimilarity(sim); + Query query = new BoostQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("f", "foo")), 
Occur.SHOULD) + .add(new TermQuery(new Term("match", "yes")), Occur.FILTER) + .build(), + 3.2f + ); + TopDocs topDocs = searcher.search(query, 1); + assertEquals(1, topDocs.totalHits.value); + assertEquals((float) (3.2 * 2 / 3), topDocs.scoreDocs[0].score, 0); + } + } } public void testWeightScript() throws IOException { @@ -104,38 +104,38 @@ public void testWeightScript() throws IOException { Collections.emptyMap() ); ScriptedSimilarity sim = new ScriptedSimilarity("foobar", weightFactory::newInstance, "foobaz", factory::newInstance, true); - Directory dir = new ByteBuffersDirectory(); - IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setSimilarity(sim)); + try (Directory dir = new ByteBuffersDirectory()) { + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setSimilarity(sim)); - Document doc = new Document(); - doc.add(new TextField("f", "foo bar", Store.NO)); - doc.add(new StringField("match", "no", Store.NO)); - w.addDocument(doc); + Document doc = new Document(); + doc.add(new TextField("f", "foo bar", Store.NO)); + doc.add(new StringField("match", "no", Store.NO)); + w.addDocument(doc); - doc = new Document(); - doc.add(new TextField("f", "foo foo bar", Store.NO)); - doc.add(new StringField("match", "yes", Store.NO)); - w.addDocument(doc); + doc = new Document(); + doc.add(new TextField("f", "foo foo bar", Store.NO)); + doc.add(new StringField("match", "yes", Store.NO)); + w.addDocument(doc); - doc = new Document(); - doc.add(new TextField("f", "bar", Store.NO)); - doc.add(new StringField("match", "no", Store.NO)); - w.addDocument(doc); + doc = new Document(); + doc.add(new TextField("f", "bar", Store.NO)); + doc.add(new StringField("match", "no", Store.NO)); + w.addDocument(doc); - IndexReader r = DirectoryReader.open(w); - w.close(); - IndexSearcher searcher = newSearcher(r); - searcher.setSimilarity(sim); - Query query = new BoostQuery( - new BooleanQuery.Builder().add(new TermQuery(new Term("f", "foo")), Occur.SHOULD) - .add(new TermQuery(new Term("match", "yes")), Occur.FILTER) - .build(), - 3.2f - ); - TopDocs topDocs = searcher.search(query, 1); - assertEquals(1, topDocs.totalHits.value); - assertEquals((float) (3.2 * 2 / 3), topDocs.scoreDocs[0].score, 0); - w.close(); - dir.close(); + try (IndexReader r = DirectoryReader.open(w)) { + w.close(); + IndexSearcher searcher = newSearcher(r); + searcher.setSimilarity(sim); + Query query = new BoostQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("f", "foo")), Occur.SHOULD) + .add(new TermQuery(new Term("match", "yes")), Occur.FILTER) + .build(), + 3.2f + ); + TopDocs topDocs = searcher.search(query, 1); + assertEquals(1, topDocs.totalHits.value); + assertEquals((float) (3.2 * 2 / 3), topDocs.scoreDocs[0].score, 0); + } + } } } diff --git a/server/src/test/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQueryTests.java b/server/src/test/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQueryTests.java index bc8600a8cc05c..219242019cb45 100644 --- a/server/src/test/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQueryTests.java +++ b/server/src/test/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQueryTests.java @@ -25,31 +25,33 @@ public class MultiPhrasePrefixQueryTests extends ESTestCase { public void testSimple() throws Exception { - IndexWriter writer = new IndexWriter(new ByteBuffersDirectory(), new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); - Document doc = new Document(); - doc.add(new Field("field", "aaa bbb ccc ddd", 
TextField.TYPE_NOT_STORED)); - writer.addDocument(doc); - IndexReader reader = DirectoryReader.open(writer); - IndexSearcher searcher = newSearcher(reader); - - MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery("field"); - query.add(new Term("field", "aa")); - assertThat(searcher.count(query), equalTo(1)); - - query = new MultiPhrasePrefixQuery("field"); - query.add(new Term("field", "aaa")); - query.add(new Term("field", "bb")); - assertThat(searcher.count(query), equalTo(1)); - - query = new MultiPhrasePrefixQuery("field"); - query.setSlop(1); - query.add(new Term("field", "aaa")); - query.add(new Term("field", "cc")); - assertThat(searcher.count(query), equalTo(1)); - - query = new MultiPhrasePrefixQuery("field"); - query.setSlop(1); - query.add(new Term("field", "xxx")); - assertThat(searcher.count(query), equalTo(0)); + try (IndexWriter writer = new IndexWriter(new ByteBuffersDirectory(), new IndexWriterConfig(Lucene.STANDARD_ANALYZER))) { + Document doc = new Document(); + doc.add(new Field("field", "aaa bbb ccc ddd", TextField.TYPE_NOT_STORED)); + writer.addDocument(doc); + try (IndexReader reader = DirectoryReader.open(writer)) { + IndexSearcher searcher = newSearcher(reader); + + MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery("field"); + query.add(new Term("field", "aa")); + assertThat(searcher.count(query), equalTo(1)); + + query = new MultiPhrasePrefixQuery("field"); + query.add(new Term("field", "aaa")); + query.add(new Term("field", "bb")); + assertThat(searcher.count(query), equalTo(1)); + + query = new MultiPhrasePrefixQuery("field"); + query.setSlop(1); + query.add(new Term("field", "aaa")); + query.add(new Term("field", "cc")); + assertThat(searcher.count(query), equalTo(1)); + + query = new MultiPhrasePrefixQuery("field"); + query.setSlop(1); + query.add(new Term("field", "xxx")); + assertThat(searcher.count(query), equalTo(0)); + } + } } } diff --git a/server/src/test/java/org/elasticsearch/deps/lucene/SimpleLuceneTests.java b/server/src/test/java/org/elasticsearch/deps/lucene/SimpleLuceneTests.java index 0fe25a6b74412..24e635b2f8b76 100644 --- a/server/src/test/java/org/elasticsearch/deps/lucene/SimpleLuceneTests.java +++ b/server/src/test/java/org/elasticsearch/deps/lucene/SimpleLuceneTests.java @@ -44,47 +44,55 @@ public class SimpleLuceneTests extends ESTestCase { public void testSortValues() throws Exception { - Directory dir = new ByteBuffersDirectory(); - IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); - for (int i = 0; i < 10; i++) { - Document document = new Document(); - String text = new String(new char[] { (char) (97 + i), (char) (97 + i) }); - document.add(new TextField("str", text, Field.Store.YES)); - document.add(new SortedDocValuesField("str", new BytesRef(text))); - indexWriter.addDocument(document); - } - IndexReader reader = DirectoryReader.open(indexWriter); - IndexSearcher searcher = newSearcher(reader); - TopFieldDocs docs = searcher.search(new MatchAllDocsQuery(), 10, new Sort(new SortField("str", SortField.Type.STRING))); - for (int i = 0; i < 10; i++) { - FieldDoc fieldDoc = (FieldDoc) docs.scoreDocs[i]; - assertThat((BytesRef) fieldDoc.fields[0], equalTo(new BytesRef(new String(new char[] { (char) (97 + i), (char) (97 + i) })))); + try ( + Directory dir = new ByteBuffersDirectory(); + IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)) + ) { + for (int i = 0; i < 10; i++) { + Document document = new Document(); + String text = new 
String(new char[] { (char) (97 + i), (char) (97 + i) }); + document.add(new TextField("str", text, Field.Store.YES)); + document.add(new SortedDocValuesField("str", new BytesRef(text))); + indexWriter.addDocument(document); + } + try (IndexReader reader = DirectoryReader.open(indexWriter)) { + IndexSearcher searcher = newSearcher(reader); + TopFieldDocs docs = searcher.search(new MatchAllDocsQuery(), 10, new Sort(new SortField("str", SortField.Type.STRING))); + for (int i = 0; i < 10; i++) { + FieldDoc fieldDoc = (FieldDoc) docs.scoreDocs[i]; + assertThat( + (BytesRef) fieldDoc.fields[0], + equalTo(new BytesRef(new String(new char[] { (char) (97 + i), (char) (97 + i) }))) + ); + } + } } } public void testSimpleNumericOps() throws Exception { - Directory dir = new ByteBuffersDirectory(); - IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); - - Document document = new Document(); - document.add(new TextField("_id", "1", Field.Store.YES)); - document.add(new IntPoint("test", 2)); - document.add(new StoredField("test", 2)); - indexWriter.addDocument(document); - - IndexReader reader = DirectoryReader.open(indexWriter); - IndexSearcher searcher = newSearcher(reader); - TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1); - Document doc = searcher.doc(topDocs.scoreDocs[0].doc); - IndexableField f = doc.getField("test"); - assertThat(f.numericValue(), equalTo(2)); - - topDocs = searcher.search(IntPoint.newExactQuery("test", 2), 1); - doc = searcher.doc(topDocs.scoreDocs[0].doc); - f = doc.getField("test"); - assertThat(f.stringValue(), equalTo("2")); + try ( + Directory dir = new ByteBuffersDirectory(); + IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)) + ) { + Document document = new Document(); + document.add(new TextField("_id", "1", Field.Store.YES)); + document.add(new IntPoint("test", 2)); + document.add(new StoredField("test", 2)); + indexWriter.addDocument(document); - indexWriter.close(); + try (IndexReader reader = DirectoryReader.open(indexWriter)) { + IndexSearcher searcher = newSearcher(reader); + TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1); + Document doc = searcher.doc(topDocs.scoreDocs[0].doc); + IndexableField f = doc.getField("test"); + assertThat(f.numericValue(), equalTo(2)); + + topDocs = searcher.search(IntPoint.newExactQuery("test", 2), 1); + doc = searcher.doc(topDocs.scoreDocs[0].doc); + f = doc.getField("test"); + assertThat(f.stringValue(), equalTo("2")); + } + } } /** @@ -93,54 +101,55 @@ public void testSimpleNumericOps() throws Exception { * first (with load and break). 
*/ public void testOrdering() throws Exception { - Directory dir = new ByteBuffersDirectory(); - IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); - - Document document = new Document(); - document.add(new TextField("_id", "1", Field.Store.YES)); - document.add(new TextField("#id", "1", Field.Store.YES)); - indexWriter.addDocument(document); + try ( + Directory dir = new ByteBuffersDirectory(); + IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)) + ) { + Document document = new Document(); + document.add(new TextField("_id", "1", Field.Store.YES)); + document.add(new TextField("#id", "1", Field.Store.YES)); + indexWriter.addDocument(document); - IndexReader reader = DirectoryReader.open(indexWriter); - IndexSearcher searcher = newSearcher(reader); - TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1); - final ArrayList fieldsOrder = new ArrayList<>(); - searcher.doc(topDocs.scoreDocs[0].doc, new StoredFieldVisitor() { - @Override - public Status needsField(FieldInfo fieldInfo) throws IOException { - fieldsOrder.add(fieldInfo.name); - return Status.YES; + try (IndexReader reader = DirectoryReader.open(indexWriter)) { + IndexSearcher searcher = newSearcher(reader); + TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1); + final ArrayList fieldsOrder = new ArrayList<>(); + searcher.doc(topDocs.scoreDocs[0].doc, new StoredFieldVisitor() { + @Override + public Status needsField(FieldInfo fieldInfo) throws IOException { + fieldsOrder.add(fieldInfo.name); + return Status.YES; + } + }); + + assertThat(fieldsOrder.size(), equalTo(2)); + assertThat(fieldsOrder.get(0), equalTo("_id")); + assertThat(fieldsOrder.get(1), equalTo("#id")); } - }); - - assertThat(fieldsOrder.size(), equalTo(2)); - assertThat(fieldsOrder.get(0), equalTo("_id")); - assertThat(fieldsOrder.get(1), equalTo("#id")); - - indexWriter.close(); + } } public void testNRTSearchOnClosedWriter() throws Exception { - Directory dir = new ByteBuffersDirectory(); - IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); - DirectoryReader reader = DirectoryReader.open(indexWriter); - - for (int i = 0; i < 100; i++) { - Document document = new Document(); - TextField field = new TextField("_id", Integer.toString(i), Field.Store.YES); - document.add(field); - indexWriter.addDocument(document); - } - reader = refreshReader(reader); - - indexWriter.close(); - - for (LeafReaderContext leaf : reader.leaves()) { - leaf.reader().terms("_id").iterator().next(); + try ( + Directory dir = new ByteBuffersDirectory(); + IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + DirectoryReader reader = DirectoryReader.open(indexWriter) + ) { + for (int i = 0; i < 100; i++) { + Document document = new Document(); + TextField field = new TextField("_id", Integer.toString(i), Field.Store.YES); + document.add(field); + indexWriter.addDocument(document); + } + try (DirectoryReader refreshedReader = refreshReader(reader)) { + for (LeafReaderContext leaf : refreshedReader.leaves()) { + leaf.reader().terms("_id").iterator().next(); + } + } } } - private DirectoryReader refreshReader(DirectoryReader reader) throws IOException { + private static DirectoryReader refreshReader(DirectoryReader reader) throws IOException { DirectoryReader oldReader = reader; reader = DirectoryReader.openIfChanged(reader); if (reader != oldReader) { From 
0fafb999a88d8a9fb0ac2501ec97ee9ea8a0647a Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Fri, 8 Sep 2023 14:30:16 +0200 Subject: [PATCH 030/136] [TEST] Address ExplainableScriptIT failure (#99351) ExplainableScriptIT tests the output of an explainable score script. The script declares that it does not need the score; in fact, it returns the value of a field from each document as the score. The explain output, though, includes the explanation of the sub query, whose score is completely replaced by function score. The test asserts on the sub query explanation, which is inaccurate because the script declares that it needs no score. This test issue was made more evident by https://github.com/apache/lucene/pull/12383 as the sub query score became 0. The solution is to not include the sub query explanation in the script explain output and remove the assertions that depend on that part of the output. --- .../search/functionscore/ExplainableScriptIT.java | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/functionscore/ExplainableScriptIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/functionscore/ExplainableScriptIT.java index aa769fa565a87..08697bc1470fb 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/functionscore/ExplainableScriptIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/functionscore/ExplainableScriptIT.java @@ -16,7 +16,6 @@ import org.elasticsearch.action.search.SearchType; import org.elasticsearch.common.lucene.search.function.CombineFunction; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.index.fielddata.ScriptDocValues; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.plugins.ScriptPlugin; import org.elasticsearch.script.DocReader; @@ -43,7 +42,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.concurrent.ExecutionException; import static org.elasticsearch.index.query.QueryBuilders.functionScoreQuery; import static org.elasticsearch.index.query.QueryBuilders.termQuery; @@ -76,7 +74,7 @@ public boolean needs_score() { } @Override - public ScoreScript newInstance(DocReader docReader) throws IOException { + public ScoreScript newInstance(DocReader docReader) { return new ScoreScript(params1, lookup, ((DocValuesDocReader) docReader).getLeafReaderContext()); } }; @@ -99,13 +97,13 @@ static class MyScript extends ScoreScript implements ExplainableScoreScript { @Override public Explanation explain(Explanation subQueryScore) throws IOException { - Explanation scoreExp = Explanation.match(subQueryScore.getValue(), "_score: ", subQueryScore); - return Explanation.match((float) (execute(null)), "This script returned " + execute(null), scoreExp); + double score = execute(null); + return Explanation.match((float) score, "This script returned " + score); } @Override public double execute(ExplanationHolder explanation) { - return ((Number) ((ScriptDocValues) getDoc().get("number_field")).get(0)).doubleValue(); + return ((Number) (getDoc().get("number_field")).get(0)).doubleValue(); } } @@ -114,7 +112,7 @@ protected Collection<Class<? extends Plugin>> nodePlugins() { return Arrays.asList(ExplainableScriptPlugin.class); } - public void testExplainScript() throws InterruptedException, IOException, ExecutionException { + public void testExplainScript() throws InterruptedException, IOException { List<IndexRequestBuilder> indexRequests = new ArrayList<>(); for (int i = 0; i < 20; i++) { 
indexRequests.add( @@ -146,8 +144,6 @@ public void testExplainScript() throws InterruptedException, IOException, Execut for (SearchHit hit : hits.getHits()) { assertThat(hit.getId(), equalTo(Integer.toString(idCounter))); assertThat(hit.getExplanation().toString(), containsString(Double.toString(idCounter))); - assertThat(hit.getExplanation().toString(), containsString("1 = n")); - assertThat(hit.getExplanation().toString(), containsString("1 = N")); assertThat(hit.getExplanation().getDetails().length, equalTo(2)); idCounter--; } From 5f31006ed01d5a653de09b21d7c271e7f030527d Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 11 Sep 2023 06:13:32 +0000 Subject: [PATCH 031/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-d70c9113472 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 144 ++++++++++++------------ 2 files changed, 73 insertions(+), 73 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 5275dd639af17..c895cc9705db5 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 8.11.0 -lucene = 9.8.0-snapshot-632d8c253bb +lucene = 9.8.0-snapshot-d70c9113472 bundled_jdk_vendor = openjdk bundled_jdk = 20.0.2+9@6e380f22cbe7469fa75fb448bd903d8e diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index e357205f00c87..ef2bf7a4a2c18 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2523,124 +2523,124 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From c547e5e76e5e8ca03acdb68d860363a493a6867a Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 12 Sep 2023 06:14:49 +0000 Subject: [PATCH 032/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-d70c9113472 --- gradle/verification-metadata.xml | 48 ++++++++++++++++---------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index ef2bf7a4a2c18..002230be89a91 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2525,122 +2525,122 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 42b691f7860bc47f5cfbedee8aa06a7181b981a9 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 13 Sep 2023 06:15:36 +0000 Subject: [PATCH 033/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-95cdd2e9f26 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 144 ++++++++++++------------ 2 files changed, 73 insertions(+), 73 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index c895cc9705db5..3e992bfbc854c 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 8.11.0 -lucene = 9.8.0-snapshot-d70c9113472 +lucene = 9.8.0-snapshot-95cdd2e9f26 bundled_jdk_vendor = openjdk bundled_jdk = 20.0.2+9@6e380f22cbe7469fa75fb448bd903d8e diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 002230be89a91..5ce4043a49327 100644 --- a/gradle/verification-metadata.xml +++ 
b/gradle/verification-metadata.xml @@ -2523,124 +2523,124 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From a94074a2a41d28566d2cb734abe49bc84fb1503e Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Wed, 13 Sep 2023 08:56:20 -0400 Subject: [PATCH 034/136] Fixing compilation (#99529) --- .../common/lucene/search/function/MinScoreScorerTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/common/lucene/search/function/MinScoreScorerTests.java b/server/src/test/java/org/elasticsearch/common/lucene/search/function/MinScoreScorerTests.java index 1fbdd18a1189b..97caa9ecf9503 100644 --- a/server/src/test/java/org/elasticsearch/common/lucene/search/function/MinScoreScorerTests.java +++ b/server/src/test/java/org/elasticsearch/common/lucene/search/function/MinScoreScorerTests.java @@ -164,7 +164,7 @@ public float getMaxScore(int upTo) throws IOException { random(), new ScoreMode[] { ScoreMode.COMPLETE, ScoreMode.TOP_SCORES, ScoreMode.TOP_DOCS_WITH_SCORES } ); - final Scorer assertingScorer = AssertingScorer.wrap(random(), scorer, scoreMode); + final Scorer assertingScorer = AssertingScorer.wrap(random(), scorer, scoreMode, true); if (twoPhase && randomBoolean()) { return hideTwoPhaseIterator(assertingScorer); } else { From e19fcc91cb1cee129325a608bef8dc4c15ae4ea0 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 14 Sep 2023 06:14:59 +0000 Subject: [PATCH 035/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-95cdd2e9f26 --- gradle/verification-metadata.xml | 48 ++++++++++++++++---------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 5ce4043a49327..9625beb292c78 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2525,122 +2525,122 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 8c8330ebe8d7ac1dda5898d1957251a1a9698fa1 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 15 Sep 2023 06:15:16 +0000 Subject: [PATCH 036/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-2c9fae0cf7c --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 144 ++++++++++++------------ 2 files changed, 73 insertions(+), 73 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 3e992bfbc854c..b539aa2341ff9 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 8.11.0 -lucene = 9.8.0-snapshot-95cdd2e9f26 +lucene = 9.8.0-snapshot-2c9fae0cf7c bundled_jdk_vendor = openjdk bundled_jdk = 20.0.2+9@6e380f22cbe7469fa75fb448bd903d8e diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 9625beb292c78..cd7bba5e25043 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2523,124 +2523,124 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 
c1ecf70b2f8664d0d7b6a4b4637ee7f2cdedf659 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sun, 17 Sep 2023 06:12:19 +0000 Subject: [PATCH 037/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-1f8e08481c2 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 144 ++++++++++++------------ 2 files changed, 73 insertions(+), 73 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index b539aa2341ff9..b3f4f83a8e52e 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 8.11.0 -lucene = 9.8.0-snapshot-2c9fae0cf7c +lucene = 9.8.0-snapshot-1f8e08481c2 bundled_jdk_vendor = openjdk bundled_jdk = 20.0.2+9@6e380f22cbe7469fa75fb448bd903d8e diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index cd7bba5e25043..718b3c5f9b068 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2523,124 +2523,124 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 31007450610e1f99924c68eabd938537d4d9a53a Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 18 Sep 2023 06:14:57 +0000 Subject: [PATCH 038/136] [Automated] Update Lucene snapshot to 9.8.0-snapshot-1f8e08481c2 --- gradle/verification-metadata.xml | 48 ++++++++++++++++---------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 718b3c5f9b068..26bbbb4e7cadc 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2525,122 +2525,122 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 94f124c00b2e9a7610c2121553bdd1f526620bd1 Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Mon, 18 Sep 2023 08:30:07 -0400 Subject: [PATCH 039/136] Fixing more tests for Lucene snapshot (#99587) * Fix DfsPhaseTests#testSingleKnnSearch * Fix suggestors and other things that should call `LeafCollector#finish` --- .../search/fetch/subphase/InnerHitsContext.java | 6 +++++- .../search/internal/ContextIndexSearcher.java | 4 ++++ .../suggest/completion/CompletionSuggester.java | 15 +++++++++++---- .../elasticsearch/search/dfs/DfsPhaseTests.java | 13 ++----------- 4 files changed, 22 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/InnerHitsContext.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/InnerHitsContext.java index 3d50e79acbd35..aa5399b0c62b1 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/InnerHitsContext.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/InnerHitsContext.java @@ -172,7 +172,11 @@ public static void intersect(Weight weight, Weight innerHitQueryWeight, Collecto } } } catch (CollectionTerminatedException e) { - // ignore and continue + // collection was terminated prematurely + // continue with the following leaf } + // Finish the leaf collection in preparation for the next. 
+ // This includes any collection that was terminated early via `CollectionTerminatedException` + leafCollector.finish(); } } diff --git a/server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java b/server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java index 90ddeda87d2d3..ca2cc40dd6035 100644 --- a/server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java +++ b/server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java @@ -497,6 +497,7 @@ private void searchLeaf(LeafReaderContext ctx, Weight weight, Collector collecto } catch (CollectionTerminatedException e) { // there is no doc of interest in this reader context // continue with the following leaf + // We don't need to finish leaf collector as collection was terminated before it was created return; } Bits liveDocs = ctx.reader().getLiveDocs(); @@ -531,6 +532,9 @@ private void searchLeaf(LeafReaderContext ctx, Weight weight, Collector collecto } } } + // Finish the leaf collection in preparation for the next. + // This includes any collection that was terminated early via `CollectionTerminatedException` + leafCollector.finish(); } private static BitSet getSparseBitSetOrNull(Bits liveDocs) { diff --git a/server/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java b/server/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java index 979aac5a2349c..d10a067a963d3 100644 --- a/server/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java +++ b/server/src/main/java/org/elasticsearch/search/suggest/completion/CompletionSuggester.java @@ -11,6 +11,7 @@ import org.apache.lucene.search.BulkScorer; import org.apache.lucene.search.CollectionTerminatedException; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.Weight; import org.apache.lucene.search.suggest.document.CompletionQuery; import org.apache.lucene.search.suggest.document.TopSuggestDocs; @@ -79,20 +80,26 @@ private static void suggest(IndexSearcher searcher, CompletionQuery query, TopSu for (LeafReaderContext context : searcher.getIndexReader().leaves()) { BulkScorer scorer = weight.bulkScorer(context); if (scorer != null) { + LeafCollector leafCollector = null; try { - scorer.score(collector.getLeafCollector(context), context.reader().getLiveDocs()); + leafCollector = collector.getLeafCollector(context); + scorer.score(leafCollector, context.reader().getLiveDocs()); } catch (CollectionTerminatedException e) { // collection was terminated prematurely // continue with the following leaf } + // We can only finish the leaf collector if it was actually created + if (leafCollector != null) { + // We need to call finish as TopSuggestDocsCollector#finish() populates the pendingResults + // This is important when skipping duplicates + leafCollector.finish(); + } } } - collector.finish(); } @Override - protected CompletionSuggestion emptySuggestion(String name, CompletionSuggestionContext suggestion, CharsRefBuilder spare) - throws IOException { + protected CompletionSuggestion emptySuggestion(String name, CompletionSuggestionContext suggestion, CharsRefBuilder spare) { CompletionSuggestion completionSuggestion = new CompletionSuggestion(name, suggestion.getSize(), suggestion.isSkipDuplicates()); spare.copyUTF8Bytes(suggestion.getText()); CompletionSuggestion.Entry completionSuggestEntry = new CompletionSuggestion.Entry(new 
Text(spare.toString()), 0, spare.length()); diff --git a/server/src/test/java/org/elasticsearch/search/dfs/DfsPhaseTests.java b/server/src/test/java/org/elasticsearch/search/dfs/DfsPhaseTests.java index 0f213073757ce..2237eaabb76e0 100644 --- a/server/src/test/java/org/elasticsearch/search/dfs/DfsPhaseTests.java +++ b/server/src/test/java/org/elasticsearch/search/dfs/DfsPhaseTests.java @@ -16,8 +16,6 @@ import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; -import org.elasticsearch.common.util.concurrent.EsExecutors; -import org.elasticsearch.common.util.concurrent.EsExecutors.TaskTrackingConfig; import org.elasticsearch.search.internal.ContextIndexSearcher; import org.elasticsearch.search.profile.Profilers; import org.elasticsearch.search.profile.SearchProfileDfsPhaseResult; @@ -25,6 +23,7 @@ import org.elasticsearch.search.profile.query.QueryProfileShardResult; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.TestThreadPool; +import org.elasticsearch.threadpool.ThreadPool; import org.junit.After; import org.junit.Before; @@ -39,16 +38,8 @@ public class DfsPhaseTests extends ESTestCase { @Before public final void init() { - int numThreads = randomIntBetween(2, 4); threadPool = new TestThreadPool(DfsPhaseTests.class.getName()); - threadPoolExecutor = EsExecutors.newFixed( - "test", - numThreads, - 10, - EsExecutors.daemonThreadFactory("test"), - threadPool.getThreadContext(), - randomFrom(TaskTrackingConfig.DEFAULT, TaskTrackingConfig.DO_NOT_TRACK) - ); + threadPoolExecutor = (ThreadPoolExecutor) threadPool.executor(ThreadPool.Names.SEARCH_WORKER); } @After From 5406f96450a24da177e33ebb5653a9a49302dd65 Mon Sep 17 00:00:00 2001 From: Mary Gouseti Date: Fri, 28 Jul 2023 12:19:45 +0300 Subject: [PATCH 040/136] Add require data stream lifecycle feature flag (#98022) (#98026) --- qa/mixed-cluster/build.gradle | 1 + 1 file changed, 1 insertion(+) diff --git a/qa/mixed-cluster/build.gradle b/qa/mixed-cluster/build.gradle index 08d64e2b9353b..13152c82a78da 100644 --- a/qa/mixed-cluster/build.gradle +++ b/qa/mixed-cluster/build.gradle @@ -52,6 +52,7 @@ BuildParams.bwcVersions.withWireCompatible { bwcVersion, baseName -> setting 'path.repo', "${buildDir}/cluster/shared/repo/${baseName}" setting 'xpack.security.enabled', 'false' requiresFeature 'es.index_mode_feature_flag_registered', Version.fromString("8.0.0") + requiresFeature 'es.dlm_feature_flag_enabled', Version.fromString("8.9.0") } tasks.register("${baseName}#mixedClusterTest", StandaloneRestIntegTestTask) { From 83f99419d00796430a8724227b893871b734917e Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Tue, 19 Sep 2023 10:08:08 +0200 Subject: [PATCH 041/136] Revert "Add require data stream lifecycle feature flag (#98022) (#98026)" This reverts commit 5406f96450a24da177e33ebb5653a9a49302dd65. 
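For context on what the `requiresFeature` lines above gate: the following is a minimal sketch, assuming the `es.dlm_feature_flag_enabled` flag is surfaced to nodes as a plain JVM system property, as its `es.`-prefixed name suggests. The helper class and method name are hypothetical illustrations, not Elasticsearch's actual feature-flag plumbing.

    // Minimal sketch, assuming the flag from the build.gradle diff above is
    // read as a plain JVM system property. Hypothetical helper, not the real
    // Elasticsearch feature-flag implementation.
    final class DlmFeatureFlagSketch {
        private static final String PROPERTY = "es.dlm_feature_flag_enabled";

        static boolean isDlmEnabled() {
            // Defaults to false, matching a flag that must be explicitly opted into.
            return Boolean.parseBoolean(System.getProperty(PROPERTY, "false"));
        }
    }

Under this reading, the `requiresFeature ... Version.fromString("8.9.0")` line tells the mixed-cluster test framework that the flag only exists from 8.9.0 onward, and the revert whose diff follows simply backs that requirement out.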
--- qa/mixed-cluster/build.gradle | 1 - 1 file changed, 1 deletion(-) diff --git a/qa/mixed-cluster/build.gradle b/qa/mixed-cluster/build.gradle index 13152c82a78da..08d64e2b9353b 100644 --- a/qa/mixed-cluster/build.gradle +++ b/qa/mixed-cluster/build.gradle @@ -52,7 +52,6 @@ BuildParams.bwcVersions.withWireCompatible { bwcVersion, baseName -> setting 'path.repo', "${buildDir}/cluster/shared/repo/${baseName}" setting 'xpack.security.enabled', 'false' requiresFeature 'es.index_mode_feature_flag_registered', Version.fromString("8.0.0") - requiresFeature 'es.dlm_feature_flag_enabled', Version.fromString("8.9.0") tasks.register("${baseName}#mixedClusterTest", StandaloneRestIntegTestTask) { From 62a2cfca35b295c862cd3c4e9cd8093b03f5eb64 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Tue, 19 Sep 2023 12:10:20 +0200 Subject: [PATCH 042/136] Fix stored access in script field type tests (#99650) We have been getting test failures from DateScriptFieldTypeTests#testDistanceFeatureQuery and BooleanScriptFieldTypeTests#testUsedInScript. They are new because Lucene now offloads execution entirely to the executor when provided, which may surface test issues that were not issues before. In these examples, stored fields cached in SearchLookup were reused across threads, which made us hit the Lucene assertion that ensures stored fields are always pulled and consumed from the same thread. The fixes are relatively simple: recreate the query, or recreate the mock search execution context, so you get a clean search lookup with no cached stored fields. Both failures were caused by subsequent searches reusing the same query or the same search execution context. --- .../mapper/BooleanScriptFieldTypeTests.java | 69 +++++++++++-------- .../mapper/DateScriptFieldTypeTests.java | 38 +++++----- 2 files changed, 62 insertions(+), 45 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/BooleanScriptFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/BooleanScriptFieldTypeTests.java index 8a8b8e0085415..ee84eef86c1e2 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/BooleanScriptFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/BooleanScriptFieldTypeTests.java @@ -120,41 +120,52 @@ public void testUsedInScript() throws IOException { iw.addDocument(List.of(new StoredField("_source", new BytesRef("{\"foo\": [false]}")))); try (DirectoryReader reader = iw.getReader()) { IndexSearcher searcher = newSearcher(reader); - SearchExecutionContext searchContext = mockContext(true, simpleMappedFieldType()); - assertThat(searcher.count(new ScriptScoreQuery(new MatchAllDocsQuery(), new Script("test"), new ScoreScript.LeafFactory() { - @Override - public boolean needs_score() { - return false; - } + { + SearchExecutionContext searchContext = mockContext(true, simpleMappedFieldType()); + assertThat( + searcher.count(new ScriptScoreQuery(new MatchAllDocsQuery(), new Script("test"), new ScoreScript.LeafFactory() { + @Override + public boolean needs_score() { + return false; + } - @Override - public ScoreScript newInstance(DocReader docReader) { - return new ScoreScript(Map.of(), searchContext.lookup(), docReader) { @Override - public double execute(ExplanationHolder explanation) { - ScriptDocValues.Booleans booleans = (ScriptDocValues.Booleans) getDoc().get("test"); - return booleans.get(0) ? 
3 : 0; + public ScoreScript newInstance(DocReader docReader) { + return new ScoreScript(Map.of(), searchContext.lookup(), docReader) { + @Override + public double execute(ExplanationHolder explanation) { + ScriptDocValues.Booleans booleans = (ScriptDocValues.Booleans) getDoc().get("test"); + return booleans.get(0) ? 3 : 0; + } + }; + } + }, searchContext.lookup(), 2.5f, "test", 0, IndexVersion.current())), + equalTo(1) + ); + } + { + SearchExecutionContext searchContext = mockContext(true, simpleMappedFieldType()); + assertThat( + searcher.count(new ScriptScoreQuery(new MatchAllDocsQuery(), new Script("test"), new ScoreScript.LeafFactory() { + @Override + public boolean needs_score() { + return false; } - }; - } - }, searchContext.lookup(), 2.5f, "test", 0, IndexVersion.current())), equalTo(1)); - assertThat(searcher.count(new ScriptScoreQuery(new MatchAllDocsQuery(), new Script("test"), new ScoreScript.LeafFactory() { - @Override - public boolean needs_score() { - return false; - } - @Override - public ScoreScript newInstance(DocReader docReader) { - return new ScoreScript(Map.of(), searchContext.lookup(), docReader) { @Override - public double execute(ExplanationHolder explanation) { - BooleanDocValuesField booleans = (BooleanDocValuesField) field("test"); - return booleans.getInternal(0) ? 3 : 0; + public ScoreScript newInstance(DocReader docReader) { + return new ScoreScript(Map.of(), searchContext.lookup(), docReader) { + @Override + public double execute(ExplanationHolder explanation) { + BooleanDocValuesField booleans = (BooleanDocValuesField) field("test"); + return booleans.getInternal(0) ? 3 : 0; + } + }; } - }; - } - }, searchContext.lookup(), 2.5f, "test", 0, IndexVersion.current())), equalTo(1)); + }, searchContext.lookup(), 2.5f, "test", 0, IndexVersion.current())), + equalTo(1) + ); + } } } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DateScriptFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DateScriptFieldTypeTests.java index 9260754d0c611..ed31d8d648048 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DateScriptFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DateScriptFieldTypeTests.java @@ -247,22 +247,28 @@ public void testDistanceFeatureQuery() throws IOException { ); try (DirectoryReader reader = iw.getReader()) { IndexSearcher searcher = newSearcher(reader); - Query query = simpleMappedFieldType().distanceFeatureQuery(1595432181354L, "1ms", mockContext()); - TopDocs docs = searcher.search(query, 4); - assertThat(docs.scoreDocs, arrayWithSize(3)); - assertThat(readSource(reader, docs.scoreDocs[0].doc), equalTo("{\"timestamp\": [1595432181354]}")); - assertThat(docs.scoreDocs[0].score, equalTo(1.0F)); - assertThat(readSource(reader, docs.scoreDocs[1].doc), equalTo("{\"timestamp\": [1595432181356, 1]}")); - assertThat((double) docs.scoreDocs[1].score, closeTo(.333, .001)); - assertThat(readSource(reader, docs.scoreDocs[2].doc), equalTo("{\"timestamp\": [1595432181351]}")); - assertThat((double) docs.scoreDocs[2].score, closeTo(.250, .001)); - Explanation explanation = query.createWeight(searcher, ScoreMode.TOP_SCORES, 1.0F) - .explain(reader.leaves().get(0), docs.scoreDocs[0].doc); - assertThat(explanation.toString(), containsString("1.0 = Distance score, computed as weight * pivot / (pivot")); - assertThat(explanation.toString(), containsString("1.0 = weight")); - assertThat(explanation.toString(), containsString("1 = pivot")); - assertThat(explanation.toString(), 
containsString("1595432181354 = origin")); - assertThat(explanation.toString(), containsString("1595432181354 = current value")); + TopDocs docs; + { + Query query = simpleMappedFieldType().distanceFeatureQuery(1595432181354L, "1ms", mockContext()); + docs = searcher.search(query, 4); + assertThat(docs.scoreDocs, arrayWithSize(3)); + assertThat(readSource(reader, docs.scoreDocs[0].doc), equalTo("{\"timestamp\": [1595432181354]}")); + assertThat(docs.scoreDocs[0].score, equalTo(1.0F)); + assertThat(readSource(reader, docs.scoreDocs[1].doc), equalTo("{\"timestamp\": [1595432181356, 1]}")); + assertThat((double) docs.scoreDocs[1].score, closeTo(.333, .001)); + assertThat(readSource(reader, docs.scoreDocs[2].doc), equalTo("{\"timestamp\": [1595432181351]}")); + assertThat((double) docs.scoreDocs[2].score, closeTo(.250, .001)); + } + { + Query query = simpleMappedFieldType().distanceFeatureQuery(1595432181354L, "1ms", mockContext()); + Explanation explanation = query.createWeight(searcher, ScoreMode.TOP_SCORES, 1.0F) + .explain(reader.leaves().get(0), docs.scoreDocs[0].doc); + assertThat(explanation.toString(), containsString("1.0 = Distance score, computed as weight * pivot / (pivot")); + assertThat(explanation.toString(), containsString("1.0 = weight")); + assertThat(explanation.toString(), containsString("1 = pivot")); + assertThat(explanation.toString(), containsString("1595432181354 = origin")); + assertThat(explanation.toString(), containsString("1595432181354 = current value")); + } } } } From dee85de61cf5daf7534564053d0db379260845c2 Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Wed, 20 Sep 2023 08:30:09 -0400 Subject: [PATCH 043/136] Adds new max_inner_product vector similarity function (#99527) Adds new max_inner_product vector similarity function. This differs from dot_product in the following ways: Doesn't require vectors to be normalized Scales the similarity between vectors differently to prevent negative scores --- docs/changelog/99527.yaml | 5 ++ .../mapping/types/dense-vector.asciidoc | 10 ++- .../test/search.vectors/40_knn_search.yml | 85 +++++++++++++++++++ .../vectors/DenseVectorFieldMapper.java | 8 ++ 4 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/99527.yaml diff --git a/docs/changelog/99527.yaml b/docs/changelog/99527.yaml new file mode 100644 index 0000000000000..19eef621fa500 --- /dev/null +++ b/docs/changelog/99527.yaml @@ -0,0 +1,5 @@ +pr: 99445 +summary: Add new max_inner_product vector similarity function +area: Vector Search +type: enhancement +issues: [] diff --git a/docs/reference/mapping/types/dense-vector.asciidoc b/docs/reference/mapping/types/dense-vector.asciidoc index fb50ee36644a6..96427a01e61d5 100644 --- a/docs/reference/mapping/types/dense-vector.asciidoc +++ b/docs/reference/mapping/types/dense-vector.asciidoc @@ -159,7 +159,7 @@ distance) between the vectors. The document `_score` is computed as `1 / (1 + l2_norm(query, vector)^2)`. `dot_product`::: -Computes the dot product of two vectors. This option provides an optimized way +Computes the dot product of two unit vectors. This option provides an optimized way to perform cosine similarity. The constraints and computed score are defined by `element_type`. + @@ -181,6 +181,14 @@ original vectors and cannot normalize them in advance. The document `_score` is computed as `(1 + cosine(query, vector)) / 2`. The `cosine` similarity does not allow vectors with zero magnitude, since cosine is not defined in this case. 
+ +`max_inner_product`::: +Computes the maximum inner product of two vectors. This is similar to `dot_product`, +but doesn't require vectors to be normalized. This means that each vector's magnitude +can significantly effect the score. The document `_score` is adjusted to prevent negative +values. For `max_inner_product` values `< 0`, the `_score` is +`1 / (1 + -1 * max_inner_product(query, vector))`. For non-negative `max_inner_product` results +the `_score` is calculated `max_inner_product(query, vector) + 1`. ==== NOTE: Although they are conceptually related, the `similarity` parameter is diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/40_knn_search.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/40_knn_search.yml index f34aef9b83321..340cd8f8d0f70 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/40_knn_search.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/40_knn_search.yml @@ -368,3 +368,88 @@ setup: filter: {"term": {"name": "cow.jpg"}} - length: {hits.hits: 0} +--- +"Knn search with mip": + - skip: + version: ' - 8.10.99' + reason: 'mip similarity added in 8.11' + features: close_to + + - do: + indices.create: + index: mip + body: + mappings: + properties: + name: + type: keyword + vector: + type: dense_vector + dims: 5 + index: true + similarity: max_inner_product + + - do: + index: + index: mip + id: "1" + body: + name: cow.jpg + vector: [230.0, 300.33, -34.8988, 15.555, -200.0] + + - do: + index: + index: mip + id: "2" + body: + name: moose.jpg + vector: [-0.5, 100.0, -13, 14.8, -156.0] + + - do: + index: + index: mip + id: "3" + body: + name: rabbit.jpg + vector: [0.5, 111.3, -13.0, 14.8, -156.0] + + - do: + indices.refresh: {} + + - do: + search: + index: mip + body: + fields: [ "name" ] + knn: + num_candidates: 3 + k: 3 + field: vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + + + - length: {hits.hits: 3} + - match: {hits.hits.0._id: "1"} + - close_to: {hits.hits.0._score: {value: 58694.902, error: 0.01}} + - match: {hits.hits.1._id: "3"} + - close_to: {hits.hits.1._score: {value: 34702.79, error: 0.01}} + - match: {hits.hits.2._id: "2"} + - close_to: {hits.hits.2._score: {value: 33686.29, error: 0.01}} + + - do: + search: + index: mip + body: + fields: [ "name" ] + knn: + num_candidates: 3 + k: 3 + field: vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + filter: { "term": { "name": "moose.jpg" } } + + + + - length: {hits.hits: 1} + - match: {hits.hits.0._id: "2"} + - close_to: {hits.hits.0._score: {value: 33686.29, error: 0.01}} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 28f83a167fda3..dc90dc7382780 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -661,6 +661,14 @@ float score(float similarity, ElementType elementType, int dim) { case FLOAT -> (1 + similarity) / 2f; }; } + }, + MAX_INNER_PRODUCT(VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT) { + @Override + float score(float similarity, ElementType elementType, int dim) { + return switch (elementType) { + case BYTE, FLOAT -> similarity < 0 ? 
1 / (1 + -1 * similarity) : similarity + 1; + }; + } }; public final VectorSimilarityFunction function; From 92cea2797e811680e05151c8805e16c003db84d4 Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Thu, 28 Sep 2023 11:38:04 -0400 Subject: [PATCH 044/136] Add nested support for dense_vector fields and knn search (#99763) * Nested dense_vector support * Adjust nested support based on new lucene version * fixing after rebase * fixing some code * fixing tests adding transport version * spotless * [Automated] Update Lucene snapshot to 9.9.0-snapshot-b3e67403aaf * Adds new max_inner_product vector similarity function (#99527) Adds new max_inner_product vector similarity function. This differs from dot_product in the following ways: Doesn't require vectors to be normalized Scales the similarity between vectors differently to prevent negative scores * requiring top level filter to be parent filter * adding docs & fixing tests * adding and fixing docs * adding changlog * removing unnecessary file changes * removing unused imports * fixing test * maybe fix doc tests * continue tests in docs * fixing more tests * fixing tests --------- Co-authored-by: Jim Ferenczi Co-authored-by: elasticsearchmachine --- docs/changelog/99532.yaml | 5 + .../mapping/types/dense-vector.asciidoc | 3 - .../search-your-data/knn-search.asciidoc | 370 +++++++++++++++++- .../search.vectors/100_knn_nested_search.yml | 136 +++++++ .../60_dense_vector_dynamic_mapping.yml | 12 +- .../search/nested/VectorNestedIT.java | 76 ++++ .../org/elasticsearch/TransportVersions.java | 2 + .../action/search/DfsQueryPhase.java | 13 +- .../action/search/SearchPhaseController.java | 7 +- .../vectors/DenseVectorFieldMapper.java | 41 +- .../index/query/NestedQueryBuilder.java | 4 +- .../search/dfs/DfsKnnResults.java | 18 +- .../elasticsearch/search/dfs/DfsPhase.java | 9 +- .../search/vectors/KnnSearchBuilder.java | 45 ++- .../search/vectors/KnnVectorQueryBuilder.java | 31 +- .../action/search/DfsQueryPhaseTests.java | 3 +- .../vectors/DenseVectorFieldMapperTests.java | 40 +- .../vectors/DenseVectorFieldTypeTests.java | 64 ++- .../search/dfs/DfsPhaseTests.java | 4 +- 19 files changed, 805 insertions(+), 78 deletions(-) create mode 100644 docs/changelog/99532.yaml create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/100_knn_nested_search.yml create mode 100644 server/src/internalClusterTest/java/org/elasticsearch/search/nested/VectorNestedIT.java diff --git a/docs/changelog/99532.yaml b/docs/changelog/99532.yaml new file mode 100644 index 0000000000000..859ba963600a8 --- /dev/null +++ b/docs/changelog/99532.yaml @@ -0,0 +1,5 @@ +pr: 99532 +summary: Adds `nested` support for indexed `dense_vector` fields +area: Vector Search +type: enhancement +issues: [] diff --git a/docs/reference/mapping/types/dense-vector.asciidoc b/docs/reference/mapping/types/dense-vector.asciidoc index 64f65aad57cda..96a009842ddc2 100644 --- a/docs/reference/mapping/types/dense-vector.asciidoc +++ b/docs/reference/mapping/types/dense-vector.asciidoc @@ -111,9 +111,6 @@ PUT my-index-2 efficient kNN search. Like most kNN algorithms, HNSW is an approximate method that sacrifices result accuracy for improved speed. -Dense vector fields cannot be indexed if they are within -<> mappings. 
- [role="child_attributes"] [[dense-vector-params]] ==== Parameters for dense vector fields diff --git a/docs/reference/search/search-your-data/knn-search.asciidoc b/docs/reference/search/search-your-data/knn-search.asciidoc index 0f07033881d97..8c676a5515ca3 100644 --- a/docs/reference/search/search-your-data/knn-search.asciidoc +++ b/docs/reference/search/search-your-data/knn-search.asciidoc @@ -359,7 +359,7 @@ the dense vectors from the input data, * the text embedding NLP model deployment must be started. ===================== -Reference the deployed text embedding model or the model deployment in the +Reference the deployed text embedding model or the model deployment in the `query_vector_builder` object and provide the search query as `model_text`: [source,js] @@ -385,8 +385,8 @@ Reference the deployed text embedding model or the model deployment in the <1> The {nlp} task to perform. It must be `text_embedding`. <2> The ID of the text embedding model to use to generate the dense vectors from the query string. Use the same model that generated the embeddings from the -input text in the index you search against. You can use the value of the -`deployment_id` instead in the `model_id` argument. +input text in the index you search against. You can use the value of the +`deployment_id` instead in the `model_id` argument. <3> The query string from which the model generates the dense vector representation. @@ -488,6 +488,367 @@ In our data set, the only document with the file type of `png` has a vector of ` between `[42, 8, -15]` and `[1, 5, -20]` is `41.412`, which is greater than the configured similarity of `36`. Meaning, this search will return no hits. +[discrete] +[[nested-knn-search]] +==== Nested kNN Search + +It is common for text to exceed a particular model's token limit and requires chunking before building the embeddings +for individual chunks. When using <> with <>, you can achieve nearest +passage retrieval without copying top-level document metadata. + +Here is a simple passage vectors index that stores vectors and some top-level metadata for filtering. + +[source,console] +---- +PUT passage_vectors +{ + "mappings": { + "properties": { + "full_text": { + "type": "text" + }, + "creation_time": { + "type": "date" + }, + "paragraph": { + "type": "nested", + "properties": { + "vector": { + "type": "dense_vector", + "dims": 2, + "index": true, + "similarity": "cosine" + }, + "text": { + "type": "text", + "index": false + } + } + } + } + } +} +---- +//TEST[continued] + +With the above mapping, we can index multiple passage vectors along with storing the individual passage text. 
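Internally, each element of the `paragraph` array is indexed as its own hidden Lucene document, stored in the same contiguous block as its parent; this block layout is what later lets kNN search collapse child hits back to one hit per top-level document. A rough Lucene-level sketch of that layout follows (the direct `KnnFloatVectorField` usage and the `doc_type` parent marker are illustrative assumptions, not Elasticsearch's actual internal field names):

[source,java]
----
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.KnnFloatVectorField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;

// Sketch: one passage_vectors document becomes a block of Lucene documents.
// The children (the nested paragraphs) come first; the parent is always last.
void indexOneBlock(IndexWriter writer) throws Exception {
    List<Document> block = new ArrayList<>();

    Document paragraph1 = new Document();
    paragraph1.add(new KnnFloatVectorField("paragraph.vector", new float[] { 0.45f, 45f }));
    block.add(paragraph1);

    Document paragraph2 = new Document();
    paragraph2.add(new KnnFloatVectorField("paragraph.vector", new float[] { 0.8f, 0.6f }));
    block.add(paragraph2);

    Document parent = new Document();
    // Hypothetical marker so parent documents can be identified at query time.
    parent.add(new StringField("doc_type", "parent", Field.Store.NO));
    block.add(parent);

    // addDocuments writes the block contiguously, keeping parent/child positions stable.
    writer.addDocuments(block);
}
----

None of this is visible at the REST layer; the bulk request below indexes the documents as usual.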
+ +[source,console] +---- +POST passage_vectors/_bulk?refresh=true +{ "index": { "_id": "1" } } +{ "full_text": "first paragraph another paragraph", "creation_time": "2019-05-04", "paragraph": [ { "vector": [ 0.45, 45 ], "text": "first paragraph", "paragraph_id": "1" }, { "vector": [ 0.8, 0.6 ], "text": "another paragraph", "paragraph_id": "2" } ] } +{ "index": { "_id": "2" } } +{ "full_text": "number one paragraph number two paragraph", "creation_time": "2020-05-04", "paragraph": [ { "vector": [ 1.2, 4.5 ], "text": "number one paragraph", "paragraph_id": "1" }, { "vector": [ -1, 42 ], "text": "number two paragraph", "paragraph_id": "2" } ] } +---- +//TEST[continued] +//TEST[s/\.\.\.//] + +The query will seem very similar to a typical kNN search: +[source,console] +---- +POST passage_vectors/_search +{ + "fields": ["full_text", "creation_time"], + "_source": false, + "knn": { + "query_vector": [ + 0.45, + 45 + ], + "field": "paragraph.vector", + "k": 2, + "num_candidates": 2 + } +} +---- +//TEST[continued] + +Note below that even though we have 4 total vectors, we still return two documents. kNN search over nested dense_vectors +will always diversify the top results over the top-level document. Meaning, `"k"` top-level documents will be returned, +scored by their nearest passage vector (e.g. `"paragraph.vector"`). + +[source,console-result] +---- +{ + "took": 4, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 2, + "relation": "eq" + }, + "max_score": 1.0, + "hits": [ + { + "_index": "passage_vectors", + "_id": "1", + "_score": 1.0, + "fields": { + "creation_time": [ + "2019-05-04T00:00:00.000Z" + ], + "full_text": [ + "first paragraph another paragraph" + ] + } + }, + { + "_index": "passage_vectors", + "_id": "2", + "_score": 0.9997144, + "fields": { + "creation_time": [ + "2020-05-04T00:00:00.000Z" + ], + "full_text": [ + "number one paragraph number two paragraph" + ] + } + } + ] + } +} +---- +// TESTRESPONSE[s/"took": 4/"took" : "$body.took"/] + +What if you wanted to filter by some top-level document metadata? You can do this by adding `filter` to your +`knn` clause. + + +NOTE: `filter` will always be over the top-level document metadata. This means you cannot filter based on `nested` + field metadata. + +[source,console] +---- +POST passage_vectors/_search +{ + "fields": [ + "creation_time", + "full_text" + ], + "_source": false, + "knn": { + "query_vector": [ + 0.45, + 45 + ], + "field": "paragraph.vector", + "k": 2, + "num_candidates": 2, + "filter": { + "bool": { + "filter": [ + { + "range": { + "creation_time": { + "gte": "2019-05-01", + "lte": "2019-05-05" + } + } + } + ] + } + } + } +} +---- +//TEST[continued] + +Now we have filtered based on the top level `"creation_time"` and only one document falls within that range. + +[source,console-result] +---- +{ + "took": 4, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": 1.0, + "hits": [ + { + "_index": "passage_vectors", + "_id": "1", + "_score": 1.0, + "fields": { + "creation_time": [ + "2019-05-04T00:00:00.000Z" + ], + "full_text": [ + "first paragraph another paragraph" + ] + } + } + ] + } +} +---- +// TESTRESPONSE[s/"took": 4/"took" : "$body.took"/] + +Additionally, if you wanted to extract the nearest passage for a matched document, you can supply <> +to the `knn` clause. 
+ +NOTE: `inner_hits` for kNN will only ever return a single hit, the nearest passage vector. +Setting `"size"` to any value greater than `1` will have no effect on the results. + +[source,console] +---- +POST passage_vectors/_search +{ + "fields": [ + "creation_time", + "full_text" + ], + "_source": false, + "knn": { + "query_vector": [ + 0.45, + 45 + ], + "field": "paragraph.vector", + "k": 2, + "num_candidates": 2, + "inner_hits": { + "_source": false, + "fields": [ + "paragraph.text" + ] + } + } +} +---- +//TEST[continued] + +Now the result will contain the nearest found paragraph when searching. + +[source,console-result] +---- +{ + "took": 4, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 2, + "relation": "eq" + }, + "max_score": 1.0, + "hits": [ + { + "_index": "passage_vectors", + "_id": "1", + "_score": 1.0, + "fields": { + "creation_time": [ + "2019-05-04T00:00:00.000Z" + ], + "full_text": [ + "first paragraph another paragraph" + ] + }, + "inner_hits": { + "paragraph": { + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": 1.0, + "hits": [ + { + "_index": "passage_vectors", + "_id": "1", + "_nested": { + "field": "paragraph", + "offset": 0 + }, + "_score": 1.0, + "fields": { + "paragraph": [ + { + "text": [ + "first paragraph" + ] + } + ] + } + } + ] + } + } + } + }, + { + "_index": "passage_vectors", + "_id": "2", + "_score": 0.9997144, + "fields": { + "creation_time": [ + "2020-05-04T00:00:00.000Z" + ], + "full_text": [ + "number one paragraph number two paragraph" + ] + }, + "inner_hits": { + "paragraph": { + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": 0.9997144, + "hits": [ + { + "_index": "passage_vectors", + "_id": "2", + "_nested": { + "field": "paragraph", + "offset": 1 + }, + "_score": 0.9997144, + "fields": { + "paragraph": [ + { + "text": [ + "number two paragraph" + ] + } + ] + } + } + ] + } + } + } + } + ] + } +} +---- +// TESTRESPONSE[s/"took": 4/"took" : "$body.took"/] + + [discrete] [[knn-indexing-considerations]] ==== Indexing considerations @@ -532,9 +893,6 @@ PUT image-index [[approximate-knn-limitations]] ==== Limitations for approximate kNN search -* You can't run an approximate kNN search on a `dense_vector` field within a -<> mapping. - * When using kNN search in <>, the <> option is not supported. 
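At the Lucene level, the pieces introduced by this change fit together as a diversifying child vector query joined through a parent bit set, which is also how the top-level `filter` is pushed down to the paragraph level. A condensed sketch of that wiring, reusing the hypothetical `doc_type` parent marker from the indexing sketch above (Elasticsearch derives the parent bit set from its own nested metadata instead, and the term filter here is only a stand-in for a real top-level filter):

[source,java]
----
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.join.BitSetProducer;
import org.apache.lucene.search.join.DiversifyingChildrenFloatKnnVectorQuery;
import org.apache.lucene.search.join.QueryBitSetProducer;
import org.apache.lucene.search.join.ToChildBlockJoinQuery;

// Parents are the last document of each block; this bit set identifies them.
BitSetProducer parents = new QueryBitSetProducer(new TermQuery(new Term("doc_type", "parent")));

// A top-level filter (e.g. the creation_time range) matches parent documents,
// so it must be joined down to the child (paragraph) level before vector search.
Query topLevelFilter = new TermQuery(new Term("doc_type", "parent")); // stand-in for the real filter
Query childFilter = new ToChildBlockJoinQuery(topLevelFilter, parents);

// Scores at most one child (the nearest passage) per parent, which is why
// nested kNN naturally diversifies the top k results over top-level documents.
Query knn = new DiversifyingChildrenFloatKnnVectorQuery(
    "paragraph.vector",         // child vector field
    new float[] { 0.45f, 45f }, // query vector
    childFilter,                // filter over child documents, may be null
    2,                          // candidates to gather (num_candidates)
    parents                     // parent bit set used for diversification
);
----

The query classes and constructor arguments mirror the `DenseVectorFieldType#createKnnQuery` changes later in this patch.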
diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/100_knn_nested_search.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/100_knn_nested_search.yml new file mode 100644 index 0000000000000..6e2d30eb5ad1b --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/100_knn_nested_search.yml @@ -0,0 +1,136 @@ +setup: + - skip: + version: ' - 8.10.99' + reason: 'nested kNN search added in 8.11' + - do: + indices.create: + index: test + body: + mappings: + properties: + name: + type: keyword + nested: + type: nested + properties: + paragraph_id: + type: keyword + vector: + type: dense_vector + dims: 5 + index: true + similarity: l2_norm + + - do: + index: + index: test + id: "1" + body: + name: cow.jpg + nested: + - paragraph_id: 0 + vector: [230.0, 300.33, -34.8988, 15.555, -200.0] + - paragraph_id: 1 + vector: [240.0, 300, -3, 1, -20] + + - do: + index: + index: test + id: "2" + body: + name: moose.jpg + nested: + - paragraph_id: 0 + vector: [-0.5, 100.0, -13, 14.8, -156.0] + - paragraph_id: 2 + vector: [0, 100.0, 0, 14.8, -156.0] + - paragraph_id: 3 + vector: [0, 1.0, 0, 1.8, -15.0] + + - do: + index: + index: test + id: "3" + body: + name: rabbit.jpg + nested: + - paragraph_id: 0 + vector: [0.5, 111.3, -13.0, 14.8, -156.0] + + - do: + indices.refresh: {} + +--- +"nested kNN search only": + - do: + search: + index: test + body: + fields: [ "name" ] + knn: + field: nested.vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + k: 2 + num_candidates: 3 + + - match: {hits.hits.0._id: "2"} + - match: {hits.hits.0.fields.name.0: "moose.jpg"} + + - match: {hits.hits.1._id: "3"} + - match: {hits.hits.1.fields.name.0: "rabbit.jpg"} + + - do: + search: + index: test + body: + fields: [ "name" ] + knn: + field: nested.vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + k: 2 + num_candidates: 3 + inner_hits: {size: 1, "fields": ["nested.paragraph_id"], _source: false} + + - match: {hits.hits.0._id: "2"} + - match: {hits.hits.0.fields.name.0: "moose.jpg"} + - match: {hits.hits.0.inner_hits.nested.hits.hits.0.fields.nested.0.paragraph_id.0: "0"} + + - match: {hits.hits.1._id: "3"} + - match: {hits.hits.1.fields.name.0: "rabbit.jpg"} + - match: {hits.hits.0.inner_hits.nested.hits.hits.0.fields.nested.0.paragraph_id.0: "0"} +--- +"nested kNN search filtered": + + - do: + search: + index: test + body: + fields: [ "name" ] + knn: + field: nested.vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + k: 2 + num_candidates: 3 + filter: {term: {name: "rabbit.jpg"}} + + - match: {hits.total.value: 1} + - match: {hits.hits.0._id: "3"} + - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + + - do: + search: + index: test + body: + fields: [ "name" ] + knn: + field: nested.vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + k: 3 + num_candidates: 3 + filter: {term: {name: "rabbit.jpg"}} + inner_hits: {size: 1, fields: ["nested.paragraph_id"], _source: false} + + - match: {hits.total.value: 1} + - match: {hits.hits.0._id: "3"} + - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + - match: {hits.hits.0.inner_hits.nested.hits.hits.0.fields.nested.0.paragraph_id.0: "0"} diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/60_dense_vector_dynamic_mapping.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/60_dense_vector_dynamic_mapping.yml index 030ce2e2332b1..d2c02fcbff38e 100644 --- 
a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/60_dense_vector_dynamic_mapping.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/60_dense_vector_dynamic_mapping.yml @@ -426,7 +426,7 @@ setup: --- -"Fields with nested float arrays within the threshold map throw an exception": +"Fields with nested float arrays within the threshold map index a nested dense_vector": - do: indices.create: @@ -442,7 +442,6 @@ setup: nested - do: - catch: /\[dense_vector\] fields cannot be indexed if they\'re within \[nested\] mappings/ index: index: test-nested-index id: "1" @@ -453,6 +452,15 @@ setup: 159.1, 289.56, -128.7424, 145.9871, -164.0003, 86.4034, -89.6929, 257.9717, 131.6075, 67.9233, -144.8255, 223.8446, 77.3228, -210.1163, -139.4783, 12.6499, 15.4491, 108.3465, -189.3947, 178.2045, -187.5925, 184.5089, 77.3022, -202.7439, -13.4959, 115.9719, -139.4332, 196.7845, 104.7573, -156.7746, 166.9878, 68.3936, 159.8473, -141.4446, 21.1947, 186.5908, -209.6895, 68.6169, 44.1255, 147.4659, 56.5079, -179.7997, -85.1651, 11.4847, 124.1662, 96.2246, -178.6705, 85.5925, 205.3616, -16.4704, 172.4947, -115.2535, -58.1722, 94.4836, 34.6458, -70.1011, -58.8047, 149.9562, -37.8998, 196.9805, -169.3555, -163.9432, 188.5611, 214.8378, 29.3182, -24.8724, 152.9382, -109.4345, -123.6716, -8.2441, 64.5902, 27.8083, 40.8185, -94.3161, 58.1463, -138.7432, 24.6805, -88.7222, -11.2018, 206.6434, 201.9024, 87.3079, -3.2883, -60.2484, -109.5789, 105.5766, -116.6709, -17.7073, -71.5093, -75.2937, -176.8691, -146.4967, 53.7586, 199.5294, 55.9754, -48.7399, 82.2051, 135.2921, 22.4408, -116.4008, -33.7538, 29.7207, 6.3692, -97.5768, -12.7982, -200.9331, -62.2743, 81.0843, 136.2247, 150.2565, 139.6838, 155.2657, -25.7447, 198.5955, 18.8099, 46.9014, -60.2672, 136.4801, 171.8966, 172.5842, 13.9123, 75.8386, -64.2444, -48.1964, 135.9685, 7.4927, -40.6424, -76.8922 ] + - do: + indices.get_mapping: + index: test-nested-index + + - match: { test-nested-index.mappings.properties.my_nested_field.properties.my_field.type: dense_vector } + - match: { test-nested-index.mappings.properties.my_nested_field.properties.my_field.dims: 128 } + - match: { test-nested-index.mappings.properties.my_nested_field.properties.my_field.index: true } + - match: { test-nested-index.mappings.properties.my_nested_field.properties.my_field.similarity: cosine } + --- "Fields with copyTo fields still map as float": diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/nested/VectorNestedIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/nested/VectorNestedIT.java new file mode 100644 index 0000000000000..015dc9628de21 --- /dev/null +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/nested/VectorNestedIT.java @@ -0,0 +1,76 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.search.nested; + +import org.elasticsearch.action.get.GetResponse; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.cluster.health.ClusterHealthStatus; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.search.vectors.KnnSearchBuilder; +import org.elasticsearch.test.ESIntegTestCase; + +import java.util.List; + +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.notNullValue; + +public class VectorNestedIT extends ESIntegTestCase { + + public void testSimpleNested() throws Exception { + assertAcked( + prepareCreate("test").setMapping( + jsonBuilder().startObject() + .startObject("properties") + .startObject("nested") + .field("type", "nested") + .startObject("properties") + .startObject("vector") + .field("type", "dense_vector") + .field("index", true) + .field("dims", 3) + .field("similarity", "cosine") + .endObject() + .endObject() + .endObject() + .endObject() + .endObject() + ).setSettings(Settings.builder().put(indexSettings()).put("index.number_of_shards", 1)) + ); + ensureGreen(); + + client().prepareIndex("test") + .setId("1") + .setSource( + jsonBuilder().startObject() + .startArray("nested") + .startObject() + .field("vector", new float[] { 1, 1, 1 }) + .endObject() + .endArray() + .endObject() + ) + .get(); + + waitForRelocation(ClusterHealthStatus.GREEN); + GetResponse getResponse = client().prepareGet("test", "1").get(); + assertThat(getResponse.isExists(), equalTo(true)); + assertThat(getResponse.getSourceAsBytes(), notNullValue()); + refresh(); + + SearchResponse searchResponse = client().prepareSearch("test") + .setKnnSearch(List.of(new KnnSearchBuilder("nested.vector", new float[] { 1, 1, 1 }, 1, 1, null))) + .setAllowPartialSearchResults(false) + .get(); + assertThat(searchResponse.getHits().getHits().length, greaterThan(0)); + } + +} diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 926cc4801dddc..29e42630ec23e 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -145,6 +145,8 @@ static TransportVersion def(int id) { public static final TransportVersion WAIT_FOR_CLUSTER_STATE_IN_RECOVERY_ADDED = def(8_502_00_0); public static final TransportVersion RECOVERY_COMMIT_TOO_NEW_EXCEPTION_ADDED = def(8_503_00_0); public static final TransportVersion NODE_INFO_COMPONENT_VERSIONS_ADDED = def(8_504_00_0); + public static final TransportVersion NESTED_KNN_VECTOR_QUERY_V = def(8_599_00_0); + /* * STOP! READ THIS FIRST! 
No, really, * ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _ diff --git a/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java b/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java index 8d615190d44ac..dca269f06a3d3 100644 --- a/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java +++ b/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java @@ -8,6 +8,9 @@ package org.elasticsearch.action.search; import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.join.ScoreMode; +import org.elasticsearch.index.query.NestedQueryBuilder; +import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.search.SearchPhaseResult; import org.elasticsearch.search.SearchShardTarget; import org.elasticsearch.search.builder.SearchSourceBuilder; @@ -139,6 +142,7 @@ ShardSearchRequest rewriteShardSearchRequest(ShardSearchRequest request) { List subSearchSourceBuilders = new ArrayList<>(source.subSearches()); + int i = 0; for (DfsKnnResults dfsKnnResults : knnResults) { List scoreDocs = new ArrayList<>(); for (ScoreDoc scoreDoc : dfsKnnResults.scoreDocs()) { @@ -147,8 +151,13 @@ ShardSearchRequest rewriteShardSearchRequest(ShardSearchRequest request) { } } scoreDocs.sort(Comparator.comparingInt(scoreDoc -> scoreDoc.doc)); - KnnScoreDocQueryBuilder knnQuery = new KnnScoreDocQueryBuilder(scoreDocs.toArray(new ScoreDoc[0])); - subSearchSourceBuilders.add(new SubSearchSourceBuilder(knnQuery)); + String nestedPath = dfsKnnResults.getNestedPath(); + QueryBuilder query = new KnnScoreDocQueryBuilder(scoreDocs.toArray(new ScoreDoc[0])); + if (nestedPath != null) { + query = new NestedQueryBuilder(nestedPath, query, ScoreMode.Max).innerHit(source.knnSearch().get(i).innerHit()); + } + subSearchSourceBuilders.add(new SubSearchSourceBuilder(query)); + i++; } source = source.shallowCopy().subSearches(subSearchSourceBuilders).knnSearch(List.of()); diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java b/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java index 99134da6bc216..fb554232503f2 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java @@ -21,6 +21,7 @@ import org.apache.lucene.search.TopFieldDocs; import org.apache.lucene.search.TotalHits; import org.apache.lucene.search.TotalHits.Relation; +import org.apache.lucene.util.SetOnce; import org.elasticsearch.common.breaker.CircuitBreaker; import org.elasticsearch.common.lucene.search.TopDocsAndMaxScore; import org.elasticsearch.common.util.Maps; @@ -139,8 +140,10 @@ public static List mergeKnnResults(SearchRequest request, List> topDocsLists = new ArrayList<>(request.source().knnSearch().size()); + List> nestedPath = new ArrayList<>(request.source().knnSearch().size()); for (int i = 0; i < request.source().knnSearch().size(); i++) { topDocsLists.add(new ArrayList<>()); + nestedPath.add(new SetOnce<>()); } for (DfsSearchResult dfsSearchResult : dfsSearchResults) { @@ -152,13 +155,15 @@ public static List mergeKnnResults(SearchRequest request, List mergedResults = new ArrayList<>(request.source().knnSearch().size()); for (int i = 0; i < request.source().knnSearch().size(); i++) { TopDocs mergedTopDocs = TopDocs.merge(request.source().knnSearch().get(i).k(), topDocsLists.get(i).toArray(new TopDocs[0])); - mergedResults.add(new 
DfsKnnResults(mergedTopDocs.scoreDocs)); + mergedResults.add(new DfsKnnResults(nestedPath.get(i).get(), mergedTopDocs.scoreDocs)); } return mergedResults; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 15237ee0ca2b8..5b086b0374171 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -29,6 +29,9 @@ import org.apache.lucene.search.KnnByteVectorQuery; import org.apache.lucene.search.KnnFloatVectorQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.join.BitSetProducer; +import org.apache.lucene.search.join.DiversifyingChildrenByteKnnVectorQuery; +import org.apache.lucene.search.join.DiversifyingChildrenFloatKnnVectorQuery; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.VectorUtil; import org.elasticsearch.common.xcontent.support.XContentMapValues; @@ -41,7 +44,6 @@ import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperBuilderContext; import org.elasticsearch.index.mapper.MapperParsingException; -import org.elasticsearch.index.mapper.MappingLookup; import org.elasticsearch.index.mapper.MappingParser; import org.elasticsearch.index.mapper.SimpleMappedFieldType; import org.elasticsearch.index.mapper.SourceLoader; @@ -73,6 +75,7 @@ * A {@link FieldMapper} for indexing a dense vector of floats. */ public class DenseVectorFieldMapper extends FieldMapper { + public static final IndexVersion MAGNITUDE_STORED_INDEX_VERSION = IndexVersion.V_7_5_0; public static final IndexVersion INDEXED_BY_DEFAULT_INDEX_VERSION = IndexVersion.V_8_11_0; public static final IndexVersion LITTLE_ENDIAN_FLOAT_STORED_INDEX_VERSION = IndexVersion.V_8_9_0; @@ -821,7 +824,13 @@ public Query termQuery(Object value, SearchExecutionContext context) { throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support term queries"); } - public Query createKnnQuery(byte[] queryVector, int numCands, Query filter, Float similarityThreshold) { + public Query createKnnQuery( + byte[] queryVector, + int numCands, + Query filter, + Float similarityThreshold, + BitSetProducer parentFilter + ) { if (isIndexed() == false) { throw new IllegalArgumentException( "to perform knn search on field [" + name() + "], its mapping must have [index] set to [true]" @@ -844,7 +853,9 @@ public Query createKnnQuery(byte[] queryVector, int numCands, Query filter, Floa float squaredMagnitude = VectorUtil.dotProduct(queryVector, queryVector); elementType.checkVectorMagnitude(similarity, elementType.errorByteElementsAppender(queryVector), squaredMagnitude); } - Query knnQuery = new KnnByteVectorQuery(name(), queryVector, numCands, filter); + Query knnQuery = parentFilter != null + ? 
new DiversifyingChildrenByteKnnVectorQuery(name(), queryVector, filter, numCands, parentFilter) + : new KnnByteVectorQuery(name(), queryVector, numCands, filter); if (similarityThreshold != null) { knnQuery = new VectorSimilarityQuery( knnQuery, @@ -855,7 +866,13 @@ public Query createKnnQuery(byte[] queryVector, int numCands, Query filter, Floa return knnQuery; } - public Query createKnnQuery(float[] queryVector, int numCands, Query filter, Float similarityThreshold) { + public Query createKnnQuery( + float[] queryVector, + int numCands, + Query filter, + Float similarityThreshold, + BitSetProducer parentFilter + ) { if (isIndexed() == false) { throw new IllegalArgumentException( "to perform knn search on field [" + name() + "], its mapping must have [index] set to [true]" @@ -879,10 +896,15 @@ public Query createKnnQuery(float[] queryVector, int numCands, Query filter, Flo for (int i = 0; i < queryVector.length; i++) { bytes[i] = (byte) queryVector[i]; } - yield new KnnByteVectorQuery(name(), bytes, numCands, filter); + yield parentFilter != null + ? new DiversifyingChildrenByteKnnVectorQuery(name(), bytes, filter, numCands, parentFilter) + : new KnnByteVectorQuery(name(), bytes, numCands, filter); } - case FLOAT -> new KnnFloatVectorQuery(name(), queryVector, numCands, filter); + case FLOAT -> parentFilter != null + ? new DiversifyingChildrenFloatKnnVectorQuery(name(), queryVector, filter, numCands, parentFilter) + : new KnnFloatVectorQuery(name(), queryVector, numCands, filter); }; + if (similarityThreshold != null) { knnQuery = new VectorSimilarityQuery( knnQuery, @@ -1041,13 +1063,6 @@ public FieldMapper.Builder getMergeBuilder() { return new Builder(simpleName(), indexCreatedVersion).init(this); } - @Override - public void doValidate(MappingLookup mappers) { - if (indexed && mappers.nestedLookup().getNestedParent(name()) != null) { - throw new IllegalArgumentException("[" + CONTENT_TYPE + "] fields cannot be indexed if they're" + " within [nested] mappings"); - } - } - private static IndexOptions parseIndexOptions(String fieldName, Object propNode) { @SuppressWarnings("unchecked") Map indexOptionsMap = (Map) propNode; diff --git a/server/src/main/java/org/elasticsearch/index/query/NestedQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/NestedQueryBuilder.java index 83d31c71d4f76..3a96fdc9b0e0e 100644 --- a/server/src/main/java/org/elasticsearch/index/query/NestedQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/NestedQueryBuilder.java @@ -120,7 +120,9 @@ public InnerHitBuilder innerHit() { public NestedQueryBuilder innerHit(InnerHitBuilder innerHitBuilder) { this.innerHitBuilder = innerHitBuilder; - innerHitBuilder.setIgnoreUnmapped(ignoreUnmapped); + if (innerHitBuilder != null) { + innerHitBuilder.setIgnoreUnmapped(ignoreUnmapped); + } return this; } diff --git a/server/src/main/java/org/elasticsearch/search/dfs/DfsKnnResults.java b/server/src/main/java/org/elasticsearch/search/dfs/DfsKnnResults.java index 5d68e520278bf..3bb6a002fb17f 100644 --- a/server/src/main/java/org/elasticsearch/search/dfs/DfsKnnResults.java +++ b/server/src/main/java/org/elasticsearch/search/dfs/DfsKnnResults.java @@ -16,15 +16,28 @@ import java.io.IOException; +import static org.elasticsearch.TransportVersions.NESTED_KNN_VECTOR_QUERY_V; + public class DfsKnnResults implements Writeable { + private final String nestedPath; private final ScoreDoc[] scoreDocs; - public DfsKnnResults(ScoreDoc[] scoreDocs) { + public DfsKnnResults(String nestedPath, ScoreDoc[] 
scoreDocs) { + this.nestedPath = nestedPath; this.scoreDocs = scoreDocs; } public DfsKnnResults(StreamInput in) throws IOException { scoreDocs = in.readArray(Lucene::readScoreDoc, ScoreDoc[]::new); + if (in.getTransportVersion().onOrAfter(NESTED_KNN_VECTOR_QUERY_V)) { + nestedPath = in.readOptionalString(); + } else { + nestedPath = null; + } + } + + public String getNestedPath() { + return nestedPath; } public ScoreDoc[] scoreDocs() { @@ -33,5 +46,8 @@ public ScoreDoc[] scoreDocs() { public void writeTo(StreamOutput out) throws IOException { out.writeArray(Lucene::writeScoreDoc, scoreDocs); + if (out.getTransportVersion().onOrAfter(NESTED_KNN_VECTOR_QUERY_V)) { + out.writeOptionalString(nestedPath); + } } } diff --git a/server/src/main/java/org/elasticsearch/search/dfs/DfsPhase.java b/server/src/main/java/org/elasticsearch/search/dfs/DfsPhase.java index 7844a6661dfba..eb199752f6fed 100644 --- a/server/src/main/java/org/elasticsearch/search/dfs/DfsPhase.java +++ b/server/src/main/java/org/elasticsearch/search/dfs/DfsPhase.java @@ -188,13 +188,16 @@ private void executeKnnVectorQuery(SearchContext context) throws IOException { } List knnResults = new ArrayList<>(knnVectorQueryBuilders.size()); for (int i = 0; i < knnSearch.size(); i++) { + String knnField = knnVectorQueryBuilders.get(i).getFieldName(); + String knnNestedPath = searchExecutionContext.nestedLookup().getNestedParent(knnField); Query knnQuery = searchExecutionContext.toQuery(knnVectorQueryBuilders.get(i)).query(); - knnResults.add(singleKnnSearch(knnQuery, knnSearch.get(i).k(), context.getProfilers(), context.searcher())); + knnResults.add(singleKnnSearch(knnQuery, knnSearch.get(i).k(), context.getProfilers(), context.searcher(), knnNestedPath)); } context.dfsResult().knnResults(knnResults); } - static DfsKnnResults singleKnnSearch(Query knnQuery, int k, Profilers profilers, ContextIndexSearcher searcher) throws IOException { + static DfsKnnResults singleKnnSearch(Query knnQuery, int k, Profilers profilers, ContextIndexSearcher searcher, String nestedPath) + throws IOException { CollectorManager topDocsCollectorManager = TopScoreDocCollector.createSharedManager( k, null, @@ -218,6 +221,6 @@ static DfsKnnResults singleKnnSearch(Query knnQuery, int k, Profilers profilers, if (profilers != null) { searcher.setProfiler(profilers.getCurrentQueryProfiler()); } - return new DfsKnnResults(topDocs.scoreDocs); + return new DfsKnnResults(nestedPath, topDocs.scoreDocs); } } diff --git a/server/src/main/java/org/elasticsearch/search/vectors/KnnSearchBuilder.java b/server/src/main/java/org/elasticsearch/search/vectors/KnnSearchBuilder.java index 5ebea733f0e5e..b7f3433df62e2 100644 --- a/server/src/main/java/org/elasticsearch/search/vectors/KnnSearchBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/vectors/KnnSearchBuilder.java @@ -14,6 +14,7 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.index.query.AbstractQueryBuilder; +import org.elasticsearch.index.query.InnerHitBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.index.query.Rewriteable; @@ -31,6 +32,7 @@ import java.util.Objects; import java.util.function.Supplier; +import static org.elasticsearch.TransportVersions.NESTED_KNN_VECTOR_QUERY_V; import static org.elasticsearch.common.Strings.format; import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; import static 
org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg; @@ -48,6 +50,7 @@ public class KnnSearchBuilder implements Writeable, ToXContentFragment, Rewritea public static final ParseField VECTOR_SIMILARITY = new ParseField("similarity"); public static final ParseField FILTER_FIELD = new ParseField("filter"); public static final ParseField BOOST_FIELD = AbstractQueryBuilder.BOOST_FIELD; + public static final ParseField INNER_HITS_FIELD = new ParseField("inner_hits"); @SuppressWarnings("unchecked") private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>("knn", args -> { @@ -90,6 +93,12 @@ public class KnnSearchBuilder implements Writeable, ToXContentFragment, Rewritea ObjectParser.ValueType.OBJECT_ARRAY ); PARSER.declareFloat(KnnSearchBuilder::boost, BOOST_FIELD); + PARSER.declareField( + KnnSearchBuilder::innerHit, + (p, c) -> InnerHitBuilder.fromXContent(p), + INNER_HITS_FIELD, + ObjectParser.ValueType.OBJECT + ); } public static KnnSearchBuilder fromXContent(XContentParser parser) throws IOException { @@ -105,6 +114,7 @@ public static KnnSearchBuilder fromXContent(XContentParser parser) throws IOExce final Float similarity; final List filterQueries; float boost = AbstractQueryBuilder.DEFAULT_BOOST; + InnerHitBuilder innerHitBuilder; /** * Defines a kNN search. @@ -219,6 +229,13 @@ public KnnSearchBuilder(StreamInput in) throws IOException { } else { this.similarity = null; } + if (in.getTransportVersion().onOrAfter(NESTED_KNN_VECTOR_QUERY_V)) { + this.innerHitBuilder = in.readOptionalWriteable(InnerHitBuilder::new); + } + } + + public String getField() { + return field; } public int k() { @@ -254,13 +271,24 @@ public KnnSearchBuilder boost(float boost) { return this; } + public KnnSearchBuilder innerHit(InnerHitBuilder innerHitBuilder) { + this.innerHitBuilder = innerHitBuilder; + return this; + } + + public InnerHitBuilder innerHit() { + return innerHitBuilder; + } + @Override public KnnSearchBuilder rewrite(QueryRewriteContext ctx) throws IOException { if (querySupplier != null) { if (querySupplier.get() == null) { return this; } - return new KnnSearchBuilder(field, querySupplier.get(), k, numCands, similarity).boost(boost).addFilterQueries(filterQueries); + return new KnnSearchBuilder(field, querySupplier.get(), k, numCands, similarity).boost(boost) + .addFilterQueries(filterQueries) + .innerHit(innerHitBuilder); } if (queryVectorBuilder != null) { SetOnce toSet = new SetOnce<>(); @@ -280,7 +308,7 @@ public KnnSearchBuilder rewrite(QueryRewriteContext ctx) throws IOException { } ll.onResponse(null); }))); - return new KnnSearchBuilder(field, toSet::get, k, numCands, filterQueries, similarity).boost(boost); + return new KnnSearchBuilder(field, toSet::get, k, numCands, filterQueries, similarity).boost(boost).innerHit(innerHitBuilder); } boolean changed = false; List rewrittenQueries = new ArrayList<>(filterQueries.size()); @@ -292,7 +320,9 @@ public KnnSearchBuilder rewrite(QueryRewriteContext ctx) throws IOException { rewrittenQueries.add(rewrittenQuery); } if (changed) { - return new KnnSearchBuilder(field, queryVector, k, numCands, similarity).boost(boost).addFilterQueries(rewrittenQueries); + return new KnnSearchBuilder(field, queryVector, k, numCands, similarity).boost(boost) + .addFilterQueries(rewrittenQueries) + .innerHit(innerHitBuilder); } return this; } @@ -317,6 +347,7 @@ public boolean equals(Object o) { && Objects.equals(querySupplier, that.querySupplier) && Objects.equals(filterQueries, that.filterQueries) && 
Objects.equals(similarity, that.similarity) + && Objects.equals(innerHitBuilder, that.innerHitBuilder) && boost == that.boost; } @@ -331,6 +362,7 @@ public int hashCode() { similarity, Arrays.hashCode(queryVector), Objects.hashCode(filterQueries), + innerHitBuilder, boost ); } @@ -359,6 +391,10 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.endArray(); } + if (innerHitBuilder != null) { + builder.field(INNER_HITS_FIELD.getPreferredName(), innerHitBuilder, params); + } + if (boost != AbstractQueryBuilder.DEFAULT_BOOST) { builder.field(BOOST_FIELD.getPreferredName(), boost); } @@ -392,5 +428,8 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_8_0)) { out.writeOptionalFloat(similarity); } + if (out.getTransportVersion().onOrAfter(NESTED_KNN_VECTOR_QUERY_V)) { + out.writeOptionalWriteable(innerHitBuilder); + } } } diff --git a/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java b/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java index c4b1f3046e89d..6c261c040266b 100644 --- a/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/vectors/KnnVectorQueryBuilder.java @@ -11,12 +11,16 @@ import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.join.BitSetProducer; +import org.apache.lucene.search.join.ToChildBlockJoinQuery; import org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.core.Nullable; import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.NestedObjectMapper; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DenseVectorFieldType; import org.elasticsearch.index.query.AbstractQueryBuilder; @@ -234,6 +238,8 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException { ); } + String parentPath = context.nestedLookup().getNestedParent(fieldName); + final BitSetProducer parentFilter; BooleanQuery.Builder builder = new BooleanQuery.Builder(); for (QueryBuilder query : this.filterQueries) { builder.add(query.toQuery(context), BooleanClause.Occur.FILTER); @@ -242,9 +248,30 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException { Query filterQuery = booleanQuery.clauses().isEmpty() ? null : booleanQuery; DenseVectorFieldType vectorFieldType = (DenseVectorFieldType) fieldType; + if (parentPath != null) { + NestedObjectMapper mapper = context.nestedLookup().getNestedMappers().get(parentPath); + NestedObjectMapper objectMapper = context.nestedScope().getObjectMapper(); + if (objectMapper == null) { + parentFilter = context.bitsetFilter(Queries.newNonNestedFilter(context.indexVersionCreated())); + } else { + parentFilter = context.bitsetFilter(objectMapper.nestedTypeFilter()); + } + try { + context.nestedScope().nextLevel(mapper); + if (filterQuery != null) { + filterQuery = new ToChildBlockJoinQuery(filterQuery, parentFilter); + } + return queryVector != null + ? 
vectorFieldType.createKnnQuery(queryVector, numCands, filterQuery, vectorSimilarity, parentFilter) + : vectorFieldType.createKnnQuery(byteQueryVector, numCands, filterQuery, vectorSimilarity, parentFilter); + } finally { + context.nestedScope().previousLevel(); + } + } + return queryVector != null - ? vectorFieldType.createKnnQuery(queryVector, numCands, filterQuery, vectorSimilarity) - : vectorFieldType.createKnnQuery(byteQueryVector, numCands, filterQuery, vectorSimilarity); + ? vectorFieldType.createKnnQuery(queryVector, numCands, filterQuery, vectorSimilarity, null) + : vectorFieldType.createKnnQuery(byteQueryVector, numCands, filterQuery, vectorSimilarity, null); } @Override diff --git a/server/src/test/java/org/elasticsearch/action/search/DfsQueryPhaseTests.java b/server/src/test/java/org/elasticsearch/action/search/DfsQueryPhaseTests.java index 30332f93bf62c..65a0950d05b4d 100644 --- a/server/src/test/java/org/elasticsearch/action/search/DfsQueryPhaseTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/DfsQueryPhaseTests.java @@ -293,8 +293,9 @@ public void run() throws IOException { public void testRewriteShardSearchRequestWithRank() { List dkrs = List.of( - new DfsKnnResults(new ScoreDoc[] { new ScoreDoc(1, 3.0f, 1), new ScoreDoc(4, 1.5f, 1), new ScoreDoc(7, 0.1f, 2) }), + new DfsKnnResults(null, new ScoreDoc[] { new ScoreDoc(1, 3.0f, 1), new ScoreDoc(4, 1.5f, 1), new ScoreDoc(7, 0.1f, 2) }), new DfsKnnResults( + null, new ScoreDoc[] { new ScoreDoc(2, 1.75f, 2), new ScoreDoc(1, 2.0f, 1), new ScoreDoc(3, 0.25f, 2), new ScoreDoc(6, 2.5f, 2) } ) ); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index f29c462dba42c..2899dab6ff303 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -744,26 +744,6 @@ public void testCannotBeUsedInMultifields() { assertThat(e.getMessage(), containsString("Field [vectors] of type [dense_vector] can't be used in multifields")); } - public void testNestedVectorsCannotBeIndexed() { - Exception e = expectThrows( - IllegalArgumentException.class, - () -> createMapperService( - fieldMapping( - b -> b.field("type", "nested") - .startObject("properties") - .startObject("vector") - .field("type", "dense_vector") - .field("dims", 4) - .field("index", true) - .field("similarity", "dot_product") - .endObject() - .endObject() - ) - ) - ); - assertThat(e.getMessage(), containsString("[dense_vector] fields cannot be indexed if they're within [nested] mappings")); - } - public void testByteVectorIndexBoundaries() throws IOException { DocumentMapper mapper = createDocumentMapper( fieldMapping( @@ -818,7 +798,7 @@ public void testByteVectorQueryBoundaries() throws IOException { Exception e = expectThrows( IllegalArgumentException.class, - () -> denseVectorFieldType.createKnnQuery(new float[] { 128, 0, 0 }, 3, null, null) + () -> denseVectorFieldType.createKnnQuery(new float[] { 128, 0, 0 }, 3, null, null, null) ); assertThat( e.getMessage(), @@ -827,7 +807,7 @@ public void testByteVectorQueryBoundaries() throws IOException { e = expectThrows( IllegalArgumentException.class, - () -> denseVectorFieldType.createKnnQuery(new float[] { 0.0f, 0f, -129.0f }, 3, null, null) + () -> denseVectorFieldType.createKnnQuery(new float[] { 0.0f, 0f, -129.0f }, 3, null, 
null, null) ); assertThat( e.getMessage(), @@ -836,7 +816,7 @@ public void testByteVectorQueryBoundaries() throws IOException { e = expectThrows( IllegalArgumentException.class, - () -> denseVectorFieldType.createKnnQuery(new float[] { 0.0f, 0.5f, 0.0f }, 3, null, null) + () -> denseVectorFieldType.createKnnQuery(new float[] { 0.0f, 0.5f, 0.0f }, 3, null, null, null) ); assertThat( e.getMessage(), @@ -845,7 +825,7 @@ public void testByteVectorQueryBoundaries() throws IOException { e = expectThrows( IllegalArgumentException.class, - () -> denseVectorFieldType.createKnnQuery(new float[] { 0, 0.0f, -0.25f }, 3, null, null) + () -> denseVectorFieldType.createKnnQuery(new float[] { 0, 0.0f, -0.25f }, 3, null, null, null) ); assertThat( e.getMessage(), @@ -854,13 +834,13 @@ public void testByteVectorQueryBoundaries() throws IOException { e = expectThrows( IllegalArgumentException.class, - () -> denseVectorFieldType.createKnnQuery(new float[] { Float.NaN, 0f, 0.0f }, 3, null, null) + () -> denseVectorFieldType.createKnnQuery(new float[] { Float.NaN, 0f, 0.0f }, 3, null, null, null) ); assertThat(e.getMessage(), containsString("element_type [byte] vectors do not support NaN values but found [NaN] at dim [0];")); e = expectThrows( IllegalArgumentException.class, - () -> denseVectorFieldType.createKnnQuery(new float[] { Float.POSITIVE_INFINITY, 0f, 0.0f }, 3, null, null) + () -> denseVectorFieldType.createKnnQuery(new float[] { Float.POSITIVE_INFINITY, 0f, 0.0f }, 3, null, null, null) ); assertThat( e.getMessage(), @@ -869,7 +849,7 @@ public void testByteVectorQueryBoundaries() throws IOException { e = expectThrows( IllegalArgumentException.class, - () -> denseVectorFieldType.createKnnQuery(new float[] { 0, Float.NEGATIVE_INFINITY, 0.0f }, 3, null, null) + () -> denseVectorFieldType.createKnnQuery(new float[] { 0, Float.NEGATIVE_INFINITY, 0.0f }, 3, null, null, null) ); assertThat( e.getMessage(), @@ -895,13 +875,13 @@ public void testFloatVectorQueryBoundaries() throws IOException { Exception e = expectThrows( IllegalArgumentException.class, - () -> denseVectorFieldType.createKnnQuery(new float[] { Float.NaN, 0f, 0.0f }, 3, null, null) + () -> denseVectorFieldType.createKnnQuery(new float[] { Float.NaN, 0f, 0.0f }, 3, null, null, null) ); assertThat(e.getMessage(), containsString("element_type [float] vectors do not support NaN values but found [NaN] at dim [0];")); e = expectThrows( IllegalArgumentException.class, - () -> denseVectorFieldType.createKnnQuery(new float[] { Float.POSITIVE_INFINITY, 0f, 0.0f }, 3, null, null) + () -> denseVectorFieldType.createKnnQuery(new float[] { Float.POSITIVE_INFINITY, 0f, 0.0f }, 3, null, null, null) ); assertThat( e.getMessage(), @@ -910,7 +890,7 @@ public void testFloatVectorQueryBoundaries() throws IOException { e = expectThrows( IllegalArgumentException.class, - () -> denseVectorFieldType.createKnnQuery(new float[] { 0, Float.NEGATIVE_INFINITY, 0.0f }, 3, null, null) + () -> denseVectorFieldType.createKnnQuery(new float[] { 0, Float.NEGATIVE_INFINITY, 0.0f }, 3, null, null, null) ); assertThat( e.getMessage(), diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java index 1f9013502144e..e43fa379054bf 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java @@ -11,6 +11,9 @@ 
import org.apache.lucene.search.KnnByteVectorQuery; import org.apache.lucene.search.KnnFloatVectorQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.join.BitSetProducer; +import org.apache.lucene.search.join.DiversifyingChildrenByteKnnVectorQuery; +import org.apache.lucene.search.join.DiversifyingChildrenFloatKnnVectorQuery; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.mapper.FieldTypeTestCase; @@ -110,6 +113,51 @@ public void testFetchSourceValue() throws IOException { assertEquals(vector, fetchSourceValue(bft, vector)); } + public void testCreateNestedKnnQuery() { + BitSetProducer producer = context -> null; + + int dims = randomIntBetween(2, 2048); + { + DenseVectorFieldType field = new DenseVectorFieldType( + "f", + IndexVersion.current(), + DenseVectorFieldMapper.ElementType.FLOAT, + dims, + true, + VectorSimilarity.COSINE, + Collections.emptyMap() + ); + float[] queryVector = new float[dims]; + for (int i = 0; i < dims; i++) { + queryVector[i] = randomFloat(); + } + Query query = field.createKnnQuery(queryVector, 10, null, null, producer); + assertThat(query, instanceOf(DiversifyingChildrenFloatKnnVectorQuery.class)); + } + { + DenseVectorFieldType field = new DenseVectorFieldType( + "f", + IndexVersion.current(), + DenseVectorFieldMapper.ElementType.BYTE, + dims, + true, + VectorSimilarity.COSINE, + Collections.emptyMap() + ); + byte[] queryVector = new byte[dims]; + float[] floatQueryVector = new float[dims]; + for (int i = 0; i < dims; i++) { + queryVector[i] = randomByte(); + floatQueryVector[i] = queryVector[i]; + } + Query query = field.createKnnQuery(queryVector, 10, null, null, producer); + assertThat(query, instanceOf(DiversifyingChildrenByteKnnVectorQuery.class)); + + query = field.createKnnQuery(floatQueryVector, 10, null, null, producer); + assertThat(query, instanceOf(DiversifyingChildrenByteKnnVectorQuery.class)); + } + } + public void testFloatCreateKnnQuery() { DenseVectorFieldType unindexedField = new DenseVectorFieldType( "f", @@ -122,7 +170,7 @@ public void testFloatCreateKnnQuery() { ); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, - () -> unindexedField.createKnnQuery(new float[] { 0.3f, 0.1f, 1.0f }, 10, null, null) + () -> unindexedField.createKnnQuery(new float[] { 0.3f, 0.1f, 1.0f }, 10, null, null, null) ); assertThat(e.getMessage(), containsString("to perform knn search on field [f], its mapping must have [index] set to [true]")); @@ -137,7 +185,7 @@ public void testFloatCreateKnnQuery() { ); e = expectThrows( IllegalArgumentException.class, - () -> dotProductField.createKnnQuery(new float[] { 0.3f, 0.1f, 1.0f }, 10, null, null) + () -> dotProductField.createKnnQuery(new float[] { 0.3f, 0.1f, 1.0f }, 10, null, null, null) ); assertThat(e.getMessage(), containsString("The [dot_product] similarity can only be used with unit-length vectors.")); @@ -152,7 +200,7 @@ public void testFloatCreateKnnQuery() { ); e = expectThrows( IllegalArgumentException.class, - () -> cosineField.createKnnQuery(new float[] { 0.0f, 0.0f, 0.0f }, 10, null, null) + () -> cosineField.createKnnQuery(new float[] { 0.0f, 0.0f, 0.0f }, 10, null, null, null) ); assertThat(e.getMessage(), containsString("The [cosine] similarity does not support vectors with zero magnitude.")); } @@ -172,7 +220,7 @@ public void testCreateKnnQueryMaxDims() { for (int i = 0; i < 4096; i++) { queryVector[i] = randomFloat(); } - Query query = 
fieldWith4096dims.createKnnQuery(queryVector, 10, null, null); + Query query = fieldWith4096dims.createKnnQuery(queryVector, 10, null, null, null); assertThat(query, instanceOf(KnnFloatVectorQuery.class)); } @@ -190,7 +238,7 @@ public void testCreateKnnQueryMaxDims() { for (int i = 0; i < 4096; i++) { queryVector[i] = randomByte(); } - Query query = fieldWith4096dims.createKnnQuery(queryVector, 10, null, null); + Query query = fieldWith4096dims.createKnnQuery(queryVector, 10, null, null, null); assertThat(query, instanceOf(KnnByteVectorQuery.class)); } } @@ -207,7 +255,7 @@ public void testByteCreateKnnQuery() { ); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, - () -> unindexedField.createKnnQuery(new float[] { 0.3f, 0.1f, 1.0f }, 10, null, null) + () -> unindexedField.createKnnQuery(new float[] { 0.3f, 0.1f, 1.0f }, 10, null, null, null) ); assertThat(e.getMessage(), containsString("to perform knn search on field [f], its mapping must have [index] set to [true]")); @@ -222,11 +270,11 @@ public void testByteCreateKnnQuery() { ); e = expectThrows( IllegalArgumentException.class, - () -> cosineField.createKnnQuery(new float[] { 0.0f, 0.0f, 0.0f }, 10, null, null) + () -> cosineField.createKnnQuery(new float[] { 0.0f, 0.0f, 0.0f }, 10, null, null, null) ); assertThat(e.getMessage(), containsString("The [cosine] similarity does not support vectors with zero magnitude.")); - e = expectThrows(IllegalArgumentException.class, () -> cosineField.createKnnQuery(new byte[] { 0, 0, 0 }, 10, null, null)); + e = expectThrows(IllegalArgumentException.class, () -> cosineField.createKnnQuery(new byte[] { 0, 0, 0 }, 10, null, null, null)); assertThat(e.getMessage(), containsString("The [cosine] similarity does not support vectors with zero magnitude.")); } } diff --git a/server/src/test/java/org/elasticsearch/search/dfs/DfsPhaseTests.java b/server/src/test/java/org/elasticsearch/search/dfs/DfsPhaseTests.java index 2237eaabb76e0..a0f37bcbb7fb1 100644 --- a/server/src/test/java/org/elasticsearch/search/dfs/DfsPhaseTests.java +++ b/server/src/test/java/org/elasticsearch/search/dfs/DfsPhaseTests.java @@ -74,12 +74,12 @@ public void testSingleKnnSearch() throws IOException { int k = 10; // run without profiling enabled - DfsKnnResults dfsKnnResults = DfsPhase.singleKnnSearch(query, k, null, searcher); + DfsKnnResults dfsKnnResults = DfsPhase.singleKnnSearch(query, k, null, searcher, null); assertEquals(k, dfsKnnResults.scoreDocs().length); // run with profiling enabled Profilers profilers = new Profilers(searcher); - dfsKnnResults = DfsPhase.singleKnnSearch(query, k, profilers, searcher); + dfsKnnResults = DfsPhase.singleKnnSearch(query, k, profilers, searcher, null); assertEquals(k, dfsKnnResults.scoreDocs().length); SearchProfileDfsPhaseResult searchProfileDfsPhaseResult = profilers.getDfsProfiler().buildDfsPhaseResults(); List queryProfileShardResult = searchProfileDfsPhaseResult.getQueryProfileShardResult(); From 606db050b8055807746ddeb4a9b039dd6e59a37b Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Mon, 2 Oct 2023 13:47:53 +0200 Subject: [PATCH 045/136] Upgrade Lucene to 9.8.0 (#100128) Lucene 9.8.0 has been released, this commit updates the Elasticsearch dependency from the latest 9.8 snapshot to the finally released artifact --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 144 ++++++++++++------------ 2 files changed, 73 insertions(+), 73 deletions(-) diff --git a/build-tools-internal/version.properties 
b/build-tools-internal/version.properties index dae105da9cec6..a4e990c18f5e4 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 8.11.0 -lucene = 9.8.0-snapshot-1f8e08481c2 +lucene = 9.8.0 bundled_jdk_vendor = openjdk bundled_jdk = 21+35@fd2272bbf8e04c3dbaee13770090416c diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index e21168f8273cb..73bb3d50dac72 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2579,124 +2579,124 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 2651005dc252b8b72307af6a8d2e62f16736e5cb Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Mon, 2 Oct 2023 15:52:55 +0200 Subject: [PATCH 046/136] Update docs/changelog/100138.yaml --- docs/changelog/100138.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/100138.yaml diff --git a/docs/changelog/100138.yaml b/docs/changelog/100138.yaml new file mode 100644 index 0000000000000..0df2004f8539d --- /dev/null +++ b/docs/changelog/100138.yaml @@ -0,0 +1,5 @@ +pr: 100138 +summary: Upgrade main to Lucene 9.8.0 +area: Search +type: upgrade +issues: [] From 5bfaa7d2f0b1e7846b159dd949ae6193685d3dcc Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Mon, 2 Oct 2023 16:09:27 +0200 Subject: [PATCH 047/136] Address bad merge Adjust the RegExp import in APMTracer --- .../elasticsearch/telemetry/apm/internal/tracing/APMTracer.java | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/apm/src/main/java/org/elasticsearch/telemetry/apm/internal/tracing/APMTracer.java b/modules/apm/src/main/java/org/elasticsearch/telemetry/apm/internal/tracing/APMTracer.java index 0c1b3b54c19a1..866f819609515 100644 --- a/modules/apm/src/main/java/org/elasticsearch/telemetry/apm/internal/tracing/APMTracer.java +++ b/modules/apm/src/main/java/org/elasticsearch/telemetry/apm/internal/tracing/APMTracer.java @@ -24,6 +24,7 @@ import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.apache.lucene.util.automaton.Operations; +import org.apache.lucene.util.automaton.RegExp; import org.elasticsearch.Build; import org.elasticsearch.common.component.AbstractLifecycleComponent; import org.elasticsearch.common.settings.Settings; From fd576fdd9132d2916dc07f95af1737d850c0da27 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Mon, 2 Oct 2023 17:01:58 +0200 Subject: [PATCH 048/136] Update IndexVersion --- .../main/java/org/elasticsearch/index/IndexVersion.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersion.java b/server/src/main/java/org/elasticsearch/index/IndexVersion.java index deb0d4fbd0d80..4d193c6c59bd6 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersion.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersion.java @@ -123,9 +123,10 @@ private static IndexVersion registerIndexVersion(int id, Version luceneVersion, * READ THE COMMENT BELOW THIS BLOCK OF DECLARATIONS BEFORE ADDING NEW INDEX VERSIONS * Detached index versions added below here. 
*/ - public static final IndexVersion V_8_500_000 = registerIndexVersion(8_500_000, Version.LUCENE_9_8_0, "bf656f5e-5808-4eee-bf8a-e2bf6736ff55"); - public static final IndexVersion V_8_500_001 = registerIndexVersion(8_500_001, Version.LUCENE_9_8_0, "45045a5a-fc57-4462-89f6-6bc04cda6015"); - public static final IndexVersion V_8_500_002 = registerIndexVersion(8_500_002, Version.LUCENE_9_8_0, "50b39bf8-6c6a-443e-a5e5-069438d843c1"); + public static final IndexVersion V_8_500_000 = registerIndexVersion(8_500_000, Version.LUCENE_9_7_0, "bf656f5e-5808-4eee-bf8a-e2bf6736ff55"); + public static final IndexVersion V_8_500_001 = registerIndexVersion(8_500_001, Version.LUCENE_9_7_0, "45045a5a-fc57-4462-89f6-6bc04cda6015"); + public static final IndexVersion V_8_500_002 = registerIndexVersion(8_500_002, Version.LUCENE_9_7_0, "50b39bf8-6c6a-443e-a5e5-069438d843c1"); + public static final IndexVersion V_8_500_003 = registerIndexVersion(8_500_003, Version.LUCENE_9_8_0, "82bb022a-7ca2-463f-9c58-67821e24b72f"); /* * STOP! READ THIS FIRST! No, really, From e2321dde37f0b0f622f5a8c43a4c227d783311d2 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Mon, 2 Oct 2023 17:52:25 -0700 Subject: [PATCH 049/136] Avoid arithmetic overflow in LuceneCountOperator (#100153) The LuceneCountOperator has two issues that can prevent the operator from finishing (or leading to an infinite loop in the Driver): 1. It doesn't finish the scorer when we can shortcut and a partial leaf is not the first one. 2. It requests Integer.MAX_VALUE documents when performing manual counting, which leads to arithmetic overflow for a partial leaf which is not the first one (i.e, position > 0). Closes #100127 --- .../compute/lucene/LuceneCountOperator.java | 5 +- .../compute/lucene/LuceneOperator.java | 3 + .../lucene/LuceneCountOperatorTests.java | 79 ++++++++++++------- .../xpack/esql/action/EsqlActionIT.java | 66 ++++++++-------- 4 files changed, 90 insertions(+), 63 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneCountOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneCountOperator.java index e1e5b11c5b8c7..441a2056d3b72 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneCountOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneCountOperator.java @@ -121,7 +121,7 @@ public Page getOutput() { Weight weight = scorer.weight(); var leafReaderContext = scorer.leafReaderContext(); // see org.apache.lucene.search.TotalHitCountCollector - int leafCount = weight == null ? -1 : weight.count(leafReaderContext); + int leafCount = weight.count(leafReaderContext); if (leafCount != -1) { // make sure to NOT multi count as the count _shortcut_ (which is segment wide) // handle doc partitioning where the same leaf can be seen multiple times @@ -132,10 +132,11 @@ public Page getOutput() { var count = Math.min(leafCount, remainingDocs); totalHits += count; remainingDocs -= count; - scorer.markAsDone(); } + scorer.markAsDone(); } else { // could not apply shortcut, trigger the search + // TODO: avoid iterating all documents in multiple calls to make cancellation more responsive. 
scorer.scoreNextRange(leafCollector, leafReaderContext.reader().getLiveDocs(), remainingDocs); } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneOperator.java index 74baecf154fec..abb96446bb831 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneOperator.java @@ -126,6 +126,9 @@ private void reinitialize() { void scoreNextRange(LeafCollector collector, Bits acceptDocs, int numDocs) throws IOException { assert isDone() == false : "scorer is exhausted"; + // avoid overflow and limit the range + numDocs = Math.min(maxPosition - position, numDocs); + assert numDocs > 0 : "scorer was exhausted"; position = bulkScorer.score(collector, acceptDocs, position, Math.min(maxPosition, position + numDocs)); } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneCountOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneCountOperatorTests.java index 9893cd2b2a023..4c92e146a25ea 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneCountOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneCountOperatorTests.java @@ -7,9 +7,9 @@ package org.elasticsearch.compute.lucene; -import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; @@ -36,10 +36,11 @@ import java.io.UncheckedIOException; import java.util.ArrayList; import java.util.List; -import java.util.function.Function; +import java.util.concurrent.CopyOnWriteArrayList; -import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.lessThanOrEqualTo; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -58,6 +59,7 @@ protected LuceneCountOperator.Factory simple(BigArrays bigArrays) { } private LuceneCountOperator.Factory simple(BigArrays bigArrays, DataPartitioning dataPartitioning, int numDocs, int limit) { + boolean enableShortcut = randomBoolean(); int commitEvery = Math.max(1, numDocs / 10); try ( RandomIndexWriter writer = new RandomIndexWriter( @@ -67,9 +69,14 @@ private LuceneCountOperator.Factory simple(BigArrays bigArrays, DataPartitioning ) ) { for (int d = 0; d < numDocs; d++) { - List doc = new ArrayList<>(); - doc.add(new SortedNumericDocValuesField("s", d)); + var doc = new Document(); + doc.add(new LongPoint("s", d)); writer.addDocument(doc); + if (enableShortcut == false && randomBoolean()) { + doc = new Document(); + doc.add(new LongPoint("s", randomLongBetween(numDocs * 5L, numDocs * 10L))); + writer.addDocument(doc); + } if (d % commitEvery == 0) { writer.commit(); } @@ -83,8 +90,13 @@ private LuceneCountOperator.Factory simple(BigArrays bigArrays, DataPartitioning SearchExecutionContext ectx = mock(SearchExecutionContext.class); when(ctx.getSearchExecutionContext()).thenReturn(ectx); 
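The overflow described in the commit message is plain int arithmetic; a minimal, self-contained sketch (assuming nothing beyond the JDK) of why clamping numDocs before computing the scoring range fixes it:

    public class OverflowDemo {
        public static void main(String[] args) {
            int position = 42;                // a partial leaf that does not start at doc 0
            int maxPosition = 1_000;
            int numDocs = Integer.MAX_VALUE;  // "count everything" request from manual counting

            int broken = Math.min(maxPosition, position + numDocs); // position + numDocs wraps negative
            System.out.println(broken);       // negative upper bound: the scorer never advances

            numDocs = Math.min(maxPosition - position, numDocs);    // the clamp applied in the hunk above
            int fixed = Math.min(maxPosition, position + numDocs);
            System.out.println(fixed);        // 1000, as intended
        }
    }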
when(ectx.getIndexReader()).thenReturn(reader); - Function queryFunction = c -> new MatchAllDocsQuery(); - return new LuceneCountOperator.Factory(List.of(ctx), queryFunction, dataPartitioning, 1, limit); + final Query query; + if (enableShortcut && randomBoolean()) { + query = new MatchAllDocsQuery(); + } else { + query = LongPoint.newRangeQuery("s", 0, numDocs); + } + return new LuceneCountOperator.Factory(List.of(ctx), c -> query, dataPartitioning, between(1, 8), limit); } @Override @@ -102,34 +114,45 @@ protected String expectedDescriptionOfSimple() { // TODO tests for the other data partitioning configurations - public void testShardDataPartitioning() { + public void testSimple() { int size = between(1_000, 20_000); - int limit = between(10, size); + int limit = randomBoolean() ? between(10, size) : Integer.MAX_VALUE; testCount(size, limit); } public void testEmpty() { - testCount(0, between(10, 10_000)); + int limit = randomBoolean() ? between(10, 10000) : Integer.MAX_VALUE; + testCount(0, limit); } private void testCount(int size, int limit) { - DriverContext ctx = driverContext(); - LuceneCountOperator.Factory factory = simple(nonBreakingBigArrays(), DataPartitioning.SHARD, size, limit); - - List results = new ArrayList<>(); - OperatorTestCase.runDriver(new Driver(ctx, factory.get(ctx), List.of(), new PageConsumerOperator(results::add), () -> {})); - OperatorTestCase.assertDriverContext(ctx); - - assertThat(results, hasSize(1)); - Page page = results.get(0); - - assertThat(page.getPositionCount(), is(1)); - assertThat(page.getBlockCount(), is(2)); - LongBlock lb = page.getBlock(0); - assertThat(lb.getPositionCount(), is(1)); - assertThat(lb.getLong(0), is((long) Math.min(size, limit))); - BooleanBlock bb = page.getBlock(1); - assertThat(bb.getBoolean(1), is(true)); + DataPartitioning dataPartitioning = randomFrom(DataPartitioning.values()); + LuceneCountOperator.Factory factory = simple(nonBreakingBigArrays(), dataPartitioning, size, limit); + List results = new CopyOnWriteArrayList<>(); + List drivers = new ArrayList<>(); + int taskConcurrency = between(1, 8); + for (int i = 0; i < taskConcurrency; i++) { + DriverContext ctx = driverContext(); + drivers.add(new Driver(ctx, factory.get(ctx), List.of(), new PageConsumerOperator(results::add), () -> {})); + } + OperatorTestCase.runDriver(drivers); + assertThat(results.size(), lessThanOrEqualTo(taskConcurrency)); + long totalCount = 0; + for (Page page : results) { + assertThat(page.getPositionCount(), is(1)); + assertThat(page.getBlockCount(), is(2)); + LongBlock lb = page.getBlock(0); + assertThat(lb.getPositionCount(), is(1)); + long count = lb.getLong(0); + assertThat(count, lessThanOrEqualTo((long) limit)); + totalCount += count; + BooleanBlock bb = page.getBlock(1); + assertTrue(bb.getBoolean(0)); + } + // We can't verify the limit if we have more than one pipeline + if (dataPartitioning == DataPartitioning.SHARD || size <= limit || taskConcurrency == 1) { + assertThat(totalCount, equalTo((long) size)); + } } /** diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionIT.java index 448c860d50b36..05580e586ef79 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionIT.java @@ -7,7 +7,6 @@ package 
org.elasticsearch.xpack.esql.action; -import org.apache.lucene.tests.util.LuceneTestCase; import org.elasticsearch.Build; import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest; import org.elasticsearch.action.bulk.BulkRequestBuilder; @@ -67,7 +66,6 @@ import static org.hamcrest.Matchers.lessThanOrEqualTo; import static org.hamcrest.Matchers.nullValue; -@LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/100127") public class EsqlActionIT extends AbstractEsqlIntegTestCase { long epoch = System.currentTimeMillis(); @@ -444,43 +442,45 @@ public void testFromEvalStats() { } public void testUngroupedCountAll() { - EsqlQueryResponse results = run("from test | stats count(*)"); - logger.info(results); - Assert.assertEquals(1, results.columns().size()); - Assert.assertEquals(1, getValuesList(results).size()); - assertEquals("count(*)", results.columns().get(0).name()); - assertEquals("long", results.columns().get(0).type()); - var values = getValuesList(results).get(0); - assertEquals(1, values.size()); - assertEquals(40, (long) values.get(0)); + try (EsqlQueryResponse results = run("from test | stats count(*)")) { + logger.info(results); + Assert.assertEquals(1, results.columns().size()); + Assert.assertEquals(1, getValuesList(results).size()); + assertEquals("count(*)", results.columns().get(0).name()); + assertEquals("long", results.columns().get(0).type()); + var values = getValuesList(results).get(0); + assertEquals(1, values.size()); + assertEquals(40, (long) values.get(0)); + } } public void testUngroupedCountAllWithFilter() { - EsqlQueryResponse results = run("from test | where data > 1 | stats count(*)"); - logger.info(results); - Assert.assertEquals(1, results.columns().size()); - Assert.assertEquals(1, getValuesList(results).size()); - assertEquals("count(*)", results.columns().get(0).name()); - assertEquals("long", results.columns().get(0).type()); - var values = getValuesList(results).get(0); - assertEquals(1, values.size()); - assertEquals(20, (long) values.get(0)); + try (EsqlQueryResponse results = run("from test | where data > 1 | stats count(*)")) { + logger.info(results); + Assert.assertEquals(1, results.columns().size()); + Assert.assertEquals(1, getValuesList(results).size()); + assertEquals("count(*)", results.columns().get(0).name()); + assertEquals("long", results.columns().get(0).type()); + var values = getValuesList(results).get(0); + assertEquals(1, values.size()); + assertEquals(20, (long) values.get(0)); + } } - @AwaitsFix(bugUrl = "tracking down a 64b(long) memory leak") public void testGroupedCountAllWithFilter() { - EsqlQueryResponse results = run("from test | where data > 1 | stats count(*) by data | sort data"); - logger.info(results); - Assert.assertEquals(2, results.columns().size()); - Assert.assertEquals(1, getValuesList(results).size()); - assertEquals("count(*)", results.columns().get(0).name()); - assertEquals("long", results.columns().get(0).type()); - assertEquals("data", results.columns().get(1).name()); - assertEquals("long", results.columns().get(1).type()); - var values = getValuesList(results).get(0); - assertEquals(2, values.size()); - assertEquals(20, (long) values.get(0)); - assertEquals(2L, (long) values.get(1)); + try (EsqlQueryResponse results = run("from test | where data > 1 | stats count(*) by data | sort data")) { + logger.info(results); + Assert.assertEquals(2, results.columns().size()); + Assert.assertEquals(1, getValuesList(results).size()); + assertEquals("count(*)", 
results.columns().get(0).name()); + assertEquals("long", results.columns().get(0).type()); + assertEquals("data", results.columns().get(1).name()); + assertEquals("long", results.columns().get(1).type()); + var values = getValuesList(results).get(0); + assertEquals(2, values.size()); + assertEquals(20, (long) values.get(0)); + assertEquals(2L, (long) values.get(1)); + } } public void testFromStatsEvalWithPragma() { From 99f3bd372d3809707516875cf30bd02d7104b577 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Mon, 2 Oct 2023 18:47:42 -0700 Subject: [PATCH 050/136] Fix LuceneCountOperatorTests We can't verify the limit if it's smaller than the number of documents. Relates #100153 --- .../compute/lucene/LuceneCountOperatorTests.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneCountOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneCountOperatorTests.java index 4c92e146a25ea..0696d02d87af6 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneCountOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneCountOperatorTests.java @@ -149,8 +149,8 @@ private void testCount(int size, int limit) { BooleanBlock bb = page.getBlock(1); assertTrue(bb.getBoolean(0)); } - // We can't verify the limit if we have more than one pipeline - if (dataPartitioning == DataPartitioning.SHARD || size <= limit || taskConcurrency == 1) { + // We can't verify the limit + if (size <= limit) { assertThat(totalCount, equalTo((long) size)); } } From 2e86d2504d6237d129e9ac9cdf0b0f35c0968ef2 Mon Sep 17 00:00:00 2001 From: Costin Leau Date: Tue, 3 Oct 2023 06:37:25 +0300 Subject: [PATCH 051/136] ESQL: Pushdown count(field) to Lucene (#100122) Use the LuceneCountOperator also for ungrouped count(field) queries Enhance the SearchStat class to indicate when a field is single or multi-value Fix #99840 --- .../xpack/esql/EsqlTestUtils.java | 5 + .../src/main/resources/stats.csv-spec | 103 +++++ .../LocalPhysicalOptimizerContext.java | 3 +- .../optimizer/LocalPhysicalPlanOptimizer.java | 41 +- .../esql/plan/physical/EsStatsQueryExec.java | 16 +- .../esql/plan/physical/FragmentExec.java | 6 +- .../esql/planner/LocalExecutionPlanner.java | 16 +- .../xpack/esql/planner/PlannerUtils.java | 2 +- .../xpack/esql/stats/SearchStats.java | 113 +++++- .../elasticsearch/xpack/esql/CsvTests.java | 7 +- .../LocalPhysicalPlanOptimizerTests.java | 372 ++++++++++++++++++ .../optimizer/PhysicalPlanOptimizerTests.java | 89 +---- .../TestLocalPhysicalPlanOptimizer.java | 9 +- .../xpack/esql/stats/DisabledSearchStats.java | 5 + .../esql/tree/EsqlNodeSubclassTests.java | 2 +- 15 files changed, 669 insertions(+), 120 deletions(-) create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java index 1e48caa336081..e9917b6a227f6 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java @@ -80,6 +80,11 @@ public byte[] min(String field, DataType dataType) { public byte[] max(String field, DataType 
dataType) { return null; } + + @Override + public boolean isSingleValue(String field) { + return false; + } } public static final TestSearchStats TEST_SEARCH_STATS = new TestSearchStats(); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index 6405c082cf784..d671ba6ec13b1 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -578,3 +578,106 @@ F M null ; + +countFieldNoGrouping +from employees | where emp_no < 10050 | stats c = count(salary); + +c:l +49 +; + +countFieldWithRenamingNoGrouping +from employees | rename emp_no as e, salary as s | where e < 10050 | stats c = count(s); + +c:l +49 +; + + +countFieldWithAliasNoGrouping +from employees | eval s = salary | rename s as sr | eval hidden_s = sr | rename emp_no as e | where e < 10050 | stats c = count(hidden_s); + +c:l +49 +; + +countFieldWithGrouping +from employees | rename languages as l | where emp_no < 10050 | stats c = count(emp_no) by l | sort l; + +c:l | l:i +9 | 1 +7 | 2 +6 | 3 +9 | 4 +8 | 5 +10 | null +; + +countFieldWithAliasWithGrouping +from employees | rename languages as l | eval e = emp_no | where emp_no < 10050 | stats c = count(e) by l | sort l; + +c:l | l:i +9 | 1 +7 | 2 +6 | 3 +9 | 4 +8 | 5 +10 | null +; + +countEvalExpNoGrouping +from employees | eval e = case(emp_no < 10050, emp_no, null) | stats c = count(e); + +c:l +49 +; + +countEvalExpWithGrouping +from employees | rename languages as l | eval e = case(emp_no < 10050, emp_no, null) | stats c = count(e) by l | sort l; + +c:l | l:i +9 | 1 +7 | 2 +6 | 3 +9 | 4 +8 | 5 +10 | null +; + +countAllOnOrdinalField +from employees | stats ca = count() by gender | sort gender; + +ca:l|gender:s +33 |F +57 |M +10 |null +; + +countFieldOnOrdinalField +from employees | stats ca = count(gender) by gender | sort gender; + +ca:l|gender:s +33 |F +57 |M +0 |null +; + + +countFieldVsAll +from employees | stats ca = count(), cn = count(null), cf = count(gender) by gender | sort gender; + +ca:l|cn:l|cf:l|gender:s +33 |33 |33 |F +57 |57 |57 |M +10 |10 |0 |null +; + +countMultiValue +from employees | where emp_no == 10010 | stats c = count(job_positions) by job_positions; + +c:l | job_positions:s +4 |Architect +4 |Purchase Manager +4 |Reporting Analyst +4 |Tech Lead +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalOptimizerContext.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalOptimizerContext.java index cb7f1c96c7d3f..a6b86fcdd783f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalOptimizerContext.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalOptimizerContext.java @@ -8,5 +8,6 @@ package org.elasticsearch.xpack.esql.optimizer; import org.elasticsearch.xpack.esql.session.EsqlConfiguration; +import org.elasticsearch.xpack.esql.stats.SearchStats; -public record LocalPhysicalOptimizerContext(EsqlConfiguration configuration) {} +public record LocalPhysicalOptimizerContext(EsqlConfiguration configuration, SearchStats searchStats) {} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java index eb4b11f5e2e34..55ead7aa3fe4e 100644 
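The hunks that follow teach the optimizer to push an ungrouped count(field) down to Lucene: the rule fires only when SearchStats reports the field as single-valued, and it attaches an exists filter so documents missing the field are not counted, which is exactly the COUNT semantics the csv-spec cases above assert (the null bucket of count(gender) yields 0). A hedged sketch using the Stat record introduced in the hunks below; the range query stands in for whatever filter was already pushed down, and QueryBuilder/QueryBuilders come from org.elasticsearch.index.query:

    QueryBuilder sourceQuery = QueryBuilders.rangeQuery("salary").gt(1000);  // assumed pushed-down filter
    Stat stat = new Stat("salary", StatsType.COUNT, QueryBuilders.existsQuery("salary"));
    QueryBuilder pushed = stat.filter(sourceQuery); // bool filter: salary > 1000 AND exists(salary)
    // LuceneCountOperator then counts hits of `pushed` instead of running an aggregation.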
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizer.java @@ -9,6 +9,7 @@ import org.elasticsearch.core.Tuple; import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; import org.elasticsearch.xpack.esql.evaluator.predicate.operator.comparison.Equals; import org.elasticsearch.xpack.esql.evaluator.predicate.operator.comparison.NotEquals; @@ -65,6 +66,7 @@ import java.util.List; import java.util.Set; import java.util.function.Supplier; +import java.util.stream.Collectors; import static java.util.Arrays.asList; import static java.util.Collections.emptyList; @@ -260,8 +262,6 @@ private static boolean isAttributePushable(Expression expression, ScalarFunction * this method is supposed to be used to define if a field can be used for exact push down (eg. sort or filter). * "aggregatable" is the most accurate information we can have from field_caps as of now. * Pushing down operations on fields that are not aggregatable would result in an error. - * @param f - * @return */ private static boolean isAggregatable(FieldAttribute f) { return f.exactAttribute().field().isAggregatable(); @@ -320,13 +320,23 @@ private List buildFieldSorts(List orders) { /** * Looks for the case where certain stats exist right before the query and thus can be pushed down. */ - private static class PushStatsToSource extends OptimizerRule { + private static class PushStatsToSource extends PhysicalOptimizerRules.ParameterizedOptimizerRule< + AggregateExec, + LocalPhysicalOptimizerContext> { @Override - protected PhysicalPlan rule(AggregateExec aggregateExec) { + protected PhysicalPlan rule(AggregateExec aggregateExec, LocalPhysicalOptimizerContext context) { PhysicalPlan plan = aggregateExec; if (aggregateExec.child() instanceof EsQueryExec queryExec) { - var tuple = pushableStats(aggregateExec); + var tuple = pushableStats(aggregateExec, context); + + // for the moment support pushing count just for one field + List stats = tuple.v2(); + if (stats.size() > 1) { + if (stats.stream().map(Stat::name).collect(Collectors.toSet()).size() > 1) { + return aggregateExec; + } + } // TODO: handle case where some aggs cannot be pushed down by breaking the aggs into two sources (regular + stats) + union // use the stats since the attributes are larger in size (due to seen) @@ -344,9 +354,9 @@ protected PhysicalPlan rule(AggregateExec aggregateExec) { return plan; } - private Tuple, List> pushableStats(AggregateExec aggregate) { + private Tuple, List> pushableStats(AggregateExec aggregate, LocalPhysicalOptimizerContext context) { AttributeMap stats = new AttributeMap<>(); - Tuple, List> tuple = new Tuple<>(new ArrayList(), new ArrayList()); + Tuple, List> tuple = new Tuple<>(new ArrayList<>(), new ArrayList<>()); if (aggregate.groupings().isEmpty()) { for (NamedExpression agg : aggregate.aggregates()) { @@ -356,9 +366,24 @@ private Tuple, List> pushableStats(AggregateExec aggregate Expression child = as.child(); if (child instanceof Count count) { var target = count.field(); + String fieldName = null; + QueryBuilder query = null; // TODO: add count over field (has to be field attribute) if (target.foldable()) { - return new Stat(StringUtils.WILDCARD, COUNT); + fieldName = StringUtils.WILDCARD; + } + // check if regular field + else { + if (target instanceof 
FieldAttribute fa) { + var fName = fa.name(); + if (context.searchStats().isSingleValue(fName)) { + fieldName = fa.name(); + query = QueryBuilders.existsQuery(fieldName); + } + } + } + if (fieldName != null) { + return new Stat(fieldName, COUNT, query); } } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsStatsQueryExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsStatsQueryExec.java index 8e65e66e3045f..fb62191395a61 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsStatsQueryExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsStatsQueryExec.java @@ -15,10 +15,13 @@ import org.elasticsearch.xpack.ql.tree.NodeInfo; import org.elasticsearch.xpack.ql.tree.NodeUtils; import org.elasticsearch.xpack.ql.tree.Source; +import org.elasticsearch.xpack.ql.util.Queries; import java.util.List; import java.util.Objects; +import static java.util.Arrays.asList; + /** * Specialized query class for retrieving statistics about the underlying data and not the actual documents. * For that see {@link EsQueryExec} @@ -29,10 +32,15 @@ public enum StatsType { COUNT, MIN, MAX, - EXISTS; + EXISTS } - public record Stat(String name, StatsType type) {}; + public record Stat(String name, StatsType type, QueryBuilder query) { + + public QueryBuilder filter(QueryBuilder sourceQuery) { + return query == null ? sourceQuery : Queries.combine(Queries.Clause.FILTER, asList(sourceQuery, query)); + } + } private final EsIndex index; private final QueryBuilder query; @@ -69,6 +77,10 @@ public QueryBuilder query() { return query; } + public List stats() { + return stats; + } + @Override public List output() { return attrs; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FragmentExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FragmentExec.java index addc66c106abd..43fccf4cf62da 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FragmentExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/FragmentExec.java @@ -25,13 +25,13 @@ public class FragmentExec extends LeafExec implements EstimatesRowSize { * Estimate of the number of bytes that'll be loaded per position before * the stream of pages is consumed. 
*/ - private final Integer estimatedRowSize; + private final int estimatedRowSize; public FragmentExec(LogicalPlan fragment) { - this(fragment.source(), fragment, null, null); + this(fragment.source(), fragment, null, 0); } - public FragmentExec(Source source, LogicalPlan fragment, QueryBuilder esFilter, Integer estimatedRowSize) { + public FragmentExec(Source source, LogicalPlan fragment, QueryBuilder esFilter, int estimatedRowSize) { super(source); this.fragment = fragment; this.esFilter = esFilter; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java index 156b93e1551c4..f3601873543e3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java @@ -88,7 +88,6 @@ import org.elasticsearch.xpack.ql.util.Holder; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -99,6 +98,7 @@ import java.util.function.Function; import java.util.stream.Stream; +import static java.util.Arrays.asList; import static java.util.stream.Collectors.joining; import static org.elasticsearch.compute.lucene.LuceneOperator.NO_LIMIT; import static org.elasticsearch.compute.operator.LimitOperator.Factory; @@ -239,9 +239,15 @@ private PhysicalOperation planEsStats(EsStatsQueryExec statsQuery, LocalExecutio if (physicalOperationProviders instanceof EsPhysicalOperationProviders == false) { throw new EsqlIllegalArgumentException("EsStatsQuery should only occur against a Lucene backend"); } - EsPhysicalOperationProviders esProvider = (EsPhysicalOperationProviders) physicalOperationProviders; + if (statsQuery.stats().size() > 1) { + throw new EsqlIllegalArgumentException("EsStatsQuery currently supports only one field statistic"); + } - Function querySupplier = EsPhysicalOperationProviders.querySupplier(statsQuery.query()); + // for now only one stat is supported + EsStatsQueryExec.Stat stat = statsQuery.stats().get(0); + + EsPhysicalOperationProviders esProvider = (EsPhysicalOperationProviders) physicalOperationProviders; + Function querySupplier = EsPhysicalOperationProviders.querySupplier(stat.filter(statsQuery.query())); Expression limitExp = statsQuery.limit(); int limit = limitExp != null ? 
(Integer) limitExp.fold() : NO_LIMIT; @@ -411,8 +417,8 @@ private PhysicalOperation planTopN(TopNExec topNExec, LocalExecutionPlannerConte return source.with( new TopNOperatorFactory( limit, - Arrays.asList(elementTypes), - Arrays.asList(encoders), + asList(elementTypes), + asList(encoders), orders, context.pageSize(2000 + topNExec.estimatedRowSize()) ), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java index 75f704b9f14ea..483bd8ce943ee 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java @@ -93,7 +93,7 @@ public static PhysicalPlan localPlan(List searchContexts, EsqlCon public static PhysicalPlan localPlan(EsqlConfiguration configuration, PhysicalPlan plan, SearchStats searchStats) { final var logicalOptimizer = new LocalLogicalPlanOptimizer(new LocalLogicalOptimizerContext(configuration, searchStats)); - var physicalOptimizer = new LocalPhysicalPlanOptimizer(new LocalPhysicalOptimizerContext(configuration)); + var physicalOptimizer = new LocalPhysicalPlanOptimizer(new LocalPhysicalOptimizerContext(configuration, searchStats)); return localPlan(plan, logicalOptimizer, physicalOptimizer); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java index f9a70cefbb57a..b5d75a1528493 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/stats/SearchStats.java @@ -18,6 +18,12 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.index.mapper.ConstantFieldType; +import org.elasticsearch.index.mapper.DocCountFieldMapper.DocCountFieldType; +import org.elasticsearch.index.mapper.IdFieldMapper; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.NumberFieldMapper.NumberFieldType; +import org.elasticsearch.index.mapper.SeqNoFieldMapper; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; import org.elasticsearch.xpack.ql.type.DataType; @@ -27,6 +33,10 @@ import java.util.List; import java.util.Map; +import static org.elasticsearch.index.mapper.DataStreamTimestampFieldMapper.TimestampFieldType; +import static org.elasticsearch.index.mapper.DateFieldMapper.DateFieldType; +import static org.elasticsearch.index.mapper.KeywordFieldMapper.KeywordFieldType; + public class SearchStats { private final List contexts; @@ -35,6 +45,7 @@ private static class FieldStat { private Long count; private Boolean exists; private Object min, max; + private Boolean singleValue; } private static final int CACHE_SIZE = 32; @@ -53,7 +64,10 @@ public SearchStats(List contexts) { public long count() { var count = new long[] { 0 }; - boolean completed = doWithContexts(r -> count[0] += r.numDocs(), false); + boolean completed = doWithContexts(r -> { + count[0] += r.numDocs(); + return true; + }, false); return completed ? 
count[0] : -1; } @@ -61,7 +75,10 @@ public long count(String field) { var stat = cache.computeIfAbsent(field, s -> new FieldStat()); if (stat.count == null) { var count = new long[] { 0 }; - boolean completed = doWithContexts(r -> count[0] += countEntries(r, field), false); + boolean completed = doWithContexts(r -> { + count[0] += countEntries(r, field); + return true; + }, false); stat.count = completed ? count[0] : -1; } return stat.count; @@ -70,7 +87,10 @@ public long count(String field) { public long count(String field, BytesRef value) { var count = new long[] { 0 }; Term term = new Term(field, value); - boolean completed = doWithContexts(r -> count[0] += r.docFreq(term), false); + boolean completed = doWithContexts(r -> { + count[0] += r.docFreq(term); + return true; + }, false); return completed ? count[0] : -1; } @@ -104,7 +124,7 @@ public byte[] min(String field, DataType dataType) { throw new EsqlIllegalArgumentException("Don't know how to compare with previous min"); } } - + return true; }, true); stat.min = min[0]; } @@ -115,7 +135,6 @@ public byte[] min(String field, DataType dataType) { public byte[] max(String field, DataType dataType) { var stat = cache.computeIfAbsent(field, s -> new FieldStat()); if (stat.max == null) { - var max = new byte[][] { null }; doWithContexts(r -> { byte[] localMax = PointValues.getMaxPackedValue(r, field); @@ -127,6 +146,7 @@ public byte[] max(String field, DataType dataType) { throw new EsqlIllegalArgumentException("Don't know how to compare with previous max"); } } + return true; }, true); stat.max = max[0]; } @@ -134,6 +154,79 @@ public byte[] max(String field, DataType dataType) { return null; } + public boolean isSingleValue(String field) { + var stat = cache.computeIfAbsent(field, s -> new FieldStat()); + if (stat.singleValue == null) { + // there's no such field so no need to worry about multi-value fields + if (exists(field) == false) { + stat.singleValue = true; + } else { + var sv = new boolean[] { false }; + for (SearchContext context : contexts) { + MappedFieldType mappedType = context.getSearchExecutionContext().getFieldType(field); + doWithContexts(r -> { + sv[0] &= detectSingleValue(r, mappedType, field); + return sv[0]; + }, true); + break; + } + stat.singleValue = sv[0]; + } + } + return stat.singleValue; + } + + private boolean detectSingleValue(IndexReader r, MappedFieldType fieldType, String name) throws IOException { + // types that are always single value (and are accessible through instanceof) + if (fieldType instanceof ConstantFieldType || fieldType instanceof DocCountFieldType || fieldType instanceof TimestampFieldType) { + return true; + } + + var typeName = fieldType.typeName(); + + // non-visible fields, check their names + boolean found = switch (typeName) { + case IdFieldMapper.NAME, SeqNoFieldMapper.NAME -> true; + default -> false; + }; + + if (found) { + return true; + } + + // check against doc size + DocCountTester tester = null; + if (fieldType instanceof DateFieldType || fieldType instanceof NumberFieldType) { + tester = lr -> { + PointValues values = lr.getPointValues(name); + return values == null || values.size() == values.getDocCount(); + }; + } else if (fieldType instanceof KeywordFieldType) { + tester = lr -> { + Terms terms = lr.terms(name); + return terms == null || terms.size() == terms.getDocCount(); + }; + } + + if (tester != null) { + // check each leaf + for (LeafReaderContext context : r.leaves()) { + if (tester.test(context.reader()) == false) { + return false; + } + } + // field is missing or 
single value + return true; + } + + // unsupported type - default to MV + return false; + } + + private interface DocCountTester { + Boolean test(LeafReader leafReader) throws IOException; + } + // // @see org.elasticsearch.search.query.QueryPhaseCollectorManager#shortcutTotalHitCount(IndexReader, Query) // @@ -172,7 +265,10 @@ private static int countEntries(IndexReader indexReader, String field) { } private interface IndexReaderConsumer { - void consume(IndexReader reader) throws IOException; + /** + * Returns true if the consumer should keep on going, false otherwise. + */ + boolean consume(IndexReader reader) throws IOException; } private boolean doWithContexts(IndexReaderConsumer consumer, boolean acceptsDeletions) { @@ -183,7 +279,10 @@ private boolean doWithContexts(IndexReaderConsumer consumer, boolean acceptsDele if (acceptsDeletions == false && reader.hasDeletions()) { return false; } - consumer.consume(reader); + // check if the looping continues or not + if (consumer.consume(reader) == false) { + return false; + } } } return true; diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index eb200067fce0c..3f42b037ebbe0 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -362,10 +362,11 @@ private ActualResults executePlan() throws Exception { LocalExecutionPlan coordinatorNodeExecutionPlan = executionPlanner.plan(new OutputExec(coordinatorPlan, collectedPages::add)); drivers.addAll(coordinatorNodeExecutionPlan.createDrivers(sessionId)); if (dataNodePlan != null) { - var logicalTestOptimizer = new LocalLogicalPlanOptimizer( - new LocalLogicalOptimizerContext(configuration, new DisabledSearchStats()) + var searchStats = new DisabledSearchStats(); + var logicalTestOptimizer = new LocalLogicalPlanOptimizer(new LocalLogicalOptimizerContext(configuration, searchStats)); + var physicalTestOptimizer = new TestLocalPhysicalPlanOptimizer( + new LocalPhysicalOptimizerContext(configuration, searchStats) ); - var physicalTestOptimizer = new TestLocalPhysicalPlanOptimizer(new LocalPhysicalOptimizerContext(configuration)); var csvDataNodePhysicalPlan = PlannerUtils.localPlan(dataNodePlan, logicalTestOptimizer, physicalTestOptimizer); exchangeSource.addRemoteSink(exchangeSink::fetchPageAsync, randomIntBetween(1, 3)); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java new file mode 100644 index 0000000000000..3cab0604b0688 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -0,0 +1,372 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
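For reference, a hedged distillation of the doc-count heuristic used by detectSingleValue above: a numeric or date field is provably single-valued in a segment when the total number of indexed point values equals the number of documents carrying the field, since an average of one value per document, where every counted document has at least one value, can only mean exactly one each. The class wrapper is only for self-containment:

    import org.apache.lucene.index.LeafReader;
    import org.apache.lucene.index.PointValues;
    import java.io.IOException;

    final class SingleValueCheck {
        static boolean numericFieldIsSingleValued(LeafReader leaf, String field) throws IOException {
            PointValues values = leaf.getPointValues(field);
            // size() = total values across docs; getDocCount() = docs with at least one value
            return values == null || values.size() == values.getDocCount();
        }
    }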
+ */ + +package org.elasticsearch.xpack.esql.optimizer; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.core.enrich.EnrichPolicy; +import org.elasticsearch.xpack.esql.EsqlTestUtils; +import org.elasticsearch.xpack.esql.analysis.Analyzer; +import org.elasticsearch.xpack.esql.analysis.AnalyzerContext; +import org.elasticsearch.xpack.esql.analysis.EnrichResolution; +import org.elasticsearch.xpack.esql.analysis.Verifier; +import org.elasticsearch.xpack.esql.enrich.EnrichPolicyResolution; +import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry; +import org.elasticsearch.xpack.esql.parser.EsqlParser; +import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; +import org.elasticsearch.xpack.esql.plan.physical.EsQueryExec; +import org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec; +import org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec.Stat; +import org.elasticsearch.xpack.esql.plan.physical.EstimatesRowSize; +import org.elasticsearch.xpack.esql.plan.physical.EvalExec; +import org.elasticsearch.xpack.esql.plan.physical.ExchangeExec; +import org.elasticsearch.xpack.esql.plan.physical.LimitExec; +import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; +import org.elasticsearch.xpack.esql.planner.FilterTests; +import org.elasticsearch.xpack.esql.planner.Mapper; +import org.elasticsearch.xpack.esql.planner.PlannerUtils; +import org.elasticsearch.xpack.esql.plugin.QueryPragmas; +import org.elasticsearch.xpack.esql.session.EsqlConfiguration; +import org.elasticsearch.xpack.esql.stats.DisabledSearchStats; +import org.elasticsearch.xpack.esql.stats.Metrics; +import org.elasticsearch.xpack.esql.stats.SearchStats; +import org.elasticsearch.xpack.esql.type.EsqlDataTypes; +import org.elasticsearch.xpack.ql.expression.Expressions; +import org.elasticsearch.xpack.ql.expression.function.FunctionRegistry; +import org.elasticsearch.xpack.ql.index.EsIndex; +import org.elasticsearch.xpack.ql.index.IndexResolution; +import org.elasticsearch.xpack.ql.type.DataTypes; +import org.elasticsearch.xpack.ql.type.EsField; +import org.junit.Before; + +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static java.util.Arrays.asList; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.as; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.configuration; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.loadMapping; +import static org.elasticsearch.xpack.esql.plan.physical.AggregateExec.Mode.FINAL; +import static org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec.StatsType; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.nullValue; + +public class LocalPhysicalPlanOptimizerTests extends ESTestCase { + + private static final String PARAM_FORMATTING = "%1$s"; + + /** + * Estimated size of a keyword field in bytes. 
+ */ + private static final int KEYWORD_EST = EstimatesRowSize.estimateSize(DataTypes.KEYWORD); + + private EsqlParser parser; + private Analyzer analyzer; + private LogicalPlanOptimizer logicalOptimizer; + private PhysicalPlanOptimizer physicalPlanOptimizer; + private Mapper mapper; + private Map mapping; + private int allFieldRowSize; + + private final EsqlConfiguration config; + private final SearchStats IS_SV_STATS = new EsqlTestUtils.TestSearchStats() { + @Override + public boolean isSingleValue(String field) { + return true; + } + }; + + @ParametersFactory(argumentFormatting = PARAM_FORMATTING) + public static List readScriptSpec() { + return settings().stream().map(t -> { + var settings = Settings.builder().loadFromMap(t.v2()).build(); + return new Object[] { t.v1(), configuration(new QueryPragmas(settings)) }; + }).toList(); + } + + private static List>> settings() { + return asList(new Tuple<>("default", Map.of())); + } + + public LocalPhysicalPlanOptimizerTests(String name, EsqlConfiguration config) { + this.config = config; + } + + @Before + public void init() { + parser = new EsqlParser(); + + mapping = loadMapping("mapping-basic.json"); + allFieldRowSize = mapping.values() + .stream() + .mapToInt( + f -> (EstimatesRowSize.estimateSize(EsqlDataTypes.widenSmallNumericTypes(f.getDataType())) + f.getProperties() + .values() + .stream() + // check one more level since the mapping contains TEXT fields with KEYWORD multi-fields + .mapToInt(x -> EstimatesRowSize.estimateSize(EsqlDataTypes.widenSmallNumericTypes(x.getDataType()))) + .sum()) + ) + .sum(); + EsIndex test = new EsIndex("test", mapping); + IndexResolution getIndexResult = IndexResolution.valid(test); + logicalOptimizer = new LogicalPlanOptimizer(); + physicalPlanOptimizer = new PhysicalPlanOptimizer(new PhysicalOptimizerContext(config)); + FunctionRegistry functionRegistry = new EsqlFunctionRegistry(); + mapper = new Mapper(functionRegistry); + var enrichResolution = new EnrichResolution( + Set.of( + new EnrichPolicyResolution( + "foo", + new EnrichPolicy(EnrichPolicy.MATCH_TYPE, null, List.of("idx"), "fld", List.of("a", "b")), + IndexResolution.valid( + new EsIndex( + "idx", + Map.ofEntries( + Map.entry("a", new EsField("a", DataTypes.INTEGER, Map.of(), true)), + Map.entry("b", new EsField("b", DataTypes.LONG, Map.of(), true)) + ) + ) + ) + ) + ), + Set.of("foo") + ); + + analyzer = new Analyzer( + new AnalyzerContext(config, functionRegistry, getIndexResult, enrichResolution), + new Verifier(new Metrics()) + ); + } + + /** + * Expects + * LimitExec[500[INTEGER]] + * \_AggregateExec[[],[COUNT([2a][KEYWORD]) AS c],FINAL,null] + * \_ExchangeExec[[count{r}#24, seen{r}#25],true] + * \_EsStatsQueryExec[test], stats[Stat[name=*, type=COUNT, query=null]]], query[{"esql_single_value":{"field":"emp_no","next": + * {"range":{"emp_no":{"lt":10050,"boost":1.0}}}}}][count{r}#40, seen{r}#41], limit[], + */ + // TODO: this is suboptimal due to eval not being removed/folded + public void testCountAllWithEval() { + var plan = plan(""" + from test | eval s = salary | rename s as sr | eval hidden_s = sr | rename emp_no as e | where e < 10050 + | stats c = count(*) + """); + var stat = queryStatsFor(plan); + assertThat(stat.type(), is(StatsType.COUNT)); + assertThat(stat.query(), is(nullValue())); + } + + /** + * Expects + * LimitExec[500[INTEGER]] + * \_AggregateExec[[],[COUNT([2a][KEYWORD]) AS c],FINAL,null] + * \_ExchangeExec[[count{r}#14, seen{r}#15],true] + * \_EsStatsQueryExec[test], stats[Stat[name=*, type=COUNT, query=null]]], + * 
query[{"esql_single_value":{"field":"emp_no","next":{"range":{"emp_no":{"gt":10040,"boost":1.0}}}}}][count{r}#30, seen{r}#31], + * limit[], + */ + public void testCountAllWithFilter() { + var plan = plan("from test | where emp_no > 10040 | stats c = count(*)"); + var stat = queryStatsFor(plan); + assertThat(stat.type(), is(StatsType.COUNT)); + assertThat(stat.query(), is(nullValue())); + } + + /** + * Expects + * LimitExec[500[INTEGER]] + * \_AggregateExec[[],[COUNT(emp_no{f}#5) AS c],FINAL,null] + * \_ExchangeExec[[count{r}#15, seen{r}#16],true] + * \_EsStatsQueryExec[test], stats[Stat[name=emp_no, type=COUNT, query={ + * "exists" : { + * "field" : "emp_no", + * "boost" : 1.0 + * } + * }]]], query[{"esql_single_value":{"field":"emp_no","next":{"range":{"emp_no":{"gt":10040,"boost":1.0}}}}}][count{r}#31, seen{r}#32], + * limit[], + */ + public void testCountFieldWithFilter() { + var plan = plan("from test | where emp_no > 10040 | stats c = count(emp_no)", IS_SV_STATS); + var stat = queryStatsFor(plan); + assertThat(stat.type(), is(StatsType.COUNT)); + assertThat(stat.query(), is(QueryBuilders.existsQuery("emp_no"))); + } + + /** + * Expects - for now + * LimitExec[500[INTEGER]] + * \_AggregateExec[[],[COUNT(hidden_s{r}#8) AS c],FINAL,null] + * \_ExchangeExec[[count{r}#25, seen{r}#26],true] + * \_AggregateExec[[],[COUNT(hidden_s{r}#8) AS c],PARTIAL,8] + * \_EvalExec[[salary{f}#20 AS s, s{r}#3 AS hidden_s]] + * \_FieldExtractExec[salary{f}#20] + * \_EsQueryExec[test], query[{"esql_single_value":{"field":"emp_no","next":{"range":{"emp_no":{"lt":10050,"boost":1.0}}}}}] + * [_doc{f}#42], limit[], sort[] estimatedRowSize[16] + */ + // TODO: the eval is not yet optimized away + public void testCountFieldWithEval() { + var plan = plan(""" + from test | eval s = salary | rename s as sr | eval hidden_s = sr | rename emp_no as e | where e < 10050 + | stats c = count(hidden_s) + """, IS_SV_STATS); + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + var exg = as(agg.child(), ExchangeExec.class); + agg = as(exg.child(), AggregateExec.class); + var eval = as(agg.child(), EvalExec.class); + } + + // optimized doesn't know yet how to push down count over field + public void testCountOneFieldWithFilter() { + var plan = plan(""" + from test + | where salary > 1000 + | stats c = count(salary) + """, IS_SV_STATS); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + assertThat(agg.getMode(), is(FINAL)); + assertThat(Expressions.names(agg.aggregates()), contains("c")); + var exchange = as(agg.child(), ExchangeExec.class); + var esStatsQuery = as(exchange.child(), EsStatsQueryExec.class); + assertThat(esStatsQuery.limit(), is(nullValue())); + assertThat(Expressions.names(esStatsQuery.output()), contains("count", "seen")); + var stat = as(esStatsQuery.stats().get(0), Stat.class); + assertThat(stat.query(), is(QueryBuilders.existsQuery("salary"))); + var expected = wrapWithSingleQuery(QueryBuilders.rangeQuery("salary").gt(1000), "salary"); + assertThat(expected.toString(), is(esStatsQuery.query().toString())); + } + + // optimized doesn't know yet how to push down count over field + public void testCountOneFieldWithFilterAndLimit() { + var plan = plan(""" + from test + | where salary > 1000 + | limit 10 + | stats c = count(salary) + """, IS_SV_STATS); + assertThat(plan.anyMatch(EsQueryExec.class::isInstance), is(true)); + } + + // optimized doesn't know yet how to break down different multi count + public void 
testCountMultipleFieldsWithFilter() { + var plan = plan(""" + from test + | where salary > 1000 and emp_no > 10010 + | stats cs = count(salary), ce = count(emp_no) + """, IS_SV_STATS); + assertThat(plan.anyMatch(EsQueryExec.class::isInstance), is(true)); + } + + public void testAnotherCountAllWithFilter() { + var plan = plan(""" + from test + | where emp_no > 10010 + | stats c = count() + """, IS_SV_STATS); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + assertThat(agg.getMode(), is(FINAL)); + assertThat(Expressions.names(agg.aggregates()), contains("c")); + var exchange = as(agg.child(), ExchangeExec.class); + var esStatsQuery = as(exchange.child(), EsStatsQueryExec.class); + assertThat(esStatsQuery.limit(), is(nullValue())); + assertThat(Expressions.names(esStatsQuery.output()), contains("count", "seen")); + var expected = wrapWithSingleQuery(QueryBuilders.rangeQuery("emp_no").gt(10010), "emp_no"); + assertThat(expected.toString(), is(esStatsQuery.query().toString())); + } + + @AwaitsFix(bugUrl = "intermediateAgg does proper reduction but the agg itself does not - the optimizer needs to improve") + public void testMultiCountAllWithFilter() { + var plan = plan(""" + from test + | where emp_no > 10010 + | stats c = count(), call = count(*), c_literal = count(1) + """, IS_SV_STATS); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + assertThat(agg.getMode(), is(FINAL)); + assertThat(Expressions.names(agg.aggregates()), contains("c", "call", "c_literal")); + var exchange = as(agg.child(), ExchangeExec.class); + var esStatsQuery = as(exchange.child(), EsStatsQueryExec.class); + assertThat(esStatsQuery.limit(), is(nullValue())); + assertThat(Expressions.names(esStatsQuery.output()), contains("count", "seen")); + var expected = wrapWithSingleQuery(QueryBuilders.rangeQuery("emp_no").gt(10010), "emp_no"); + assertThat(expected.toString(), is(esStatsQuery.query().toString())); + } + + // optimized doesn't know yet how to break down different multi count + public void testCountFieldsAndAllWithFilter() { + var plan = plan(""" + from test + | where emp_no > 10010 + | stats c = count(), cs = count(salary), ce = count(emp_no) + """, IS_SV_STATS); + assertThat(plan.anyMatch(EsQueryExec.class::isInstance), is(true)); + } + + private QueryBuilder wrapWithSingleQuery(QueryBuilder inner, String fieldName) { + return FilterTests.singleValueQuery(inner, fieldName); + } + + private Stat queryStatsFor(PhysicalPlan plan) { + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + var exg = as(agg.child(), ExchangeExec.class); + var statSource = as(exg.child(), EsStatsQueryExec.class); + var stats = statSource.stats(); + assertThat(stats, hasSize(1)); + var stat = stats.get(0); + return stat; + } + + private PhysicalPlan plan(String query) { + return plan(query, EsqlTestUtils.TEST_SEARCH_STATS); + } + + private PhysicalPlan plan(String query, SearchStats stats) { + var physical = optimizedPlan(physicalPlan(query), stats); + return physical; + } + + private PhysicalPlan optimizedPlan(PhysicalPlan plan, SearchStats searchStats) { + // System.out.println("* Physical Before\n" + plan); + var p = EstimatesRowSize.estimateRowSize(0, physicalPlanOptimizer.optimize(plan)); + // System.out.println("* Physical After\n" + p); + // the real execution breaks the plan at the exchange and then decouples the plan + // this is of no use in the unit tests, which checks the plan as a whole instead of 
each + // individually hence why here the plan is kept as is + + var logicalTestOptimizer = new LocalLogicalPlanOptimizer(new LocalLogicalOptimizerContext(config, new DisabledSearchStats())); + var physicalTestOptimizer = new TestLocalPhysicalPlanOptimizer(new LocalPhysicalOptimizerContext(config, searchStats), true); + var l = PlannerUtils.localPlan(plan, logicalTestOptimizer, physicalTestOptimizer); + + // System.out.println("* Localized DataNode Plan\n" + l); + return l; + } + + private PhysicalPlan physicalPlan(String query) { + var logical = logicalOptimizer.optimize(analyzer.analyze(parser.createStatement(query))); + // System.out.println("Logical\n" + logical); + var physical = mapper.map(logical); + return physical; + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java index 4d25252c1409b..9f3bef6d064e6 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java @@ -14,14 +14,12 @@ import org.elasticsearch.core.Tuple; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.RangeQueryBuilder; import org.elasticsearch.index.query.RegexpQueryBuilder; import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.index.query.TermsQueryBuilder; import org.elasticsearch.index.query.WildcardQueryBuilder; import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.test.junit.annotations.TestLogging; import org.elasticsearch.xpack.core.enrich.EnrichPolicy; import org.elasticsearch.xpack.esql.EsqlTestUtils; import org.elasticsearch.xpack.esql.analysis.Analyzer; @@ -43,7 +41,6 @@ import org.elasticsearch.xpack.esql.plan.physical.EsQueryExec; import org.elasticsearch.xpack.esql.plan.physical.EsQueryExec.FieldSort; import org.elasticsearch.xpack.esql.plan.physical.EsSourceExec; -import org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec; import org.elasticsearch.xpack.esql.plan.physical.EstimatesRowSize; import org.elasticsearch.xpack.esql.plan.physical.EvalExec; import org.elasticsearch.xpack.esql.plan.physical.ExchangeExec; @@ -56,7 +53,6 @@ import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.plan.physical.ProjectExec; import org.elasticsearch.xpack.esql.plan.physical.TopNExec; -import org.elasticsearch.xpack.esql.planner.FilterTests; import org.elasticsearch.xpack.esql.planner.Mapper; import org.elasticsearch.xpack.esql.planner.PhysicalVerificationException; import org.elasticsearch.xpack.esql.planner.PlannerUtils; @@ -107,7 +103,7 @@ import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.nullValue; -@TestLogging(value = "org.elasticsearch.xpack.esql.optimizer.LocalLogicalPlanOptimizer:TRACE", reason = "debug") +//@TestLogging(value = "org.elasticsearch.xpack.esql.optimizer.LocalLogicalPlanOptimizer:TRACE", reason = "debug") public class PhysicalPlanOptimizerTests extends ESTestCase { private static final String PARAM_FORMATTING = "%1$s"; @@ -1862,86 +1858,6 @@ public void testAvgSurrogateFunctionAfterRenameAndLimit() { assertThat(source.limit().fold(), equalTo(10)); } - // optimized doesn't know yet how to push down 
count over field - public void testCountOneFieldWithFilter() { - var plan = optimizedPlan(physicalPlan(""" - from test - | where salary > 1000 - | stats c = count(salary) - """)); - assertThat(plan.anyMatch(EsQueryExec.class::isInstance), is(true)); - } - - // optimized doesn't know yet how to push down count over field - public void testCountOneFieldWithFilterAndLimit() { - var plan = optimizedPlan(physicalPlan(""" - from test - | where salary > 1000 - | limit 10 - | stats c = count(salary) - """)); - assertThat(plan.anyMatch(EsQueryExec.class::isInstance), is(true)); - } - - // optimized doesn't know yet how to break down different multi count - public void testCountMultipleFieldsWithFilter() { - var plan = optimizedPlan(physicalPlan(""" - from test - | where salary > 1000 and emp_no > 10010 - | stats cs = count(salary), ce = count(emp_no) - """)); - assertThat(plan.anyMatch(EsQueryExec.class::isInstance), is(true)); - } - - public void testCountAllWithFilter() { - var plan = optimizedPlan(physicalPlan(""" - from test - | where emp_no > 10010 - | stats c = count() - """)); - - var limit = as(plan, LimitExec.class); - var agg = as(limit.child(), AggregateExec.class); - assertThat(agg.getMode(), is(FINAL)); - assertThat(Expressions.names(agg.aggregates()), contains("c")); - var exchange = as(agg.child(), ExchangeExec.class); - var esStatsQuery = as(exchange.child(), EsStatsQueryExec.class); - assertThat(esStatsQuery.limit(), is(nullValue())); - assertThat(Expressions.names(esStatsQuery.output()), contains("count", "seen")); - var expected = wrapWithSingleQuery(QueryBuilders.rangeQuery("emp_no").gt(10010), "emp_no"); - assertThat(expected.toString(), is(esStatsQuery.query().toString())); - } - - @AwaitsFix(bugUrl = "intermediateAgg does proper reduction but the agg itself does not - the optimizer needs to improve") - public void testMultiCountAllWithFilter() { - var plan = optimizedPlan(physicalPlan(""" - from test - | where emp_no > 10010 - | stats c = count(), call = count(*), c_literal = count(1) - """)); - - var limit = as(plan, LimitExec.class); - var agg = as(limit.child(), AggregateExec.class); - assertThat(agg.getMode(), is(FINAL)); - assertThat(Expressions.names(agg.aggregates()), contains("c", "call", "c_literal")); - var exchange = as(agg.child(), ExchangeExec.class); - var esStatsQuery = as(exchange.child(), EsStatsQueryExec.class); - assertThat(esStatsQuery.limit(), is(nullValue())); - assertThat(Expressions.names(esStatsQuery.output()), contains("count", "seen")); - var expected = wrapWithSingleQuery(QueryBuilders.rangeQuery("emp_no").gt(10010), "emp_no"); - assertThat(expected.toString(), is(esStatsQuery.query().toString())); - } - - // optimized doesn't know yet how to break down different multi count - public void testCountFieldsAndAllWithFilter() { - var plan = optimizedPlan(physicalPlan(""" - from test - | where emp_no > 10010 - | stats c = count(), cs = count(salary), ce = count(emp_no) - """)); - assertThat(plan.anyMatch(EsQueryExec.class::isInstance), is(true)); - } - private static EsQueryExec source(PhysicalPlan plan) { if (plan instanceof ExchangeExec exchange) { plan = exchange.child(); @@ -1997,7 +1913,4 @@ private QueryBuilder sv(QueryBuilder builder, String fieldName) { return sv.next(); } - private QueryBuilder wrapWithSingleQuery(QueryBuilder inner, String fieldName) { - return FilterTests.singleValueQuery(inner, fieldName); - } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/TestLocalPhysicalPlanOptimizer.java 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/TestLocalPhysicalPlanOptimizer.java index a4e94254b94ae..0d1e85171bac3 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/TestLocalPhysicalPlanOptimizer.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/TestLocalPhysicalPlanOptimizer.java @@ -13,12 +13,19 @@ public class TestLocalPhysicalPlanOptimizer extends LocalPhysicalPlanOptimizer { + private final boolean esRules; + public TestLocalPhysicalPlanOptimizer(LocalPhysicalOptimizerContext context) { + this(context, false); + } + + public TestLocalPhysicalPlanOptimizer(LocalPhysicalOptimizerContext context, boolean esRules) { super(context); + this.esRules = esRules; } @Override protected List> batches() { - return rules(false); + return rules(esRules); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/DisabledSearchStats.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/DisabledSearchStats.java index 72c255d5e5388..1cda9323af89a 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/DisabledSearchStats.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/DisabledSearchStats.java @@ -47,4 +47,9 @@ public byte[] min(String field, DataType dataType) { public byte[] max(String field, DataType dataType) { return null; } + + @Override + public boolean isSingleValue(String field) { + return false; + } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/tree/EsqlNodeSubclassTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/tree/EsqlNodeSubclassTests.java index 640dd410d8573..c4350c8ec74d7 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/tree/EsqlNodeSubclassTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/tree/EsqlNodeSubclassTests.java @@ -102,7 +102,7 @@ protected Object pluggableMakeArg(Class> toBuildClass, Class Date: Tue, 3 Oct 2023 09:47:39 +0200 Subject: [PATCH 052/136] ESQL: Add block factory to GROK command (#100139) Add DriverContext and BlockFactory to GROK command to properly handle circuit breaking See https://github.com/elastic/elasticsearch/issues/99826 --- .../operator/ColumnExtractOperator.java | 48 +++++++++++-------- .../operator/ColumnExtractOperatorTests.java | 3 +- .../src/main/resources/grok.csv-spec | 3 +- 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/ColumnExtractOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/ColumnExtractOperator.java index 0ccf575fc030d..58bf9e097bec3 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/ColumnExtractOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/ColumnExtractOperator.java @@ -27,7 +27,7 @@ public record Factory( @Override public Operator get(DriverContext driverContext) { - return new ColumnExtractOperator(types, inputEvalSupplier.get(driverContext), evaluatorSupplier.get()); + return new ColumnExtractOperator(types, inputEvalSupplier.get(driverContext), evaluatorSupplier.get(), driverContext); } @Override @@ -39,15 +39,18 @@ public String describe() { private final ElementType[] types; private final EvalOperator.ExpressionEvaluator inputEvaluator; private final ColumnExtractOperator.Evaluator evaluator; + private 
final DriverContext driverContext; public ColumnExtractOperator( ElementType[] types, - EvalOperator.ExpressionEvaluator inputEvaluator, - ColumnExtractOperator.Evaluator evaluator + ExpressionEvaluator inputEvaluator, + Evaluator evaluator, + DriverContext driverContext ) { this.types = types; this.inputEvaluator = inputEvaluator; this.evaluator = evaluator; + this.driverContext = driverContext; } @Override @@ -55,28 +58,33 @@ protected Page process(Page page) { int rowsCount = page.getPositionCount(); Block.Builder[] blockBuilders = new Block.Builder[types.length]; - for (int i = 0; i < types.length; i++) { - blockBuilders[i] = types[i].newBlockBuilder(rowsCount); - } + try { + for (int i = 0; i < types.length; i++) { + blockBuilders[i] = types[i].newBlockBuilder(rowsCount, driverContext.blockFactory()); + } - try (Block.Ref ref = inputEvaluator.eval(page)) { - BytesRefBlock input = (BytesRefBlock) ref.block(); - BytesRef spare = new BytesRef(); - for (int row = 0; row < rowsCount; row++) { - if (input.isNull(row)) { - for (int i = 0; i < blockBuilders.length; i++) { - blockBuilders[i].appendNull(); + try (Block.Ref ref = inputEvaluator.eval(page)) { + BytesRefBlock input = (BytesRefBlock) ref.block(); + BytesRef spare = new BytesRef(); + for (int row = 0; row < rowsCount; row++) { + if (input.isNull(row)) { + for (int i = 0; i < blockBuilders.length; i++) { + blockBuilders[i].appendNull(); + } + continue; } - continue; + evaluator.computeRow(input, row, blockBuilders, spare); + } + + Block[] blocks = new Block[blockBuilders.length]; + for (int i = 0; i < blockBuilders.length; i++) { + blocks[i] = blockBuilders[i].build(); } - evaluator.computeRow(input, row, blockBuilders, spare); - } - Block[] blocks = new Block[blockBuilders.length]; - for (int i = 0; i < blockBuilders.length; i++) { - blocks[i] = blockBuilders[i].build(); + return page.appendBlocks(blocks); } - return page.appendBlocks(blocks); + } finally { + Releasables.closeExpectNoException(blockBuilders); } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/ColumnExtractOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/ColumnExtractOperatorTests.java index 8e0be216ed477..49f3382f8ad2d 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/ColumnExtractOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/ColumnExtractOperatorTests.java @@ -89,7 +89,6 @@ protected void assertSimpleOutput(List input, List results) { @Override protected ByteSizeValue smallEnoughToCircuitBreak() { - assumeTrue("doesn't use big arrays so can't break", false); - return null; + return ByteSizeValue.ofBytes(between(1, 32)); } } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec index 9dc9444de0155..75dfdd783a7cd 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/grok.csv-spec @@ -83,7 +83,8 @@ Anneke Preusig | Anneke | Preusig ; -grokStats +# AwaitsFix https://github.com/elastic/elasticsearch/issues/99826 +grokStats-Ignore from employees | eval x = concat(gender, " foobar") | grok x "%{WORD:a} %{WORD:b}" | stats n = max(emp_no) by a | keep a, n | sort a asc; a:keyword | n:integer From af65bb9de8b0714e7540a7edfccf5a51d6c61c28 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Tue, 3 Oct 2023 00:51:20 
-0700 Subject: [PATCH 053/136] Release ESQL response in IT tests (#100176) This commit makes sure that ESQL responses are closed in our IT tests. --- .../xpack/esql/action/EsqlActionIT.java | 824 ++++++++++-------- .../esql/action/EsqlActionRuntimeFieldIT.java | 22 +- .../xpack/esql/action/ManyShardsIT.java | 2 +- 3 files changed, 459 insertions(+), 389 deletions(-) diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionIT.java index 05580e586ef79..a4c13af3d99ad 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionIT.java @@ -76,23 +76,27 @@ public void setupIndex() { } public void testProjectConstant() { - EsqlQueryResponse results = run("from test | eval x = 1 | keep x"); - assertThat(results.columns(), equalTo(List.of(new ColumnInfo("x", "integer")))); - assertThat(getValuesList(results).size(), equalTo(40)); - assertThat(getValuesList(results).get(0).get(0), equalTo(1)); + try (EsqlQueryResponse results = run("from test | eval x = 1 | keep x")) { + assertThat(results.columns(), equalTo(List.of(new ColumnInfo("x", "integer")))); + assertThat(getValuesList(results).size(), equalTo(40)); + assertThat(getValuesList(results).get(0).get(0), equalTo(1)); + } + } public void testStatsOverConstant() { - EsqlQueryResponse results = run("from test | eval x = 1 | stats x = count(x)"); - assertThat(results.columns(), equalTo(List.of(new ColumnInfo("x", "long")))); - assertThat(getValuesList(results).size(), equalTo(1)); - assertThat(getValuesList(results).get(0).get(0), equalTo(40L)); + try (EsqlQueryResponse results = run("from test | eval x = 1 | stats x = count(x)")) { + assertThat(results.columns(), equalTo(List.of(new ColumnInfo("x", "long")))); + assertThat(getValuesList(results).size(), equalTo(1)); + assertThat(getValuesList(results).get(0).get(0), equalTo(40L)); + } } public void testRow() { long value = randomLongBetween(0, Long.MAX_VALUE); - EsqlQueryResponse response = run("row " + value); - assertEquals(List.of(List.of(value)), getValuesList(response)); + try (EsqlQueryResponse response = run("row " + value)) { + assertEquals(List.of(List.of(value)), getValuesList(response)); + } } public void testFromStatsGroupingAvgWithSort() { @@ -147,56 +151,58 @@ public void testFromStatsGroupingCountWithAliases() { } private void testFromStatsGroupingCountImpl(String command, String expectedFieldName, String expectedGroupName) { - EsqlQueryResponse results = run(command); - logger.info(results); - Assert.assertEquals(2, results.columns().size()); - - // assert column metadata - ColumnInfo groupColumn = results.columns().get(0); - assertEquals(expectedGroupName, groupColumn.name()); - assertEquals("long", groupColumn.type()); - ColumnInfo valuesColumn = results.columns().get(1); - assertEquals(expectedFieldName, valuesColumn.name()); - assertEquals("long", valuesColumn.type()); - - // assert column values - List> valueValues = getValuesList(results); - assertEquals(2, valueValues.size()); - // This is loathsome, find a declarative way to assert the expected output. 
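A condensed sketch of the shape this commit gives each test: the whole assertion body moves inside a try-with-resources block, so the response is closed, and the pages backing it released, even when an assertion throws. This assumes only that EsqlQueryResponse is releasable, as the diff implies; the assertion shown is illustrative:

    try (EsqlQueryResponse results = run(command)) {
        // if this assertion throws, close() still runs and releases the
        // pages held by the response instead of leaking them
        Assert.assertEquals(2, results.columns().size());
    }
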
- if ((long) valueValues.get(0).get(1) == 1L) { - assertEquals(20L, valueValues.get(0).get(0)); - assertEquals(2L, valueValues.get(1).get(1)); - assertEquals(20L, valueValues.get(1).get(0)); - } else if ((long) valueValues.get(0).get(1) == 2L) { - assertEquals(20L, valueValues.get(1).get(0)); - assertEquals(1L, valueValues.get(1).get(1)); - assertEquals(20L, valueValues.get(0).get(0)); - } else { - fail("Unexpected group value: " + valueValues.get(0).get(1)); + try (EsqlQueryResponse results = run(command)) { + logger.info(results); + Assert.assertEquals(2, results.columns().size()); + + // assert column metadata + ColumnInfo groupColumn = results.columns().get(0); + assertEquals(expectedGroupName, groupColumn.name()); + assertEquals("long", groupColumn.type()); + ColumnInfo valuesColumn = results.columns().get(1); + assertEquals(expectedFieldName, valuesColumn.name()); + assertEquals("long", valuesColumn.type()); + + // assert column values + List> valueValues = getValuesList(results); + assertEquals(2, valueValues.size()); + // This is loathsome, find a declarative way to assert the expected output. + if ((long) valueValues.get(0).get(1) == 1L) { + assertEquals(20L, valueValues.get(0).get(0)); + assertEquals(2L, valueValues.get(1).get(1)); + assertEquals(20L, valueValues.get(1).get(0)); + } else if ((long) valueValues.get(0).get(1) == 2L) { + assertEquals(20L, valueValues.get(1).get(0)); + assertEquals(1L, valueValues.get(1).get(1)); + assertEquals(20L, valueValues.get(0).get(0)); + } else { + fail("Unexpected group value: " + valueValues.get(0).get(1)); + } } } // Grouping where the groupby field is of a date type. public void testFromStatsGroupingByDate() { - EsqlQueryResponse results = run("from test | stats avg(count) by time"); - logger.info(results); - Assert.assertEquals(2, results.columns().size()); - Assert.assertEquals(40, getValuesList(results).size()); - - // assert column metadata - assertEquals("avg(count)", results.columns().get(0).name()); - assertEquals("double", results.columns().get(0).type()); - assertEquals("time", results.columns().get(1).name()); - assertEquals("long", results.columns().get(1).type()); - - // assert column values - List expectedValues = LongStream.range(0, 40).map(i -> epoch + i).sorted().boxed().toList(); - List actualValues = IntStream.range(0, 40) - .mapToLong(i -> (Long) getValuesList(results).get(i).get(1)) - .sorted() - .boxed() - .toList(); - assertEquals(expectedValues, actualValues); + try (EsqlQueryResponse results = run("from test | stats avg(count) by time")) { + logger.info(results); + Assert.assertEquals(2, results.columns().size()); + Assert.assertEquals(40, getValuesList(results).size()); + + // assert column metadata + assertEquals("avg(count)", results.columns().get(0).name()); + assertEquals("double", results.columns().get(0).type()); + assertEquals("time", results.columns().get(1).name()); + assertEquals("long", results.columns().get(1).type()); + + // assert column values + List expectedValues = LongStream.range(0, 40).map(i -> epoch + i).sorted().boxed().toList(); + List actualValues = IntStream.range(0, 40) + .mapToLong(i -> (Long) getValuesList(results).get(i).get(1)) + .sorted() + .boxed() + .toList(); + assertEquals(expectedValues, actualValues); + } } public void testFromGroupingByNumericFieldWithNulls() { @@ -228,25 +234,26 @@ record Group(Long data, Double avg) {} } public void testFromStatsGroupingByKeyword() { - EsqlQueryResponse results = run("from test | stats avg(count) by color"); - logger.info(results); - 
Assert.assertEquals(2, results.columns().size()); - Assert.assertEquals(3, getValuesList(results).size()); + try (EsqlQueryResponse results = run("from test | stats avg(count) by color")) { + logger.info(results); + Assert.assertEquals(2, results.columns().size()); + Assert.assertEquals(3, getValuesList(results).size()); - // assert column metadata - assertEquals("avg(count)", results.columns().get(0).name()); - assertEquals("double", results.columns().get(0).type()); - assertEquals("color", results.columns().get(1).name()); - assertEquals("keyword", results.columns().get(1).type()); - record Group(String color, double avg) { + // assert column metadata + assertEquals("avg(count)", results.columns().get(0).name()); + assertEquals("double", results.columns().get(0).type()); + assertEquals("color", results.columns().get(1).name()); + assertEquals("keyword", results.columns().get(1).type()); + record Group(String color, double avg) { + } + List expectedGroups = List.of(new Group("blue", 42.0), new Group("green", 44.0), new Group("red", 43)); + List actualGroups = getValuesList(results).stream() + .map(l -> new Group((String) l.get(1), (Double) l.get(0))) + .sorted(comparing(c -> c.color)) + .toList(); + assertThat(actualGroups, equalTo(expectedGroups)); } - List expectedGroups = List.of(new Group("blue", 42.0), new Group("green", 44.0), new Group("red", 43)); - List actualGroups = getValuesList(results).stream() - .map(l -> new Group((String) l.get(1), (Double) l.get(0))) - .sorted(comparing(c -> c.color)) - .toList(); - assertThat(actualGroups, equalTo(expectedGroups)); } public void testFromStatsGroupingByKeywordWithNulls() { @@ -262,69 +269,75 @@ public void testFromStatsGroupingByKeywordWithNulls() { } client().admin().indices().prepareRefresh("test").get(); for (String field : List.of("count", "count_d")) { - EsqlQueryResponse results = run("from test | stats avg = avg(" + field + ") by color"); + try (EsqlQueryResponse results = run("from test | stats avg = avg(" + field + ") by color")) { + logger.info(results); + Assert.assertEquals(2, results.columns().size()); + Assert.assertEquals(5, getValuesList(results).size()); + + // assert column metadata + assertEquals("avg", results.columns().get(0).name()); + assertEquals("double", results.columns().get(0).type()); + assertEquals("color", results.columns().get(1).name()); + assertEquals("keyword", results.columns().get(1).type()); + record Group(String color, Double avg) { + + } + List expectedGroups = List.of( + new Group(null, 120.0), + new Group("blue", 42.0), + new Group("green", 44.0), + new Group("red", 43.0), + new Group("yellow", null) + ); + List actualGroups = getValuesList(results).stream() + .map(l -> new Group((String) l.get(1), (Double) l.get(0))) + .sorted(Comparator.comparing(c -> c.color, Comparator.nullsFirst(String::compareTo))) + .toList(); + assertThat(actualGroups, equalTo(expectedGroups)); + } + } + } + + public void testFromStatsMultipleAggs() { + try ( + EsqlQueryResponse results = run( + "from test | stats a=avg(count), mi=min(count), ma=max(count), s=sum(count), c=count(count) by color" + ) + ) { logger.info(results); - Assert.assertEquals(2, results.columns().size()); - Assert.assertEquals(5, getValuesList(results).size()); + Assert.assertEquals(6, results.columns().size()); + Assert.assertEquals(3, getValuesList(results).size()); // assert column metadata - assertEquals("avg", results.columns().get(0).name()); + assertEquals("a", results.columns().get(0).name()); assertEquals("double", 
results.columns().get(0).type()); - assertEquals("color", results.columns().get(1).name()); - assertEquals("keyword", results.columns().get(1).type()); - record Group(String color, Double avg) { - - } + assertEquals("mi", results.columns().get(1).name()); + assertEquals("long", results.columns().get(1).type()); + assertEquals("ma", results.columns().get(2).name()); + assertEquals("long", results.columns().get(2).type()); + assertEquals("s", results.columns().get(3).name()); + assertEquals("long", results.columns().get(3).type()); + assertEquals("c", results.columns().get(4).name()); + assertEquals("long", results.columns().get(4).type()); + assertEquals("color", results.columns().get(5).name()); + assertEquals("keyword", results.columns().get(5).type()); + record Group(double avg, long mi, long ma, long s, long c, String color) {} List expectedGroups = List.of( - new Group(null, 120.0), - new Group("blue", 42.0), - new Group("green", 44.0), - new Group("red", 43.0), - new Group("yellow", null) + new Group(42, 42, 42, 420, 10, "blue"), + new Group(44, 44, 44, 440, 10, "green"), + new Group(43, 40, 46, 860, 20, "red") ); + // TODO: each aggregator returns Double now, it should in fact mirror the data type of the fields it's aggregating List actualGroups = getValuesList(results).stream() - .map(l -> new Group((String) l.get(1), (Double) l.get(0))) - .sorted(Comparator.comparing(c -> c.color, Comparator.nullsFirst(String::compareTo))) + .map( + l -> new Group((Double) l.get(0), (Long) l.get(1), (Long) l.get(2), (Long) l.get(3), (Long) l.get(4), (String) l.get(5)) + ) + .sorted(comparing(c -> c.color)) .toList(); assertThat(actualGroups, equalTo(expectedGroups)); } } - public void testFromStatsMultipleAggs() { - EsqlQueryResponse results = run( - "from test | stats a=avg(count), mi=min(count), ma=max(count), s=sum(count), c=count(count) by color" - ); - logger.info(results); - Assert.assertEquals(6, results.columns().size()); - Assert.assertEquals(3, getValuesList(results).size()); - - // assert column metadata - assertEquals("a", results.columns().get(0).name()); - assertEquals("double", results.columns().get(0).type()); - assertEquals("mi", results.columns().get(1).name()); - assertEquals("long", results.columns().get(1).type()); - assertEquals("ma", results.columns().get(2).name()); - assertEquals("long", results.columns().get(2).type()); - assertEquals("s", results.columns().get(3).name()); - assertEquals("long", results.columns().get(3).type()); - assertEquals("c", results.columns().get(4).name()); - assertEquals("long", results.columns().get(4).type()); - assertEquals("color", results.columns().get(5).name()); - assertEquals("keyword", results.columns().get(5).type()); - record Group(double avg, long mi, long ma, long s, long c, String color) {} - List expectedGroups = List.of( - new Group(42, 42, 42, 420, 10, "blue"), - new Group(44, 44, 44, 440, 10, "green"), - new Group(43, 40, 46, 860, 20, "red") - ); - // TODO: each aggregator returns Double now, it should in fact mirror the data type of the fields it's aggregating - List actualGroups = getValuesList(results).stream() - .map(l -> new Group((Double) l.get(0), (Long) l.get(1), (Long) l.get(2), (Long) l.get(3), (Long) l.get(4), (String) l.get(5))) - .sorted(comparing(c -> c.color)) - .toList(); - assertThat(actualGroups, equalTo(expectedGroups)); - } - public void testFromSortWithTieBreakerLimit() { try (EsqlQueryResponse results = run("from test | sort data, count desc, time | limit 5 | keep data, count, time")) { logger.info(results); 
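The tests above lean on a small idiom worth noting: declare a local record inside the test, map each result row into it, sort, and compare with plain equality, which keeps grouped-output assertions order-independent and readable. A minimal sketch, assuming rows arrive as lists in column order (the record name and expected values are illustrative):

    record Row(String color, double avg) {}
    List<Row> actual = getValuesList(results).stream()
        .map(r -> new Row((String) r.get(1), (Double) r.get(0)))
        .sorted(comparing(row -> row.color))
        .toList();
    assertThat(actual, equalTo(List.of(new Row("blue", 42.0), new Row("green", 44.0))));
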
@@ -342,103 +355,115 @@ public void testFromSortWithTieBreakerLimit() { } public void testFromStatsProjectGroup() { - EsqlQueryResponse results = run("from test | stats avg_count = avg(count) by data | keep data"); - logger.info(results); - assertThat(results.columns().stream().map(ColumnInfo::name).toList(), contains("data")); - assertThat(results.columns().stream().map(ColumnInfo::type).toList(), contains("long")); - assertThat(getValuesList(results), containsInAnyOrder(List.of(1L), List.of(2L))); + try (EsqlQueryResponse results = run("from test | stats avg_count = avg(count) by data | keep data")) { + logger.info(results); + assertThat(results.columns().stream().map(ColumnInfo::name).toList(), contains("data")); + assertThat(results.columns().stream().map(ColumnInfo::type).toList(), contains("long")); + assertThat(getValuesList(results), containsInAnyOrder(List.of(1L), List.of(2L))); + } } public void testRowStatsProjectGroupByInt() { - EsqlQueryResponse results = run("row a = 1, b = 2 | stats count(b) by a | keep a"); - logger.info(results); - assertThat(results.columns().stream().map(ColumnInfo::name).toList(), contains("a")); - assertThat(results.columns().stream().map(ColumnInfo::type).toList(), contains("integer")); - assertThat(getValuesList(results), contains(List.of(1))); + try (EsqlQueryResponse results = run("row a = 1, b = 2 | stats count(b) by a | keep a")) { + logger.info(results); + assertThat(results.columns().stream().map(ColumnInfo::name).toList(), contains("a")); + assertThat(results.columns().stream().map(ColumnInfo::type).toList(), contains("integer")); + assertThat(getValuesList(results), contains(List.of(1))); + } } public void testRowStatsProjectGroupByLong() { - EsqlQueryResponse results = run("row a = 1000000000000, b = 2 | stats count(b) by a | keep a"); - logger.info(results); - assertThat(results.columns().stream().map(ColumnInfo::name).toList(), contains("a")); - assertThat(results.columns().stream().map(ColumnInfo::type).toList(), contains("long")); - assertThat(getValuesList(results), contains(List.of(1000000000000L))); + try (EsqlQueryResponse results = run("row a = 1000000000000, b = 2 | stats count(b) by a | keep a")) { + logger.info(results); + assertThat(results.columns().stream().map(ColumnInfo::name).toList(), contains("a")); + assertThat(results.columns().stream().map(ColumnInfo::type).toList(), contains("long")); + assertThat(getValuesList(results), contains(List.of(1000000000000L))); + } } public void testRowStatsProjectGroupByDouble() { - EsqlQueryResponse results = run("row a = 1.0, b = 2 | stats count(b) by a | keep a"); - logger.info(results); - assertThat(results.columns().stream().map(ColumnInfo::name).toList(), contains("a")); - assertThat(results.columns().stream().map(ColumnInfo::type).toList(), contains("double")); - assertThat(getValuesList(results), contains(List.of(1.0))); + try (EsqlQueryResponse results = run("row a = 1.0, b = 2 | stats count(b) by a | keep a")) { + logger.info(results); + assertThat(results.columns().stream().map(ColumnInfo::name).toList(), contains("a")); + assertThat(results.columns().stream().map(ColumnInfo::type).toList(), contains("double")); + assertThat(getValuesList(results), contains(List.of(1.0))); + } } public void testRowStatsProjectGroupByKeyword() { - EsqlQueryResponse results = run("row a = \"hello\", b = 2 | stats count(b) by a | keep a"); - logger.info(results); - assertThat(results.columns().stream().map(ColumnInfo::name).toList(), contains("a")); - 
assertThat(results.columns().stream().map(ColumnInfo::type).toList(), contains("keyword")); - assertThat(getValuesList(results), contains(List.of("hello"))); + try (EsqlQueryResponse results = run("row a = \"hello\", b = 2 | stats count(b) by a | keep a")) { + logger.info(results); + assertThat(results.columns().stream().map(ColumnInfo::name).toList(), contains("a")); + assertThat(results.columns().stream().map(ColumnInfo::type).toList(), contains("keyword")); + assertThat(getValuesList(results), contains(List.of("hello"))); + } } public void testFromStatsProjectGroupByDouble() { - EsqlQueryResponse results = run("from test | stats count(count) by data_d | keep data_d"); - logger.info(results); - assertThat(results.columns().stream().map(ColumnInfo::name).toList(), contains("data_d")); - assertThat(results.columns().stream().map(ColumnInfo::type).toList(), contains("double")); - assertThat(getValuesList(results), containsInAnyOrder(List.of(1.0), List.of(2.0))); + try (EsqlQueryResponse results = run("from test | stats count(count) by data_d | keep data_d")) { + logger.info(results); + assertThat(results.columns().stream().map(ColumnInfo::name).toList(), contains("data_d")); + assertThat(results.columns().stream().map(ColumnInfo::type).toList(), contains("double")); + assertThat(getValuesList(results), containsInAnyOrder(List.of(1.0), List.of(2.0))); + } } public void testFromStatsProjectGroupWithAlias() { String query = "from test | stats avg_count = avg(count) by data | eval d2 = data | rename data as d | keep d, d2"; - EsqlQueryResponse results = run(query); - logger.info(results); - assertThat(results.columns().stream().map(ColumnInfo::name).toList(), contains("d", "d2")); - assertThat(results.columns().stream().map(ColumnInfo::type).toList(), contains("long", "long")); - assertThat(getValuesList(results), containsInAnyOrder(List.of(1L, 1L), List.of(2L, 2L))); + try (EsqlQueryResponse results = run(query)) { + logger.info(results); + assertThat(results.columns().stream().map(ColumnInfo::name).toList(), contains("d", "d2")); + assertThat(results.columns().stream().map(ColumnInfo::type).toList(), contains("long", "long")); + assertThat(getValuesList(results), containsInAnyOrder(List.of(1L, 1L), List.of(2L, 2L))); + } } public void testFromStatsProjectAgg() { - EsqlQueryResponse results = run("from test | stats a = avg(count) by data | keep a"); - logger.info(results); - assertThat(results.columns().stream().map(ColumnInfo::name).toList(), contains("a")); - assertThat(results.columns().stream().map(ColumnInfo::type).toList(), contains("double")); - assertThat(getValuesList(results), containsInAnyOrder(List.of(42d), List.of(44d))); + try (EsqlQueryResponse results = run("from test | stats a = avg(count) by data | keep a")) { + logger.info(results); + assertThat(results.columns().stream().map(ColumnInfo::name).toList(), contains("a")); + assertThat(results.columns().stream().map(ColumnInfo::type).toList(), contains("double")); + assertThat(getValuesList(results), containsInAnyOrder(List.of(42d), List.of(44d))); + } } public void testFromStatsProjectAggWithAlias() { - EsqlQueryResponse results = run("from test | stats a = avg(count) by data | rename a as b | keep b"); - logger.info(results); - assertThat(results.columns().stream().map(ColumnInfo::name).toList(), contains("b")); - assertThat(results.columns().stream().map(ColumnInfo::type).toList(), contains("double")); - assertThat(getValuesList(results), containsInAnyOrder(List.of(42d), List.of(44d))); + try (EsqlQueryResponse results = 
run("from test | stats a = avg(count) by data | rename a as b | keep b")) { + logger.info(results); + assertThat(results.columns().stream().map(ColumnInfo::name).toList(), contains("b")); + assertThat(results.columns().stream().map(ColumnInfo::type).toList(), contains("double")); + assertThat(getValuesList(results), containsInAnyOrder(List.of(42d), List.of(44d))); + } } public void testFromProjectStatsGroupByAlias() { - EsqlQueryResponse results = run("from test | rename data as d | keep d, count | stats avg(count) by d"); - logger.info(results); - assertThat(results.columns().stream().map(ColumnInfo::name).toList(), contains("avg(count)", "d")); - assertThat(results.columns().stream().map(ColumnInfo::type).toList(), contains("double", "long")); - assertThat(getValuesList(results), containsInAnyOrder(List.of(42d, 1L), List.of(44d, 2L))); + try (EsqlQueryResponse results = run("from test | rename data as d | keep d, count | stats avg(count) by d")) { + logger.info(results); + assertThat(results.columns().stream().map(ColumnInfo::name).toList(), contains("avg(count)", "d")); + assertThat(results.columns().stream().map(ColumnInfo::type).toList(), contains("double", "long")); + assertThat(getValuesList(results), containsInAnyOrder(List.of(42d, 1L), List.of(44d, 2L))); + } } public void testFromProjectStatsAggregateAlias() { - EsqlQueryResponse results = run("from test | rename count as c | keep c, data | stats avg(c) by data"); - logger.info(results); - assertThat(results.columns().stream().map(ColumnInfo::name).toList(), contains("avg(c)", "data")); - assertThat(results.columns().stream().map(ColumnInfo::type).toList(), contains("double", "long")); - assertThat(getValuesList(results), containsInAnyOrder(List.of(42d, 1L), List.of(44d, 2L))); + try (EsqlQueryResponse results = run("from test | rename count as c | keep c, data | stats avg(c) by data")) { + logger.info(results); + assertThat(results.columns().stream().map(ColumnInfo::name).toList(), contains("avg(c)", "data")); + assertThat(results.columns().stream().map(ColumnInfo::type).toList(), contains("double", "long")); + assertThat(getValuesList(results), containsInAnyOrder(List.of(42d, 1L), List.of(44d, 2L))); + } } public void testFromEvalStats() { - EsqlQueryResponse results = run("from test | eval ratio = data_d / count_d | stats avg(ratio)"); - logger.info(results); - Assert.assertEquals(1, results.columns().size()); - Assert.assertEquals(1, getValuesList(results).size()); - assertEquals("avg(ratio)", results.columns().get(0).name()); - assertEquals("double", results.columns().get(0).type()); - assertEquals(1, getValuesList(results).get(0).size()); - assertEquals(0.034d, (double) getValuesList(results).get(0).get(0), 0.001d); + try (EsqlQueryResponse results = run("from test | eval ratio = data_d / count_d | stats avg(ratio)")) { + logger.info(results); + Assert.assertEquals(1, results.columns().size()); + Assert.assertEquals(1, getValuesList(results).size()); + assertEquals("avg(ratio)", results.columns().get(0).name()); + assertEquals("double", results.columns().get(0).type()); + assertEquals(1, getValuesList(results).get(0).size()); + assertEquals(0.034d, (double) getValuesList(results).get(0).get(0), 0.001d); + } } public void testUngroupedCountAll() { @@ -485,48 +510,57 @@ public void testGroupedCountAllWithFilter() { public void testFromStatsEvalWithPragma() { assumeTrue("pragmas only enabled on snapshot builds", Build.current().isSnapshot()); - EsqlQueryResponse results = run("from test | stats avg_count = avg(count) | eval x 
= avg_count + 7"); - logger.info(results); - Assert.assertEquals(1, getValuesList(results).size()); - assertEquals(2, getValuesList(results).get(0).size()); - assertEquals(50, (double) getValuesList(results).get(0).get(results.columns().indexOf(new ColumnInfo("x", "double"))), 1d); - assertEquals(43, (double) getValuesList(results).get(0).get(results.columns().indexOf(new ColumnInfo("avg_count", "double"))), 1d); + try (EsqlQueryResponse results = run("from test | stats avg_count = avg(count) | eval x = avg_count + 7")) { + logger.info(results); + Assert.assertEquals(1, getValuesList(results).size()); + assertEquals(2, getValuesList(results).get(0).size()); + assertEquals(50, (double) getValuesList(results).get(0).get(results.columns().indexOf(new ColumnInfo("x", "double"))), 1d); + assertEquals( + 43, + (double) getValuesList(results).get(0).get(results.columns().indexOf(new ColumnInfo("avg_count", "double"))), + 1d + ); + } } public void testWhere() { - EsqlQueryResponse results = run("from test | where count > 40"); - logger.info(results); - Assert.assertEquals(30, getValuesList(results).size()); - var countIndex = results.columns().indexOf(new ColumnInfo("count", "long")); - for (List values : getValuesList(results)) { - assertThat((Long) values.get(countIndex), greaterThan(40L)); + try (EsqlQueryResponse results = run("from test | where count > 40")) { + logger.info(results); + Assert.assertEquals(30, getValuesList(results).size()); + var countIndex = results.columns().indexOf(new ColumnInfo("count", "long")); + for (List values : getValuesList(results)) { + assertThat((Long) values.get(countIndex), greaterThan(40L)); + } } } public void testProjectWhere() { - EsqlQueryResponse results = run("from test | keep count | where count > 40"); - logger.info(results); - Assert.assertEquals(30, getValuesList(results).size()); - int countIndex = results.columns().indexOf(new ColumnInfo("count", "long")); - for (List values : getValuesList(results)) { - assertThat((Long) values.get(countIndex), greaterThan(40L)); + try (EsqlQueryResponse results = run("from test | keep count | where count > 40")) { + logger.info(results); + Assert.assertEquals(30, getValuesList(results).size()); + int countIndex = results.columns().indexOf(new ColumnInfo("count", "long")); + for (List values : getValuesList(results)) { + assertThat((Long) values.get(countIndex), greaterThan(40L)); + } } } public void testEvalWhere() { - EsqlQueryResponse results = run("from test | eval x = count / 2 | where x > 20"); - logger.info(results); - Assert.assertEquals(30, getValuesList(results).size()); - int countIndex = results.columns().indexOf(new ColumnInfo("x", "long")); - for (List values : getValuesList(results)) { - assertThat((Long) values.get(countIndex), greaterThan(20L)); + try (EsqlQueryResponse results = run("from test | eval x = count / 2 | where x > 20")) { + logger.info(results); + Assert.assertEquals(30, getValuesList(results).size()); + int countIndex = results.columns().indexOf(new ColumnInfo("x", "long")); + for (List values : getValuesList(results)) { + assertThat((Long) values.get(countIndex), greaterThan(20L)); + } } } public void testFilterWithNullAndEval() { - EsqlQueryResponse results = run("row a = 1 | eval b = a + null | where b > 1"); - logger.info(results); - Assert.assertEquals(0, getValuesList(results).size()); + try (EsqlQueryResponse results = run("row a = 1 | eval b = a + null | where b > 1")) { + logger.info(results); + Assert.assertEquals(0, getValuesList(results).size()); + } } public void 
testStringLength() { @@ -546,101 +580,112 @@ public void testFilterWithNullAndEvalFromIndex() { client().admin().indices().prepareRefresh("test").get(); // sanity - EsqlQueryResponse results = run("from test"); - Assert.assertEquals(41, getValuesList(results).size()); - - results = run("from test | eval newCount = count + 1 | where newCount > 1"); - logger.info(results); - Assert.assertEquals(40, getValuesList(results).size()); - assertThat(results.columns(), hasItem(equalTo(new ColumnInfo("count", "long")))); - assertThat(results.columns(), hasItem(equalTo(new ColumnInfo("count_d", "double")))); - assertThat(results.columns(), hasItem(equalTo(new ColumnInfo("data", "long")))); - assertThat(results.columns(), hasItem(equalTo(new ColumnInfo("data_d", "double")))); - assertThat(results.columns(), hasItem(equalTo(new ColumnInfo("time", "long")))); + try (EsqlQueryResponse results = run("from test")) { + Assert.assertEquals(41, getValuesList(results).size()); + } + try (EsqlQueryResponse results = run("from test | eval newCount = count + 1 | where newCount > 1")) { + logger.info(results); + Assert.assertEquals(40, getValuesList(results).size()); + assertThat(results.columns(), hasItem(equalTo(new ColumnInfo("count", "long")))); + assertThat(results.columns(), hasItem(equalTo(new ColumnInfo("count_d", "double")))); + assertThat(results.columns(), hasItem(equalTo(new ColumnInfo("data", "long")))); + assertThat(results.columns(), hasItem(equalTo(new ColumnInfo("data_d", "double")))); + assertThat(results.columns(), hasItem(equalTo(new ColumnInfo("time", "long")))); + } } public void testMultiConditionalWhere() { - EsqlQueryResponse results = run( - "from test | eval abc = 1+2 | where (abc + count >= 44 or data_d == 2) and data == 1 | keep color, abc" - ); - logger.info(results); - Assert.assertEquals(10, getValuesList(results).size()); - Assert.assertEquals(2, results.columns().size()); - for (List values : getValuesList(results)) { - assertThat((String) values.get(0), equalTo("green")); - assertThat((Integer) values.get(1), equalTo(3)); + try (var results = run("from test | eval abc = 1+2 | where (abc + count >= 44 or data_d == 2) and data == 1 | keep color, abc")) { + logger.info(results); + Assert.assertEquals(10, getValuesList(results).size()); + Assert.assertEquals(2, results.columns().size()); + for (List values : getValuesList(results)) { + assertThat((String) values.get(0), equalTo("green")); + assertThat((Integer) values.get(1), equalTo(3)); + } } } public void testWhereNegatedCondition() { - EsqlQueryResponse results = run("from test | eval abc=1+2 | where abc + count > 45 and data != 1 | keep color, data"); - logger.info(results); - Assert.assertEquals(10, getValuesList(results).size()); - Assert.assertEquals(2, results.columns().size()); - for (List values : getValuesList(results)) { - assertThat((String) values.get(0), equalTo("red")); - assertThat((Long) values.get(1), equalTo(2L)); + try (var results = run("from test | eval abc=1+2 | where abc + count > 45 and data != 1 | keep color, data")) { + logger.info(results); + Assert.assertEquals(10, getValuesList(results).size()); + Assert.assertEquals(2, results.columns().size()); + for (List values : getValuesList(results)) { + assertThat((String) values.get(0), equalTo("red")); + assertThat((Long) values.get(1), equalTo(2L)); + } } } public void testEvalOverride() { - EsqlQueryResponse results = run("from test | eval count = count + 1 | eval count = count + 1"); - logger.info(results); - Assert.assertEquals(40, 
getValuesList(results).size()); - Assert.assertEquals(1, results.columns().stream().filter(c -> c.name().equals("count")).count()); - int countIndex = results.columns().size() - 1; - Assert.assertEquals(new ColumnInfo("count", "long"), results.columns().get(countIndex)); - for (List values : getValuesList(results)) { - assertThat((Long) values.get(countIndex), greaterThanOrEqualTo(42L)); + try (var results = run("from test | eval count = count + 1 | eval count = count + 1")) { + logger.info(results); + Assert.assertEquals(40, getValuesList(results).size()); + Assert.assertEquals(1, results.columns().stream().filter(c -> c.name().equals("count")).count()); + int countIndex = results.columns().size() - 1; + Assert.assertEquals(new ColumnInfo("count", "long"), results.columns().get(countIndex)); + for (List values : getValuesList(results)) { + assertThat((Long) values.get(countIndex), greaterThanOrEqualTo(42L)); + } } } public void testProjectRename() { - EsqlQueryResponse results = run("from test | eval y = count | rename count as x | keep x, y"); - logger.info(results); - Assert.assertEquals(40, getValuesList(results).size()); - assertThat(results.columns(), contains(new ColumnInfo("x", "long"), new ColumnInfo("y", "long"))); - for (List values : getValuesList(results)) { - assertThat((Long) values.get(0), greaterThanOrEqualTo(40L)); - assertThat(values.get(1), is(values.get(0))); + try (var results = run("from test | eval y = count | rename count as x | keep x, y")) { + logger.info(results); + Assert.assertEquals(40, getValuesList(results).size()); + assertThat(results.columns(), contains(new ColumnInfo("x", "long"), new ColumnInfo("y", "long"))); + for (List values : getValuesList(results)) { + assertThat((Long) values.get(0), greaterThanOrEqualTo(40L)); + assertThat(values.get(1), is(values.get(0))); + } } } public void testProjectRenameEval() { - EsqlQueryResponse results = run("from test | eval y = count | rename count as x | keep x, y | eval x2 = x + 1 | eval y2 = y + 2"); - logger.info(results); - Assert.assertEquals(40, getValuesList(results).size()); - assertThat( - results.columns(), - contains(new ColumnInfo("x", "long"), new ColumnInfo("y", "long"), new ColumnInfo("x2", "long"), new ColumnInfo("y2", "long")) - ); - for (List values : getValuesList(results)) { - assertThat((Long) values.get(0), greaterThanOrEqualTo(40L)); - assertThat(values.get(1), is(values.get(0))); - assertThat(values.get(2), is(((Long) values.get(0)) + 1)); - assertThat(values.get(3), is(((Long) values.get(0)) + 2)); + try (var results = run("from test | eval y = count | rename count as x | keep x, y | eval x2 = x + 1 | eval y2 = y + 2")) { + logger.info(results); + Assert.assertEquals(40, getValuesList(results).size()); + assertThat( + results.columns(), + contains( + new ColumnInfo("x", "long"), + new ColumnInfo("y", "long"), + new ColumnInfo("x2", "long"), + new ColumnInfo("y2", "long") + ) + ); + for (List values : getValuesList(results)) { + assertThat((Long) values.get(0), greaterThanOrEqualTo(40L)); + assertThat(values.get(1), is(values.get(0))); + assertThat(values.get(2), is(((Long) values.get(0)) + 1)); + assertThat(values.get(3), is(((Long) values.get(0)) + 2)); + } } } public void testProjectRenameEvalProject() { - EsqlQueryResponse results = run("from test | eval y = count | rename count as x | keep x, y | eval z = x + y | keep x, y, z"); - logger.info(results); - Assert.assertEquals(40, getValuesList(results).size()); - assertThat(results.columns(), contains(new ColumnInfo("x", "long"), new 
ColumnInfo("y", "long"), new ColumnInfo("z", "long"))); - for (List values : getValuesList(results)) { - assertThat((Long) values.get(0), greaterThanOrEqualTo(40L)); - assertThat(values.get(1), is(values.get(0))); - assertThat(values.get(2), is((Long) values.get(0) * 2)); + try (var results = run("from test | eval y = count | rename count as x | keep x, y | eval z = x + y | keep x, y, z")) { + logger.info(results); + Assert.assertEquals(40, getValuesList(results).size()); + assertThat(results.columns(), contains(new ColumnInfo("x", "long"), new ColumnInfo("y", "long"), new ColumnInfo("z", "long"))); + for (List values : getValuesList(results)) { + assertThat((Long) values.get(0), greaterThanOrEqualTo(40L)); + assertThat(values.get(1), is(values.get(0))); + assertThat(values.get(2), is((Long) values.get(0) * 2)); + } } } public void testProjectOverride() { - EsqlQueryResponse results = run("from test | eval cnt = count | rename count as data | keep cnt, data"); - logger.info(results); - Assert.assertEquals(40, getValuesList(results).size()); - assertThat(results.columns(), contains(new ColumnInfo("cnt", "long"), new ColumnInfo("data", "long"))); - for (List values : getValuesList(results)) { - assertThat(values.get(1), is(values.get(0))); + try (var results = run("from test | eval cnt = count | rename count as data | keep cnt, data")) { + logger.info(results); + Assert.assertEquals(40, getValuesList(results).size()); + assertThat(results.columns(), contains(new ColumnInfo("cnt", "long"), new ColumnInfo("data", "long"))); + for (List values : getValuesList(results)) { + assertThat(values.get(1), is(values.get(0))); + } } } @@ -708,9 +753,10 @@ public void testRefreshSearchIdleShards() throws Exception { stopped.set(true); indexingThread.join(); } - EsqlQueryResponse results = run("from test_refresh | stats s = sum(value)"); - logger.info(results); - assertThat(getValuesList(results).get(0), equalTo(List.of(totalValues.get()))); + try (EsqlQueryResponse results = run("from test_refresh | stats s = sum(value)")) { + logger.info(results); + assertThat(getValuesList(results).get(0), equalTo(List.of(totalValues.get()))); + } } public void testESFilter() throws Exception { @@ -737,16 +783,19 @@ public void testESFilter() throws Exception { long from = randomBoolean() ? Long.MIN_VALUE : randomLongBetween(-1000, 1000); long to = randomBoolean() ? 
Long.MAX_VALUE : randomLongBetween(from, from + 1000); QueryBuilder filter = new RangeQueryBuilder("val").from(from, true).to(to, true); - EsqlQueryResponse results = new EsqlQueryRequestBuilder(client(), EsqlQueryAction.INSTANCE).query(command) - .filter(filter) - .pragmas(randomPragmas()) - .get(); - logger.info(results); - OptionalDouble avg = docs.values().stream().filter(v -> from <= v && v <= to).mapToLong(n -> n).average(); - if (avg.isPresent()) { - assertEquals(avg.getAsDouble(), (double) getValuesList(results).get(0).get(0), 0.01d); - } else { - assertThat(getValuesList(results).get(0).get(0), nullValue()); + try ( + EsqlQueryResponse results = new EsqlQueryRequestBuilder(client(), EsqlQueryAction.INSTANCE).query(command) + .filter(filter) + .pragmas(randomPragmas()) + .get() + ) { + logger.info(results); + OptionalDouble avg = docs.values().stream().filter(v -> from <= v && v <= to).mapToLong(n -> n).average(); + if (avg.isPresent()) { + assertEquals(avg.getAsDouble(), (double) getValuesList(results).get(0).get(0), 0.01d); + } else { + assertThat(getValuesList(results).get(0).get(0), nullValue()); + } } } @@ -793,21 +842,23 @@ record Doc(long val, String tag) { } public void testEvalWithNullAndAvg() { - EsqlQueryResponse results = run("from test | eval nullsum = count_d + null | stats avg(nullsum)"); - logger.info(results); - Assert.assertEquals(1, results.columns().size()); - Assert.assertEquals(1, getValuesList(results).size()); - assertEquals("avg(nullsum)", results.columns().get(0).name()); - assertEquals("double", results.columns().get(0).type()); - assertEquals(1, getValuesList(results).get(0).size()); - assertNull(getValuesList(results).get(0).get(0)); + try (EsqlQueryResponse results = run("from test | eval nullsum = count_d + null | stats avg(nullsum)")) { + logger.info(results); + Assert.assertEquals(1, results.columns().size()); + Assert.assertEquals(1, getValuesList(results).size()); + assertEquals("avg(nullsum)", results.columns().get(0).name()); + assertEquals("double", results.columns().get(0).type()); + assertEquals(1, getValuesList(results).get(0).size()); + assertNull(getValuesList(results).get(0).get(0)); + } } public void testFromStatsLimit() { - EsqlQueryResponse results = run("from test | stats ac = avg(count) by data | limit 1"); - logger.info(results); - assertThat(results.columns(), contains(new ColumnInfo("ac", "double"), new ColumnInfo("data", "long"))); - assertThat(getValuesList(results), contains(anyOf(contains(42.0, 1L), contains(44.0, 2L)))); + try (EsqlQueryResponse results = run("from test | stats ac = avg(count) by data | limit 1")) { + logger.info(results); + assertThat(results.columns(), contains(new ColumnInfo("ac", "double"), new ColumnInfo("data", "long"))); + assertThat(getValuesList(results), contains(anyOf(contains(42.0, 1L), contains(44.0, 2L)))); + } } public void testFromLimit() { @@ -819,18 +870,20 @@ public void testFromLimit() { } public void testDropAllColumns() { - EsqlQueryResponse results = run("from test | keep data | drop data | eval a = 1"); - logger.info(results); - assertThat(results.columns(), hasSize(1)); - assertThat(results.columns(), contains(new ColumnInfo("a", "integer"))); - assertThat(getValuesList(results), is(empty())); + try (EsqlQueryResponse results = run("from test | keep data | drop data | eval a = 1")) { + logger.info(results); + assertThat(results.columns(), hasSize(1)); + assertThat(results.columns(), contains(new ColumnInfo("a", "integer"))); + assertThat(getValuesList(results), is(empty())); + } } 
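The testIndexPatterns rewrite just below shows most clearly why the reassignment style had to go: overwriting a single results variable abandons the previous response without closing it, while one try block per query scopes and closes each response. A minimal sketch, with illustrative queries:

    // before: once results is reassigned, the first response is never closed
    EsqlQueryResponse results = run("from test_index_patterns* | stats count(data)");
    results = run("from test_index_patterns_1 | stats count(data)"); // leaks the first response

    // after: each response lives in its own try block and is always released
    try (var first = run("from test_index_patterns* | stats count(data)")) {
        assertEquals(1, getValuesList(first).size());
    }
    try (var second = run("from test_index_patterns_1 | stats count(data)")) {
        assertEquals(1, getValuesList(second).size());
    }
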
public void testDropAllColumnsWithStats() { - EsqlQueryResponse results = run("from test | stats g = count(data) | drop g"); - logger.info(results); - assertThat(results.columns(), is(empty())); - assertThat(getValuesList(results), is(empty())); + try (EsqlQueryResponse results = run("from test | stats g = count(data) | drop g")) { + logger.info(results); + assertThat(results.columns(), is(empty())); + assertThat(getValuesList(results), is(empty())); + } } public void testIndexPatterns() throws Exception { @@ -856,35 +909,43 @@ public void testIndexPatterns() throws Exception { .get(); } - EsqlQueryResponse results = run("from test_index_patterns* | stats count(data), sum(count)"); - assertEquals(1, getValuesList(results).size()); - assertEquals(15L, getValuesList(results).get(0).get(0)); - assertEquals(120000L, getValuesList(results).get(0).get(1)); + try (var results = run("from test_index_patterns* | stats count(data), sum(count)")) { + assertEquals(1, getValuesList(results).size()); + assertEquals(15L, getValuesList(results).get(0).get(0)); + assertEquals(120000L, getValuesList(results).get(0).get(1)); + + } - results = run("from test_index_patterns_1,test_index_patterns_2 | stats count(data), sum(count)"); - assertEquals(1, getValuesList(results).size()); - assertEquals(10L, getValuesList(results).get(0).get(0)); - assertEquals(55000L, getValuesList(results).get(0).get(1)); + try (var results = run("from test_index_patterns_1,test_index_patterns_2 | stats count(data), sum(count)")) { + assertEquals(1, getValuesList(results).size()); + assertEquals(10L, getValuesList(results).get(0).get(0)); + assertEquals(55000L, getValuesList(results).get(0).get(1)); + } - results = run("from test_index_patterns_1*,test_index_patterns_2* | stats count(data), sum(count)"); - assertEquals(1, getValuesList(results).size()); - assertEquals(10L, getValuesList(results).get(0).get(0)); - assertEquals(55000L, getValuesList(results).get(0).get(1)); + try (var results = run("from test_index_patterns_1*,test_index_patterns_2* | stats count(data), sum(count)")) { + assertEquals(1, getValuesList(results).size()); + assertEquals(10L, getValuesList(results).get(0).get(0)); + assertEquals(55000L, getValuesList(results).get(0).get(1)); + } - results = run("from test_index_patterns_*,-test_index_patterns_1 | stats count(data), sum(count)"); - assertEquals(1, getValuesList(results).size()); - assertEquals(10L, getValuesList(results).get(0).get(0)); - assertEquals(105000L, getValuesList(results).get(0).get(1)); + try (var results = run("from test_index_patterns_*,-test_index_patterns_1 | stats count(data), sum(count)")) { + assertEquals(1, getValuesList(results).size()); + assertEquals(10L, getValuesList(results).get(0).get(0)); + assertEquals(105000L, getValuesList(results).get(0).get(1)); + } - results = run("from * | stats count(data), sum(count)"); - assertEquals(1, getValuesList(results).size()); - assertEquals(55L, getValuesList(results).get(0).get(0)); - assertEquals(121720L, getValuesList(results).get(0).get(1)); + try (var results = run("from * | stats count(data), sum(count)")) { + assertEquals(1, getValuesList(results).size()); + assertEquals(55L, getValuesList(results).get(0).get(0)); + assertEquals(121720L, getValuesList(results).get(0).get(1)); - results = run("from test_index_patterns_2 | stats count(data), sum(count)"); - assertEquals(1, getValuesList(results).size()); - assertEquals(5L, getValuesList(results).get(0).get(0)); - assertEquals(40000L, getValuesList(results).get(0).get(1)); + } + + try (var 
results = run("from test_index_patterns_2 | stats count(data), sum(count)")) { + assertEquals(1, getValuesList(results).size()); + assertEquals(5L, getValuesList(results).get(0).get(0)); + assertEquals(40000L, getValuesList(results).get(0).get(1)); + } } public void testOverlappingIndexPatterns() throws Exception { @@ -923,48 +984,52 @@ public void testOverlappingIndexPatterns() throws Exception { public void testEmptyIndex() { assertAcked(client().admin().indices().prepareCreate("test_empty").setMapping("k", "type=keyword", "v", "type=long").get()); - EsqlQueryResponse results = run("from test_empty"); - assertThat(results.columns(), equalTo(List.of(new ColumnInfo("k", "keyword"), new ColumnInfo("v", "long")))); - assertThat(getValuesList(results), empty()); + try (EsqlQueryResponse results = run("from test_empty")) { + assertThat(results.columns(), equalTo(List.of(new ColumnInfo("k", "keyword"), new ColumnInfo("v", "long")))); + assertThat(getValuesList(results), empty()); + } } public void testShowInfo() { - EsqlQueryResponse results = run("show info"); - assertThat( - results.columns(), - equalTo(List.of(new ColumnInfo("version", "keyword"), new ColumnInfo("date", "keyword"), new ColumnInfo("hash", "keyword"))) - ); - assertThat(getValuesList(results).size(), equalTo(1)); - assertThat(getValuesList(results).get(0).get(0), equalTo(Build.current().version())); - assertThat(getValuesList(results).get(0).get(1), equalTo(Build.current().date())); - assertThat(getValuesList(results).get(0).get(2), equalTo(Build.current().hash())); + try (EsqlQueryResponse results = run("show info")) { + assertThat( + results.columns(), + equalTo(List.of(new ColumnInfo("version", "keyword"), new ColumnInfo("date", "keyword"), new ColumnInfo("hash", "keyword"))) + ); + assertThat(getValuesList(results).size(), equalTo(1)); + assertThat(getValuesList(results).get(0).get(0), equalTo(Build.current().version())); + assertThat(getValuesList(results).get(0).get(1), equalTo(Build.current().date())); + assertThat(getValuesList(results).get(0).get(2), equalTo(Build.current().hash())); + } } public void testShowFunctions() { - EsqlQueryResponse results = run("show functions"); - assertThat( - results.columns(), - equalTo( - List.of( - new ColumnInfo("name", "keyword"), - new ColumnInfo("synopsis", "keyword"), - new ColumnInfo("argNames", "keyword"), - new ColumnInfo("argTypes", "keyword"), - new ColumnInfo("argDescriptions", "keyword"), - new ColumnInfo("returnType", "keyword"), - new ColumnInfo("description", "keyword"), - new ColumnInfo("optionalArgs", "boolean"), - new ColumnInfo("variadic", "boolean") + try (EsqlQueryResponse results = run("show functions")) { + assertThat( + results.columns(), + equalTo( + List.of( + new ColumnInfo("name", "keyword"), + new ColumnInfo("synopsis", "keyword"), + new ColumnInfo("argNames", "keyword"), + new ColumnInfo("argTypes", "keyword"), + new ColumnInfo("argDescriptions", "keyword"), + new ColumnInfo("returnType", "keyword"), + new ColumnInfo("description", "keyword"), + new ColumnInfo("optionalArgs", "boolean"), + new ColumnInfo("variadic", "boolean") + ) ) - ) - ); - assertThat(getValuesList(results).size(), equalTo(new EsqlFunctionRegistry().listFunctions().size())); + ); + assertThat(getValuesList(results).size(), equalTo(new EsqlFunctionRegistry().listFunctions().size())); + } } public void testInWithNullValue() { - EsqlQueryResponse results = run("from test | where null in (data, 2) | keep data"); - assertThat(results.columns(), equalTo(List.of(new ColumnInfo("data", 
"long")))); - assertThat(getValuesList(results).size(), equalTo(0)); + try (EsqlQueryResponse results = run("from test | where null in (data, 2) | keep data")) { + assertThat(results.columns(), equalTo(List.of(new ColumnInfo("data", "long")))); + assertThat(getValuesList(results).size(), equalTo(0)); + } } public void testTopNPushedToLucene() { @@ -1117,7 +1182,7 @@ public void testGroupingMultiValueByOrdinals() { var functions = List.of("min(v)", "max(v)", "count_distinct(v)", "count(v)", "sum(v)", "avg(v)", "percentile(v, 90)"); for (String fn : functions) { String query = String.format(Locale.ROOT, "from %s | stats s = %s by kw", indexName, fn); - run(query); + run(query).close(); } } @@ -1194,14 +1259,15 @@ private void createAlias(List indices, String alias) throws InterruptedE } private void assertNoNestedDocuments(String query, int docsCount, long minValue, long maxValue) { - EsqlQueryResponse results = run(query); - assertThat(results.columns(), contains(new ColumnInfo("data", "long"))); - assertThat(results.columns().size(), is(1)); - assertThat(getValuesList(results).size(), is(docsCount)); - for (List row : getValuesList(results)) { - assertThat(row.size(), is(1)); - // check that all the values returned are the regular ones - assertThat((Long) row.get(0), allOf(greaterThanOrEqualTo(minValue), lessThanOrEqualTo(maxValue))); + try (EsqlQueryResponse results = run(query)) { + assertThat(results.columns(), contains(new ColumnInfo("data", "long"))); + assertThat(results.columns().size(), is(1)); + assertThat(getValuesList(results).size(), is(docsCount)); + for (List row : getValuesList(results)) { + assertThat(row.size(), is(1)); + // check that all the values returned are the regular ones + assertThat((Long) row.get(0), allOf(greaterThanOrEqualTo(minValue), lessThanOrEqualTo(maxValue))); + } } } diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionRuntimeFieldIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionRuntimeFieldIT.java index 4a21cc5a77521..41450be131e2a 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionRuntimeFieldIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionRuntimeFieldIT.java @@ -53,14 +53,16 @@ protected Collection> nodePlugins() { public void testLong() throws InterruptedException, IOException { createIndexWithConstRuntimeField("long"); - EsqlQueryResponse response = run("from test | stats sum(const)"); - assertThat(getValuesList(response), equalTo(List.of(List.of((long) SIZE)))); + try (EsqlQueryResponse response = run("from test | stats sum(const)")) { + assertThat(getValuesList(response), equalTo(List.of(List.of((long) SIZE)))); + } } public void testDouble() throws InterruptedException, IOException { createIndexWithConstRuntimeField("double"); - EsqlQueryResponse response = run("from test | stats sum(const)"); - assertThat(getValuesList(response), equalTo(List.of(List.of((double) SIZE)))); + try (EsqlQueryResponse response = run("from test | stats sum(const)")) { + assertThat(getValuesList(response), equalTo(List.of(List.of((double) SIZE)))); + } } public void testKeyword() throws InterruptedException, IOException { @@ -76,8 +78,9 @@ public void testKeyword() throws InterruptedException, IOException { */ public void testKeywordBy() throws InterruptedException, IOException { createIndexWithConstRuntimeField("keyword"); - EsqlQueryResponse 
response = run("from test | stats max(foo) by const"); - assertThat(getValuesList(response), equalTo(List.of(List.of(SIZE - 1L, "const")))); + try (EsqlQueryResponse response = run("from test | stats max(foo) by const")) { + assertThat(getValuesList(response), equalTo(List.of(List.of(SIZE - 1L, "const")))); + } } public void testBoolean() throws InterruptedException, IOException { @@ -89,9 +92,10 @@ public void testBoolean() throws InterruptedException, IOException { public void testDate() throws InterruptedException, IOException { createIndexWithConstRuntimeField("date"); - EsqlQueryResponse response = run(""" - from test | eval d=date_format("yyyy", const) | stats min (foo) by d"""); - assertThat(getValuesList(response), equalTo(List.of(List.of(0L, "2023")))); + try (EsqlQueryResponse response = run(""" + from test | eval d=date_format("yyyy", const) | stats min (foo) by d""")) { + assertThat(getValuesList(response), equalTo(List.of(List.of(0L, "2023")))); + } } private void createIndexWithConstRuntimeField(String type) throws InterruptedException, IOException { diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/ManyShardsIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/ManyShardsIT.java index c1476c8c52de5..7828ba97ed62b 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/ManyShardsIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/ManyShardsIT.java @@ -63,7 +63,7 @@ public void testConcurrentQueries() throws Exception { if (canUseQueryPragmas()) { pragmas.put(randomPragmas().getSettings()).put("exchange_concurrent_clients", between(1, 2)); } - run("from test-* | stats count(user) by tags", new QueryPragmas(pragmas.build())); + run("from test-* | stats count(user) by tags", new QueryPragmas(pragmas.build())).close(); }); } for (Thread thread : threads) { From 5b2917c8769a16e4c493d3a18be080912c3f8a6b Mon Sep 17 00:00:00 2001 From: tmgordeeva Date: Tue, 3 Oct 2023 01:13:36 -0700 Subject: [PATCH 054/136] Allow parsing on non-string routing fields (#97729) Allow parsing on non-string routing fields Closes https://github.com/elastic/elasticsearch/issues/96552 --- docs/changelog/97729.yaml | 5 +++ .../test/aggregations/time_series.yml | 36 +++++++++++++++++++ .../cluster/routing/IndexRouting.java | 4 +++ .../cluster/routing/IndexRoutingTests.java | 10 ++++++ 4 files changed, 55 insertions(+) create mode 100644 docs/changelog/97729.yaml diff --git a/docs/changelog/97729.yaml b/docs/changelog/97729.yaml new file mode 100644 index 0000000000000..f80a04bc58f68 --- /dev/null +++ b/docs/changelog/97729.yaml @@ -0,0 +1,5 @@ +pr: 97729 +summary: Allow parsing on non-string routing fields +area: Aggregations +type: bug +issues: [] diff --git a/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/time_series.yml b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/time_series.yml index 37306151a72be..26c9b32a2f7a1 100644 --- a/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/time_series.yml +++ b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/time_series.yml @@ -302,3 +302,39 @@ setup: mappings: _source: enabled: false + +--- +"Number for keyword routing field": + - skip: + version: " - 8.10.99" + reason: "Fix in 8.11" + + - do: + bulk: + index: tsdb + refresh: true + body: + - '{ "index": {} }' + - '{ 
"key": 10, "val": 1, "@timestamp": "2021-10-01T00:00:10Z" }' + - '{ "index": {}}' + - '{ "key": 11, "val": 2, "@timestamp": "2021-10-01T00:00:00Z" }' + + - do: + search: + index: tsdb + body: + query: + range: + "@timestamp": + gte: "2021-10-01T00:00:00Z" + size: 0 + aggs: + ts: + time_series: + keyed: false + + - match: { hits.total.value: 2 } + - length: { aggregations: 1 } + + - match: { aggregations.ts.buckets.0.key: { "key": "10" } } + - match: { aggregations.ts.buckets.0.doc_count: 1 } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java b/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java index ceaa696f6e55e..cd05ca3d523d8 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java @@ -337,6 +337,10 @@ private void extractItem(String path, XContentParser source) throws IOException case VALUE_NULL: source.nextToken(); break; + case VALUE_NUMBER: // allow parsing numbers assuming routing fields are always keyword fields + hashes.add(new NameAndHash(new BytesRef(path), hash(new BytesRef(source.text())))); + source.nextToken(); + break; default: throw new ParsingException( source.getTokenLocation(), diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTests.java index 305c0711e593e..799fa5c990864 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTests.java @@ -583,6 +583,16 @@ public void testRoutingPathDotInName() throws IOException { assertIndexShard(routing, Map.of("foo.bar", "cat", "baz", "dog"), Math.floorMod(hash(List.of("foo.bar", "cat")), shards)); } + public void testRoutingPathNumbersInSource() throws IOException { + int shards = between(2, 1000); + IndexRouting routing = indexRoutingForPath(shards, "foo"); + long randomLong = randomLong(); + assertIndexShard(routing, Map.of("foo", randomLong), Math.floorMod(hash(List.of("foo", Long.toString(randomLong))), shards)); + double randomDouble = randomDouble(); + assertIndexShard(routing, Map.of("foo", randomDouble), Math.floorMod(hash(List.of("foo", Double.toString(randomDouble))), shards)); + assertIndexShard(routing, Map.of("foo", 123), Math.floorMod(hash(List.of("foo", "123")), shards)); + } + public void testRoutingPathBwc() throws IOException { IndexVersion version = IndexVersionUtils.randomCompatibleVersion(random()); IndexRouting routing = indexRoutingForPath(version, 8, "dim.*,other.*,top"); From 62b75c71719bdebf4e47d8e48d10fb995f30e2ef Mon Sep 17 00:00:00 2001 From: Fernando Briano Date: Tue, 3 Oct 2023 09:18:31 +0100 Subject: [PATCH 055/136] Adds Julia Elasticsearch client to community-clients (#98048) Closes #96736 --- docs/community-clients/index.asciidoc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/community-clients/index.asciidoc b/docs/community-clients/index.asciidoc index 063d3474ad9c8..e15e766ffec0c 100644 --- a/docs/community-clients/index.asciidoc +++ b/docs/community-clients/index.asciidoc @@ -25,6 +25,7 @@ a number of clients that have been contributed by the community for various lang * <> * <> * <> +* <> * <> * <> * <> @@ -121,6 +122,12 @@ client]. See the {client}/javascript-api/current/index.html[official Elasticsearch JavaScript client]. 
+[[julia]] +== Julia + +* https://github.com/OpenSesame/ElasticsearchClient.jl[ElasticsearchClient.jl]: +Elasticsearch client inspired by the {client}/ruby-api/current/index.html[official Elasticsearch Ruby client]. + [[kotlin]] == Kotlin From 74936587e7b749be565d4e1978c755cbb97ecdfd Mon Sep 17 00:00:00 2001 From: David Roberts Date: Tue, 3 Oct 2023 09:30:42 +0100 Subject: [PATCH 056/136] [ML] Log warnings for jobs unassigned for a long time (#100154) If a job is unassigned for a long time (say more than 15 minutes) then that's a sign of a potential problem with the cluster. In Cloud it may be an indication of a failure of autoscaling. In self-managed it may be an indication of a failed node not being replaced. Either way, warning that the situation exists in periodic log messages should make it easier for operators to detect the situation and attempt to remedy it. --- docs/changelog/100154.yaml | 5 + .../xpack/ml/MlAssignmentNotifier.java | 112 ++++++++++++++++++ .../xpack/ml/MlAssignmentNotifierTests.java | 76 +++++++++++- 3 files changed, 191 insertions(+), 2 deletions(-) create mode 100644 docs/changelog/100154.yaml diff --git a/docs/changelog/100154.yaml b/docs/changelog/100154.yaml new file mode 100644 index 0000000000000..5e75102390c61 --- /dev/null +++ b/docs/changelog/100154.yaml @@ -0,0 +1,5 @@ +pr: 100154 +summary: Log warnings for jobs unassigned for a long time +area: Machine Learning +type: enhancement +issues: [] diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlAssignmentNotifier.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlAssignmentNotifier.java index c9f338848fe57..2378f0becd959 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlAssignmentNotifier.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MlAssignmentNotifier.java @@ -9,6 +9,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.elasticsearch.cluster.ClusterChangedEvent; +import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ClusterStateListener; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; @@ -22,27 +23,55 @@ import org.elasticsearch.xpack.core.ml.action.OpenJobAction; import org.elasticsearch.xpack.core.ml.action.StartDataFrameAnalyticsAction; import org.elasticsearch.xpack.core.ml.action.StartDatafeedAction; +import org.elasticsearch.xpack.core.ml.utils.MlTaskParams; import org.elasticsearch.xpack.ml.notifications.AnomalyDetectionAuditor; import org.elasticsearch.xpack.ml.notifications.DataFrameAnalyticsAuditor; +import java.time.Clock; +import java.time.Duration; +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.Objects; public class MlAssignmentNotifier implements ClusterStateListener { private static final Logger logger = LogManager.getLogger(MlAssignmentNotifier.class); + static final Duration MIN_CHECK_UNASSIGNED_INTERVAL = Duration.ofSeconds(30); + static final Duration LONG_TIME_UNASSIGNED_INTERVAL = Duration.ofMinutes(15); + static final Duration MIN_REPORT_INTERVAL = Duration.ofHours(6); + private final AnomalyDetectionAuditor anomalyDetectionAuditor; private final DataFrameAnalyticsAuditor dataFrameAnalyticsAuditor; private final ThreadPool threadPool; + private final Clock clock; + private Map<TaskNameAndId, UnassignedTimeAndReportTime> unassignedInfoByTask = Map.of(); + private volatile
Instant lastLogCheck; MlAssignmentNotifier( AnomalyDetectionAuditor anomalyDetectionAuditor, DataFrameAnalyticsAuditor dataFrameAnalyticsAuditor, ThreadPool threadPool, ClusterService clusterService + ) { + this(anomalyDetectionAuditor, dataFrameAnalyticsAuditor, threadPool, clusterService, Clock.systemUTC()); + } + + MlAssignmentNotifier( + AnomalyDetectionAuditor anomalyDetectionAuditor, + DataFrameAnalyticsAuditor dataFrameAnalyticsAuditor, + ThreadPool threadPool, + ClusterService clusterService, + Clock clock ) { this.anomalyDetectionAuditor = anomalyDetectionAuditor; this.dataFrameAnalyticsAuditor = dataFrameAnalyticsAuditor; this.threadPool = threadPool; + this.clock = clock; + this.lastLogCheck = clock.instant(); clusterService.addListener(this); } @@ -54,9 +83,16 @@ private String executorName() { public void clusterChanged(ClusterChangedEvent event) { if (event.localNodeMaster() == false) { + unassignedInfoByTask = Map.of(); return; } + Instant now = clock.instant(); + if (lastLogCheck.plus(MIN_CHECK_UNASSIGNED_INTERVAL).isBefore(now)) { + lastLogCheck = now; + threadPool.executor(executorName()).execute(() -> logLongTimeUnassigned(now, event.state())); + } + if (event.metadataChanged() == false) { return; } @@ -223,4 +259,80 @@ static String nodeName(DiscoveryNodes nodes, String nodeId) { } return nodeId; } + + private void logLongTimeUnassigned(Instant now, ClusterState state) { + PersistentTasksCustomMetadata tasks = state.getMetadata().custom(PersistentTasksCustomMetadata.TYPE); + if (tasks == null) { + return; + } + + List<String> itemsToReport = findLongTimeUnassignedTasks(now, tasks); + + logger.warn("ML persistent tasks unassigned for a long time [{}]", String.join("|", itemsToReport)); + } + + /** + * Creates a list of items to be logged to report ML job tasks that: + * 1. Have been unassigned for a long time + * 2. Have not been logged recently (to avoid log spam) + *
<p>
+ * Only report on jobs, not datafeeds, on the assumption that jobs and their corresponding + * datafeeds get assigned together. This may miss some obscure edge cases, but will avoid + * the verbose and confusing messages that the duplication between jobs and datafeeds would + * generally cause. + *
<p>
+ * The time intervals used in this reporting reset each time the master node changes, as + * the data structure used to record the information is in memory on the current master node, + * not in cluster state. + */ + synchronized List<String> findLongTimeUnassignedTasks(Instant now, PersistentTasksCustomMetadata tasks) { + + assert tasks != null; + + final List<String> itemsToReport = new ArrayList<>(); + final Map<TaskNameAndId, UnassignedTimeAndReportTime> oldUnassignedInfoByTask = unassignedInfoByTask; + final Map<TaskNameAndId, UnassignedTimeAndReportTime> newUnassignedInfoByTask = new HashMap<>(); + + for (PersistentTask<?> task : tasks.tasks()) { + if (task.getExecutorNode() == null) { + final String taskName = task.getTaskName(); + if (MlTasks.JOB_TASK_NAME.equals(taskName) || MlTasks.DATA_FRAME_ANALYTICS_TASK_NAME.equals(taskName)) { + final String mlId = ((MlTaskParams) task.getParams()).getMlId(); + final TaskNameAndId key = new TaskNameAndId(taskName, mlId); + final UnassignedTimeAndReportTime previousInfo = oldUnassignedInfoByTask.get(key); + final Instant firstUnassignedTime; + final Instant lastReportedTime; + if (previousInfo != null) { + firstUnassignedTime = previousInfo.unassignedTime(); + if (firstUnassignedTime.plus(LONG_TIME_UNASSIGNED_INTERVAL).isBefore(now) + && (previousInfo.reportTime() == null || previousInfo.reportTime().plus(MIN_REPORT_INTERVAL).isBefore(now))) { + lastReportedTime = now; + itemsToReport.add( + Strings.format( + "[%s]/[%s] unassigned for [%d] seconds", + taskName, + mlId, + ChronoUnit.SECONDS.between(firstUnassignedTime, now) + ) + ); + } else { + lastReportedTime = previousInfo.reportTime(); + } + } else { + firstUnassignedTime = now; + lastReportedTime = null; + } + newUnassignedInfoByTask.put(key, new UnassignedTimeAndReportTime(firstUnassignedTime, lastReportedTime)); + } + } + } + + unassignedInfoByTask = newUnassignedInfoByTask; + + return itemsToReport; + } + + private record TaskNameAndId(String taskName, String mlId) {}; + + private record UnassignedTimeAndReportTime(Instant unassignedTime, Instant reportTime) {}; } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MlAssignmentNotifierTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MlAssignmentNotifierTests.java index 7960bf3ea7068..a393f691ae004 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MlAssignmentNotifierTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/MlAssignmentNotifierTests.java @@ -17,17 +17,25 @@ import org.elasticsearch.persistent.PersistentTasksCustomMetadata; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xpack.core.ml.job.config.JobState; import org.elasticsearch.xpack.ml.notifications.AnomalyDetectionAuditor; import org.elasticsearch.xpack.ml.notifications.DataFrameAnalyticsAuditor; import org.junit.Before; import java.net.InetAddress; +import java.time.Duration; +import java.time.Instant; import java.util.Collections; +import java.util.List; import java.util.concurrent.ExecutorService; import static org.elasticsearch.xpack.ml.job.task.OpenJobPersistentTasksExecutorTests.addJobTask; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.empty; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -47,10 +55,9 @@
public void setupMocks() { dataFrameAnalyticsAuditor = mock(DataFrameAnalyticsAuditor.class); clusterService = mock(ClusterService.class); threadPool = mock(ThreadPool.class); - threadPool = mock(ThreadPool.class); ExecutorService executorService = mock(ExecutorService.class); - org.mockito.Mockito.doAnswer(invocation -> { + doAnswer(invocation -> { ((Runnable) invocation.getArguments()[0]).run(); return null; }).when(executorService).execute(any(Runnable.class)); @@ -233,4 +240,69 @@ public void testAuditUnassignedMlTasks() { verify(anomalyDetectionAuditor, times(2)).includeNodeInfo(); } } + + public void testFindLongTimeUnassignedTasks() { + MlAssignmentNotifier notifier = new MlAssignmentNotifier( + anomalyDetectionAuditor, + dataFrameAnalyticsAuditor, + threadPool, + clusterService + ); + + Instant now = Instant.now(); + Instant eightHoursAgo = now.minus(Duration.ofHours(8)); + Instant sevenHoursAgo = eightHoursAgo.plus(Duration.ofHours(1)); + Instant twoHoursAgo = sevenHoursAgo.plus(Duration.ofHours(5)); + + PersistentTasksCustomMetadata.Builder tasksBuilder = PersistentTasksCustomMetadata.builder(); + addJobTask("job1", "node1", JobState.OPENED, tasksBuilder); + addJobTask("job2", "node1", JobState.OPENED, tasksBuilder); + addJobTask("job3", null, JobState.OPENED, tasksBuilder); + addJobTask("job4", null, JobState.OPENED, tasksBuilder); + addJobTask("job5", null, JobState.OPENED, tasksBuilder); + List<String> itemsToReport = notifier.findLongTimeUnassignedTasks(eightHoursAgo, tasksBuilder.build()); + // Nothing reported because unassigned jobs only just detected + assertThat(itemsToReport, empty()); + + tasksBuilder = PersistentTasksCustomMetadata.builder(); + addJobTask("job1", null, JobState.OPENED, tasksBuilder); + addJobTask("job2", "node1", JobState.OPENED, tasksBuilder); + addJobTask("job3", null, JobState.OPENED, tasksBuilder); + addJobTask("job4", "node2", JobState.OPENED, tasksBuilder); + addJobTask("job5", null, JobState.OPENED, tasksBuilder); + itemsToReport = notifier.findLongTimeUnassignedTasks(sevenHoursAgo, tasksBuilder.build()); + // Jobs 3 and 5 still unassigned so should get reported, job 4 now assigned, job 1 only just detected unassigned + assertThat( + itemsToReport, + containsInAnyOrder("[xpack/ml/job]/[job3] unassigned for [3600] seconds", "[xpack/ml/job]/[job5] unassigned for [3600] seconds") + ); + + tasksBuilder = PersistentTasksCustomMetadata.builder(); + addJobTask("job1", null, JobState.OPENED, tasksBuilder); + addJobTask("job2", null, JobState.OPENED, tasksBuilder); + addJobTask("job3", null, JobState.OPENED, tasksBuilder); + addJobTask("job4", "node2", JobState.OPENED, tasksBuilder); + addJobTask("job5", null, JobState.OPENED, tasksBuilder); + itemsToReport = notifier.findLongTimeUnassignedTasks(twoHoursAgo, tasksBuilder.build()); + // Jobs 3 and 5 still unassigned but reported less than 6 hours ago, job 1 still unassigned so gets reported now, + // job 2 only just detected unassigned + assertThat(itemsToReport, contains("[xpack/ml/job]/[job1] unassigned for [18000] seconds")); + + tasksBuilder = PersistentTasksCustomMetadata.builder(); + addJobTask("job1", null, JobState.OPENED, tasksBuilder); + addJobTask("job2", null, JobState.OPENED, tasksBuilder); + addJobTask("job3", null, JobState.OPENED, tasksBuilder); + addJobTask("job4", null, JobState.OPENED, tasksBuilder); + addJobTask("job5", "node1", JobState.OPENED, tasksBuilder); + itemsToReport = notifier.findLongTimeUnassignedTasks(now, tasksBuilder.build()); + // Job 3 still unassigned and reported more than
6 hours ago, job 1 still unassigned but reported less than 6 hours ago, + // job 2 still unassigned so gets reported now, job 4 only just detected unassigned, job 5 now assigned + assertThat( + itemsToReport, + containsInAnyOrder( + "[xpack/ml/job]/[job2] unassigned for [7200] seconds", + "[xpack/ml/job]/[job3] unassigned for [28800] seconds" + ) + ); + } } From 09edf6cc424666cafd7898314472e25883f97715 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Tue, 3 Oct 2023 09:49:47 +0100 Subject: [PATCH 057/136] Mute MLModelDeploymentsUpgradeIT::testTrainedModelDeployment (#100181) For #100180 --- .../org/elasticsearch/upgrades/MLModelDeploymentsUpgradeIT.java | 1 + 1 file changed, 1 insertion(+) diff --git a/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/MLModelDeploymentsUpgradeIT.java b/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/MLModelDeploymentsUpgradeIT.java index b9fbf0b6b1f03..4912bff3518f0 100644 --- a/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/MLModelDeploymentsUpgradeIT.java +++ b/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/MLModelDeploymentsUpgradeIT.java @@ -97,6 +97,7 @@ public void removeLogging() throws IOException { client().performRequest(request); } + @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/100180") public void testTrainedModelDeployment() throws Exception { assumeTrue("NLP model deployments added in 8.0", UPGRADE_FROM_VERSION.onOrAfter(Version.V_8_0_0)); From d97ca1d1f399414cc313f918f2c728c6401aec3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Przemys=C5=82aw=20Witek?= Date: Tue, 3 Oct 2023 10:54:10 +0200 Subject: [PATCH 058/136] Revert "[Transform] improve transform log/audit correctness when stopped (#99917)" (#100178) Unfortunately, PR https://github.com/elastic/elasticsearch/pull/99917 has caused some unexpected log spam. It's better to revert it in order to make log/audit messages less confusing. 
Reverts elastic/elasticsearch#99917 --- docs/changelog/99917.yaml | 5 ----- .../transforms/ClientTransformIndexer.java | 1 - .../transform/transforms/TransformIndexer.java | 17 +++++++++-------- 3 files changed, 9 insertions(+), 14 deletions(-) delete mode 100644 docs/changelog/99917.yaml diff --git a/docs/changelog/99917.yaml b/docs/changelog/99917.yaml deleted file mode 100644 index 6fe77926679f8..0000000000000 --- a/docs/changelog/99917.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 99917 -summary: Improve transform log/audit correctness when stopped -area: Transform -type: enhancement -issues: [] diff --git a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/transforms/ClientTransformIndexer.java b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/transforms/ClientTransformIndexer.java index 27dce82b64c3a..00fa7f200a3c3 100644 --- a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/transforms/ClientTransformIndexer.java +++ b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/transforms/ClientTransformIndexer.java @@ -395,7 +395,6 @@ protected void afterFinishOrFailure() { @Override protected void onStop() { - logger.debug(() -> format("[%s] transform initiating stop", transformConfig.getId())); closePointInTime(); super.onStop(); } diff --git a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/transforms/TransformIndexer.java b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/transforms/TransformIndexer.java index 6296bdf1277ff..9294aef87526d 100644 --- a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/transforms/TransformIndexer.java +++ b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/transforms/TransformIndexer.java @@ -564,10 +564,7 @@ private void finalizeCheckpoint(ActionListener<Void> listener) { @Override protected void afterFinishOrFailure() { - finishIndexerThreadShutdown(() -> { - auditor.info(transformConfig.getId(), "Transform has stopped."); - logger.info("[{}] transform has stopped.", transformConfig.getId()); - }); + finishIndexerThreadShutdown(); } @Override @@ -649,6 +646,12 @@ protected void onFailure(Exception exc) { } } + @Override + protected void onStop() { + auditor.info(transformConfig.getId(), "Transform has stopped."); + logger.info("[{}] transform has stopped.", transformConfig.getId()); + } + @Override protected void onAbort() { auditor.info(transformConfig.getId(), "Received abort request, stopping transform."); @@ -1200,7 +1203,7 @@ private void startIndexerThreadShutdown() { } } - private void finishIndexerThreadShutdown(Runnable next) { + private void finishIndexerThreadShutdown() { synchronized (context) { indexerThreadShuttingDown = false; if (saveStateRequestedDuringIndexerThreadShutdown) { @@ -1209,9 +1212,7 @@ private void finishIndexerThreadShutdown() { if (context.shouldStopAtCheckpoint() && nextCheckpoint == null) { stop(); } - doSaveState(getState(), getPosition(), next); - } else { - next.run(); + doSaveState(getState(), getPosition(), () -> {}); } } } From ff51a6db4057f5b0489d550845f2252c0c340af6 Mon Sep 17 00:00:00 2001 From: Ignacio Vera Date: Tue, 3 Oct 2023 12:53:20 +0200 Subject: [PATCH 059/136] Make runtime fields extensible for plugins (#100185) This commit changes the visibility of runtime fields building blocks so they can be extended from plugins.
--- .../index/mapper/AbstractScriptFieldType.java | 16 ++++----- .../index/mapper/BooleanScriptFieldType.java | 8 +++-- .../index/mapper/DateScriptFieldType.java | 12 ++++--- .../index/mapper/DoubleScriptFieldType.java | 8 +++-- .../index/mapper/GeoPointScriptFieldType.java | 8 +++-- .../index/mapper/IpScriptFieldType.java | 8 +++-- .../index/mapper/KeywordScriptFieldType.java | 8 +++-- .../index/mapper/LongScriptFieldType.java | 8 +++-- .../script/AbstractFieldScript.java | 2 +- .../runtime/AbstractScriptFieldQuery.java | 6 ++-- .../mapper/BooleanScriptFieldTypeTests.java | 11 ++++++ .../mapper/DateScriptFieldTypeTests.java | 11 ++++++ .../mapper/DoubleScriptFieldTypeTests.java | 11 ++++++ .../mapper/GeoPointScriptFieldTypeTests.java | 11 ++++++ .../index/mapper/IpScriptFieldTypeTests.java | 11 ++++++ .../mapper/KeywordScriptFieldTypeTests.java | 11 ++++++ .../mapper/LongScriptFieldTypeTests.java | 11 ++++++ ...bstractNonTextScriptFieldTypeTestCase.java | 2 +- .../AbstractScriptFieldTypeTestCase.java | 35 ++++--------------- .../index/mapper/FieldScriptTestCase.java | 0 .../index/mapper/TestScriptEngine.java | 0 .../AbstractScriptFieldQueryTestCase.java | 0 .../AbstractScriptFieldQueryTests.java | 0 23 files changed, 133 insertions(+), 65 deletions(-) rename {server/src/test => test/framework/src/main}/java/org/elasticsearch/index/mapper/AbstractNonTextScriptFieldTypeTestCase.java (95%) rename {server/src/test => test/framework/src/main}/java/org/elasticsearch/index/mapper/AbstractScriptFieldTypeTestCase.java (92%) rename {server/src/test => test/framework/src/main}/java/org/elasticsearch/index/mapper/FieldScriptTestCase.java (100%) rename {server/src/test => test/framework/src/main}/java/org/elasticsearch/index/mapper/TestScriptEngine.java (100%) rename {server/src/test => test/framework/src/main}/java/org/elasticsearch/search/runtime/AbstractScriptFieldQueryTestCase.java (100%) rename {server/src/test => test/framework/src/main}/java/org/elasticsearch/search/runtime/AbstractScriptFieldQueryTests.java (100%) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldType.java index 1b5bd9ee2bb8e..53c860530e421 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldType.java @@ -41,13 +41,13 @@ /** * Abstract base {@linkplain MappedFieldType} for runtime fields based on a script. */ -abstract class AbstractScriptFieldType<LeafFactory> extends MappedFieldType { +public abstract class AbstractScriptFieldType<LeafFactory> extends MappedFieldType { protected final Script script; private final Function<SearchLookup, LeafFactory> factory; private final boolean isResultDeterministic; - AbstractScriptFieldType( + protected AbstractScriptFieldType( String name, Function<SearchLookup, LeafFactory> factory, Script script, @@ -221,7 +221,7 @@ public void validateMatchedRoutingPath(final String routingPath) { // TODO rework things so that we don't need this protected static final Script DEFAULT_SCRIPT = new Script(""); - abstract static class Builder<Factory> extends RuntimeField.Builder { + protected abstract static class Builder<Factory> extends RuntimeField.Builder { private final ScriptContext<Factory> scriptContext; private final FieldMapper.Parameter