diff --git a/docs/changelog/116277.yaml b/docs/changelog/116277.yaml new file mode 100644 index 0000000000000..62262b7797783 --- /dev/null +++ b/docs/changelog/116277.yaml @@ -0,0 +1,6 @@ +pr: 116277 +summary: Update Semantic Query To Handle Zero Size Responses +area: Vector Search +type: bug +issues: + - 116083 diff --git a/docs/changelog/116915.yaml b/docs/changelog/116915.yaml new file mode 100644 index 0000000000000..9686f0023a14a --- /dev/null +++ b/docs/changelog/116915.yaml @@ -0,0 +1,5 @@ +pr: 116915 +summary: Improve message about insecure S3 settings +area: Snapshot/Restore +type: enhancement +issues: [] diff --git a/docs/changelog/116918.yaml b/docs/changelog/116918.yaml new file mode 100644 index 0000000000000..3b04b4ae4a69a --- /dev/null +++ b/docs/changelog/116918.yaml @@ -0,0 +1,5 @@ +pr: 116918 +summary: Split searchable snapshot into multiple repo operations +area: Snapshot/Restore +type: enhancement +issues: [] diff --git a/docs/changelog/116931.yaml b/docs/changelog/116931.yaml new file mode 100644 index 0000000000000..8b31d236ff137 --- /dev/null +++ b/docs/changelog/116931.yaml @@ -0,0 +1,5 @@ +pr: 116931 +summary: Enable built-in Inference Endpoints and default for Semantic Text +area: "Machine Learning" +type: enhancement +issues: [] diff --git a/docs/changelog/116942.yaml b/docs/changelog/116942.yaml new file mode 100644 index 0000000000000..5037e8c59cd85 --- /dev/null +++ b/docs/changelog/116942.yaml @@ -0,0 +1,5 @@ +pr: 116942 +summary: Fix handling of bulk requests with semantic text fields and delete ops +area: Relevance +type: bug +issues: [] diff --git a/docs/changelog/116995.yaml b/docs/changelog/116995.yaml new file mode 100644 index 0000000000000..a0467c630edf3 --- /dev/null +++ b/docs/changelog/116995.yaml @@ -0,0 +1,5 @@ +pr: 116995 +summary: "Apm-data: disable date_detection for all apm data streams" +area: Data streams +type: enhancement +issues: [] \ No newline at end of file diff --git a/docs/reference/esql/esql-language.asciidoc b/docs/reference/esql/esql-language.asciidoc index a7c0e5e01a867..151ca803bf2eb 100644 --- a/docs/reference/esql/esql-language.asciidoc +++ b/docs/reference/esql/esql-language.asciidoc @@ -14,6 +14,7 @@ Detailed reference documentation for the {esql} language: * <> * <> * <> +* <> include::esql-syntax.asciidoc[] include::esql-commands.asciidoc[] @@ -23,3 +24,4 @@ include::multivalued-fields.asciidoc[] include::esql-process-data-with-dissect-grok.asciidoc[] include::esql-enrich-data.asciidoc[] include::implicit-casting.asciidoc[] +include::time-spans.asciidoc[] diff --git a/docs/reference/esql/esql-syntax.asciidoc b/docs/reference/esql/esql-syntax.asciidoc index c7f741d064310..ba1c4ca820381 100644 --- a/docs/reference/esql/esql-syntax.asciidoc +++ b/docs/reference/esql/esql-syntax.asciidoc @@ -157,21 +157,15 @@ FROM employees ==== Timespan literals Datetime intervals and timespans can be expressed using timespan literals. -Timespan literals are a combination of a number and a qualifier. These -qualifiers are supported: - -* `millisecond`/`milliseconds`/`ms` -* `second`/`seconds`/`sec`/`s` -* `minute`/`minutes`/`min` -* `hour`/`hours`/`h` -* `day`/`days`/`d` -* `week`/`weeks`/`w` -* `month`/`months`/`mo` -* `quarter`/`quarters`/`q` -* `year`/`years`/`yr`/`y` +Timespan literals are a combination of a number and a temporal unit. The +supported temporal units are listed in <>. +More examples of using time spans can be found in +<>. + Timespan literals are not whitespace sensitive.
These expressions are all valid: * `1day` * `1 day` * `1       day` + diff --git a/docs/reference/esql/functions/binary.asciidoc b/docs/reference/esql/functions/binary.asciidoc index 72d466ae83d11..59bdadecc4923 100644 --- a/docs/reference/esql/functions/binary.asciidoc +++ b/docs/reference/esql/functions/binary.asciidoc @@ -87,6 +87,7 @@ Supported types: include::types/greater_than_or_equal.asciidoc[] +[[esql-add]] ==== Add `+` [.text-center] image::esql/functions/signature/add.svg[Embedded,opts=inline] @@ -98,6 +99,7 @@ Supported types: include::types/add.asciidoc[] +[[esql-subtract]] ==== Subtract `-` [.text-center] image::esql/functions/signature/sub.svg[Embedded,opts=inline] diff --git a/docs/reference/esql/functions/examples/count.asciidoc b/docs/reference/esql/functions/examples/count.asciidoc index fb696b51e054c..33ed054d3d1e2 100644 --- a/docs/reference/esql/functions/examples/count.asciidoc +++ b/docs/reference/esql/functions/examples/count.asciidoc @@ -37,7 +37,7 @@ include::{esql-specs}/stats.csv-spec[tag=count-where] |=== include::{esql-specs}/stats.csv-spec[tag=count-where-result] |=== -To count the same stream of data based on two different expressions use the pattern `COUNT(<expression> OR NULL)` +To count the same stream of data based on two different expressions use the pattern `COUNT(<expression> OR NULL)`. This builds on the three-valued logic ({wikipedia}/Three-valued_logic[3VL]) of the language: `TRUE OR NULL` is `TRUE`, but `FALSE OR NULL` is `NULL`, plus the way COUNT handles `NULL`s: `COUNT(TRUE)` and `COUNT(FALSE)` are both 1, but `COUNT(NULL)` is 0. [source.merge.styled,esql] ---- include::{esql-specs}/stats.csv-spec[tag=count-or-null] diff --git a/docs/reference/esql/implicit-casting.asciidoc b/docs/reference/esql/implicit-casting.asciidoc index ffb6d3fc35acb..b24be0b645472 100644 --- a/docs/reference/esql/implicit-casting.asciidoc +++ b/docs/reference/esql/implicit-casting.asciidoc @@ -5,7 +5,7 @@ Implicit casting ++++ -Often users will input `date`, `ip`, `version`, `date_period` or `time_duration` as simple strings in their queries for use in predicates, functions, or expressions. {esql} provides <> to explicitly convert these strings into the desired data types. +Often users will input `date`, `date_period`, `time_duration`, `ip` or `version` as simple strings in their queries for use in predicates, functions, or expressions. {esql} provides <> to explicitly convert these strings into the desired data types. Without implicit casting users must explicitly code these `to_X` functions in their queries, when string literals don't match the target data types they are assigned or compared to. Here is an example of using `to_datetime` to explicitly perform a data type conversion. @@ -18,7 +18,10 @@ FROM employees | LIMIT 1 ---- -Implicit casting improves usability, by automatically converting string literals to the target data type. This is most useful when the target data type is `date`, `ip`, `version`, `date_period` or `time_duration`. It is natural to specify these as a string in queries. +[discrete] +[[esql-implicit-casting-example]] +==== Implicit casting example +Implicit casting automatically converts string literals to the target data type. This allows users to specify string values for types like `date`, `date_period`, `time_duration`, `ip` and `version` in their queries.
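Editor's note: as an end-to-end illustration of the paragraph above, here is a minimal Java sketch that submits such a query over the `_query` endpoint, letting {esql} implicitly cast the date string in the `WHERE` clause. It assumes a cluster reachable at `http://localhost:9200` with the `employees` example index used throughout these docs, and is not part of this change; the documented asciidoc example follows below.

[source,java]
----
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

class ImplicitCastingSketch {
    public static void main(String[] args) throws Exception {
        // The string literal "1985-01-01T00:00:00Z" is implicitly cast to a date
        // for the comparison with hire_date, so no TO_DATETIME call is needed.
        String body = """
            {"query": "FROM employees | WHERE hire_date >= \\"1985-01-01T00:00:00Z\\" | STATS hires = COUNT(*)"}""";
        HttpRequest request = HttpRequest.newBuilder(URI.create("http://localhost:9200/_query"))
            .header("Content-Type", "application/json")
            .POST(HttpRequest.BodyPublishers.ofString(body))
            .build();
        HttpResponse<String> response = HttpClient.newHttpClient()
            .send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.body());
    }
}
----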
The first query can be coded without calling the `to_datetime` function, as follows: @@ -31,35 +34,36 @@ FROM employees | LIMIT 1 ---- -[float] -=== Implicit casting support +[discrete] +[[esql-implicit-casting-supported-operations]] +==== Operations that support implicit casting The following table details which {esql} operations support implicit casting for different data types. [%header.monospaced.styled,format=dsv,separator=|] |=== -||ScalarFunction*|Operator*|<>|<> -|DATE|Y|Y|Y|N -|IP|Y|Y|Y|N -|VERSION|Y|Y|Y|N -|BOOLEAN|Y|Y|Y|N -|DATE_PERIOD/TIME_DURATION|Y|N|Y|N +|ScalarFunctions|Operators|<>|<> DATE|Y|Y|Y|N DATE_PERIOD/TIME_DURATION|Y|N|Y|N IP|Y|Y|Y|N VERSION|Y|Y|Y|N BOOLEAN|Y|Y|Y|N |=== -ScalarFunction* includes: +ScalarFunctions include: -<> +* <> -<> +* <> -<> +* <> -Operator* includes: +Operators include: -<> +* <> -<> +* <> -<> +* <> diff --git a/docs/reference/esql/time-spans.asciidoc b/docs/reference/esql/time-spans.asciidoc new file mode 100644 index 0000000000000..d2aa0c4fa252e --- /dev/null +++ b/docs/reference/esql/time-spans.asciidoc @@ -0,0 +1,111 @@ +[[esql-time-spans]] +=== {esql} time spans + +++++ +Time spans +++++ + +Time spans represent intervals between two datetime values. There are currently two supported types of time spans: + +* `DATE_PERIOD` specifies intervals in years, quarters, months, weeks and days +* `TIME_DURATION` specifies intervals in hours, minutes, seconds and milliseconds + +A time span requires two elements: an integer value and a temporal unit. + +Time spans work with grouping functions such as <>, scalar functions such as <>, and arithmetic operators such as <> and <>. Convert strings to time spans using <>, <>, or the cast operators `::DATE_PERIOD`, `::TIME_DURATION`. + +[discrete] +[[esql-time-spans-examples]] +==== Examples of using time spans in {esql} + + +With `BUCKET`: +[source.merge.styled,esql] +---- +include::{esql-specs}/bucket.csv-spec[tag=docsBucketWeeklyHistogramWithSpan] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/bucket.csv-spec[tag=docsBucketWeeklyHistogramWithSpan-result] +|=== + + +With `DATE_TRUNC`: +[source.merge.styled,esql] +---- +include::{esql-specs}/date.csv-spec[tag=docsDateTrunc] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/date.csv-spec[tag=docsDateTrunc-result] +|=== + + +With `+` and/or `-`: +[source.merge.styled,esql] +---- +include::{esql-specs}/date.csv-spec[tag=docsNowWhere] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/date.csv-spec[tag=docsNowWhere-result] +|=== + + +When a time span is provided as a named parameter in string format, `TO_DATEPERIOD`, `::DATE_PERIOD`, `TO_TIMEDURATION` or `::TIME_DURATION` can be used to convert it to the corresponding time span value for arithmetic operations like `+` and `-`. +[source, esql] +---- +POST /_query +{ + "query": """ + FROM employees + | EVAL x = hire_date + ?timespan::DATE_PERIOD, y = hire_date - TO_DATEPERIOD(?timespan) + """, + "params": [{"timespan" : "1 day"}] +} ---- + +When a time span is provided as a named parameter in string format, it can be automatically converted to its corresponding time span value in grouping functions and scalar functions, like `BUCKET` and `DATE_TRUNC`.
+[source, esql] +---- +POST /_query +{ + "query": """ + FROM employees + | WHERE hire_date >= "1985-01-01T00:00:00Z" AND hire_date < "1986-01-01T00:00:00Z" + | STATS hires_per_week = COUNT(*) BY week = BUCKET(hire_date, ?timespan) + | SORT week + """, + "params": [{"timespan" : "1 week"}] +} +---- + +[source, esql] +---- +POST /_query +{ + "query": """ + FROM employees + | KEEP first_name, last_name, hire_date + | EVAL year_hired = DATE_TRUNC(?timespan, hire_date) + """, + "params": [{"timespan" : "1 year"}] +} +---- + +[discrete] +[[esql-time-spans-table]] +==== Supported temporal units +[%header.monospaced.styled,format=dsv,separator=|] +|=== +Temporal Units|Valid Abbreviations +year|y, yr, years +quarter|q, quarters +month|mo, months +week|w, weeks +day|d, days +hour|h, hours +minute|min, minutes +second|s, sec, seconds +millisecond|ms, milliseconds +|=== diff --git a/gradle/build.versions.toml b/gradle/build.versions.toml index e3148c6f3ef2e..1bdd93e3a7470 100644 --- a/gradle/build.versions.toml +++ b/gradle/build.versions.toml @@ -1,5 +1,5 @@ [versions] -asm = "9.6" +asm = "9.7.1" jackson = "2.15.0" junit5 = "5.8.1" spock = "2.1-groovy-3.0" diff --git a/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/adjacency/AdjacencyMatrixAggregator.java b/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/adjacency/AdjacencyMatrixAggregator.java index 2b4fea0327e86..29e8aec00a02d 100644 --- a/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/adjacency/AdjacencyMatrixAggregator.java +++ b/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/adjacency/AdjacencyMatrixAggregator.java @@ -15,6 +15,7 @@ import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.search.aggregations.AggregationExecutionContext; import org.elasticsearch.search.aggregations.Aggregator; @@ -177,65 +178,66 @@ public void collect(int doc, long bucket) throws IOException { } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { // Buckets are ordered into groups - [keyed filters] [key1&key2 intersects] - int maxOrd = owningBucketOrds.length * totalNumKeys; - int totalBucketsToBuild = 0; - for (int ord = 0; ord < maxOrd; ord++) { + long maxOrd = owningBucketOrds.size() * totalNumKeys; + long totalBucketsToBuild = 0; + for (long ord = 0; ord < maxOrd; ord++) { if (bucketDocCount(ord) > 0) { totalBucketsToBuild++; } } - long[] bucketOrdsToBuild = new long[totalBucketsToBuild]; - int builtBucketIndex = 0; - for (int ord = 0; ord < maxOrd; ord++) { - if (bucketDocCount(ord) > 0) { - bucketOrdsToBuild[builtBucketIndex++] = ord; - } - } - assert builtBucketIndex == totalBucketsToBuild; - builtBucketIndex = 0; - var bucketSubAggs = buildSubAggsForBuckets(bucketOrdsToBuild); - InternalAggregation[] results = new InternalAggregation[owningBucketOrds.length]; - for (int owningBucketOrdIdx = 0; owningBucketOrdIdx < owningBucketOrds.length; owningBucketOrdIdx++) { - List buckets = new ArrayList<>(filters.length); - for (int i = 0; i < keys.length; i++) { - long bucketOrd = bucketOrd(owningBucketOrds[owningBucketOrdIdx], i); - long docCount = bucketDocCount(bucketOrd); - // Empty buckets 
are not returned because this aggregation will commonly be used under a - // a date-histogram where we will look for transactions over time and can expect many - // empty buckets. - if (docCount > 0) { - InternalAdjacencyMatrix.InternalBucket bucket = new InternalAdjacencyMatrix.InternalBucket( - keys[i], - docCount, - bucketSubAggs.apply(builtBucketIndex++) - ); - buckets.add(bucket); + try (LongArray bucketOrdsToBuild = bigArrays().newLongArray(totalBucketsToBuild)) { + int builtBucketIndex = 0; + for (int ord = 0; ord < maxOrd; ord++) { + if (bucketDocCount(ord) > 0) { + bucketOrdsToBuild.set(builtBucketIndex++, ord); } } - int pos = keys.length; - for (int i = 0; i < keys.length; i++) { - for (int j = i + 1; j < keys.length; j++) { - long bucketOrd = bucketOrd(owningBucketOrds[owningBucketOrdIdx], pos); + assert builtBucketIndex == totalBucketsToBuild; + builtBucketIndex = 0; + var bucketSubAggs = buildSubAggsForBuckets(bucketOrdsToBuild); + InternalAggregation[] results = new InternalAggregation[Math.toIntExact(owningBucketOrds.size())]; + for (int owningBucketOrdIdx = 0; owningBucketOrdIdx < results.length; owningBucketOrdIdx++) { + List buckets = new ArrayList<>(filters.length); + for (int i = 0; i < keys.length; i++) { + long bucketOrd = bucketOrd(owningBucketOrds.get(owningBucketOrdIdx), i); long docCount = bucketDocCount(bucketOrd); - // Empty buckets are not returned due to potential for very sparse matrices + // Empty buckets are not returned because this aggregation will commonly be used under + // a date-histogram where we will look for transactions over time and can expect many + // empty buckets. if (docCount > 0) { - String intersectKey = keys[i] + separator + keys[j]; InternalAdjacencyMatrix.InternalBucket bucket = new InternalAdjacencyMatrix.InternalBucket( - intersectKey, + keys[i], docCount, bucketSubAggs.apply(builtBucketIndex++) ); buckets.add(bucket); } - pos++; } + int pos = keys.length; + for (int i = 0; i < keys.length; i++) { + for (int j = i + 1; j < keys.length; j++) { + long bucketOrd = bucketOrd(owningBucketOrds.get(owningBucketOrdIdx), pos); + long docCount = bucketDocCount(bucketOrd); + // Empty buckets are not returned due to potential for very sparse matrices + if (docCount > 0) { + String intersectKey = keys[i] + separator + keys[j]; + InternalAdjacencyMatrix.InternalBucket bucket = new InternalAdjacencyMatrix.InternalBucket( + intersectKey, + docCount, + bucketSubAggs.apply(builtBucketIndex++) ); + buckets.add(bucket); } + pos++; + } + } + results[owningBucketOrdIdx] = new InternalAdjacencyMatrix(name, buckets, metadata()); } - results[owningBucketOrdIdx] = new InternalAdjacencyMatrix(name, buckets, metadata()); + assert builtBucketIndex == totalBucketsToBuild; + return results; } - assert builtBucketIndex == totalBucketsToBuild; - return results; } @Override diff --git a/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/histogram/AutoDateHistogramAggregator.java b/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/histogram/AutoDateHistogramAggregator.java index d4e1c2928c441..6add1b0ac4a13 100644 --- a/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/histogram/AutoDateHistogramAggregator.java +++ b/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/histogram/AutoDateHistogramAggregator.java @@ -141,7 +141,7 @@ public final LeafBucketCollector getLeafCollector(AggregationExecutionContext ag protected final InternalAggregation[] buildAggregations(
LongKeyedBucketOrds bucketOrds, LongToIntFunction roundingIndexFor, - long[] owningBucketOrds + LongArray owningBucketOrds ) throws IOException { return buildAggregationsForVariableBuckets( owningBucketOrds, @@ -324,7 +324,7 @@ private void increaseRoundingIfNeeded(long rounded) { } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { return buildAggregations(bucketOrds, l -> roundingIdx, owningBucketOrds); } @@ -594,7 +594,7 @@ private void rebucket() { } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { /* * Rebucket before building the aggregation to build as small as result * as possible. diff --git a/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregator.java b/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregator.java index c74637330dd7a..1263d4282a18a 100644 --- a/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregator.java +++ b/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/TimeSeriesAggregator.java @@ -11,6 +11,8 @@ import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.util.LongArray; +import org.elasticsearch.common.util.ObjectArray; import org.elasticsearch.core.Releasables; import org.elasticsearch.index.fielddata.SortedBinaryDocValues; import org.elasticsearch.index.mapper.RoutingPathFields; @@ -30,6 +32,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Comparator; import java.util.List; import java.util.Map; @@ -67,42 +70,43 @@ public TimeSeriesAggregator( } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { BytesRef spare = new BytesRef(); - InternalTimeSeries.InternalBucket[][] allBucketsPerOrd = new InternalTimeSeries.InternalBucket[owningBucketOrds.length][]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrds[ordIdx]); - List buckets = new ArrayList<>(); - while (ordsEnum.next()) { - long docCount = bucketDocCount(ordsEnum.ord()); - ordsEnum.readValue(spare); - InternalTimeSeries.InternalBucket bucket = new InternalTimeSeries.InternalBucket( - BytesRef.deepCopyOf(spare), // Closing bucketOrds will corrupt the bytes ref, so need to make a deep copy here. 
- docCount, - null, - keyed - ); - bucket.bucketOrd = ordsEnum.ord(); - buckets.add(bucket); - if (buckets.size() >= size) { - break; + try (ObjectArray allBucketsPerOrd = bigArrays().newObjectArray(owningBucketOrds.size())) { + for (long ordIdx = 0; ordIdx < allBucketsPerOrd.size(); ordIdx++) { + BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrds.get(ordIdx)); + List buckets = new ArrayList<>(); + while (ordsEnum.next()) { + long docCount = bucketDocCount(ordsEnum.ord()); + ordsEnum.readValue(spare); + InternalTimeSeries.InternalBucket bucket = new InternalTimeSeries.InternalBucket( + BytesRef.deepCopyOf(spare), // Closing bucketOrds will corrupt the bytes ref, so need to make a deep copy here. + docCount, + null, + keyed + ); + bucket.bucketOrd = ordsEnum.ord(); + buckets.add(bucket); + if (buckets.size() >= size) { + break; + } } + // NOTE: after introducing _tsid hashing time series are sorted by (_tsid hash, @timestamp) instead of (_tsid, timestamp). + // _tsid hash and _tsid might sort differently, and out of order data might result in incorrect buckets due to _tsid value + // changes not matching _tsid hash changes. Changes in _tsid hash are handled by creating a new bucket as a result of making + // the assumption that sorting data results in new buckets whenever there is a change in _tsid hash. This is no longer true + // because we collect data sorted on (_tsid hash, timestamp) but build aggregation results sorted by (_tsid, timestamp). + buckets.sort(Comparator.comparing(bucket -> bucket.key)); + allBucketsPerOrd.set(ordIdx, buckets.toArray(new InternalTimeSeries.InternalBucket[0])); } - // NOTE: after introducing _tsid hashing time series are sorted by (_tsid hash, @timestamp) instead of (_tsid, timestamp). - // _tsid hash and _tsid might sort differently, and out of order data might result in incorrect buckets due to _tsid value - // changes not matching _tsid hash changes. Changes in _tsid hash are handled creating a new bucket as a result of making - // the assumption that sorting data results in new buckets whenever there is a change in _tsid hash. This is no true anymore - // because we collect data sorted on (_tsid hash, timestamp) but build aggregation results sorted by (_tsid, timestamp).
- buckets.sort(Comparator.comparing(bucket -> bucket.key)); - allBucketsPerOrd[ordIdx] = buckets.toArray(new InternalTimeSeries.InternalBucket[0]); - } - buildSubAggsForAllBuckets(allBucketsPerOrd, b -> b.bucketOrd, (b, a) -> b.aggregations = a); + buildSubAggsForAllBuckets(allBucketsPerOrd, b -> b.bucketOrd, (b, a) -> b.aggregations = a); - InternalAggregation[] result = new InternalAggregation[owningBucketOrds.length]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - result[ordIdx] = buildResult(allBucketsPerOrd[ordIdx]); + InternalAggregation[] result = new InternalAggregation[Math.toIntExact(allBucketsPerOrd.size())]; + for (int ordIdx = 0; ordIdx < result.length; ordIdx++) { + result[ordIdx] = buildResult(allBucketsPerOrd.get(ordIdx)); + } + return result; } - return result; } @Override @@ -185,7 +189,7 @@ public void collect(int doc, long bucket) throws IOException { } InternalTimeSeries buildResult(InternalTimeSeries.InternalBucket[] topBuckets) { - return new InternalTimeSeries(name, List.of(topBuckets), keyed, metadata()); + return new InternalTimeSeries(name, Arrays.asList(topBuckets), keyed, metadata()); } @FunctionalInterface diff --git a/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ChildrenToParentAggregator.java b/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ChildrenToParentAggregator.java index 6985f6da98cf1..12489ad37aabd 100644 --- a/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ChildrenToParentAggregator.java +++ b/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ChildrenToParentAggregator.java @@ -9,6 +9,7 @@ package org.elasticsearch.join.aggregations; import org.apache.lucene.search.Query; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.CardinalityUpperBound; @@ -44,7 +45,7 @@ public ChildrenToParentAggregator( } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { return buildAggregationsForSingleBucket( owningBucketOrds, (owningBucketOrd, subAggregationResults) -> new InternalParent( diff --git a/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java b/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java index 258cbe743d7d3..122286533ec02 100644 --- a/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java +++ b/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentJoinAggregator.java @@ -21,6 +21,7 @@ import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.BitArray; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.core.Releasable; import org.elasticsearch.core.Releasables; import org.elasticsearch.search.aggregations.AggregationExecutionContext; @@ -115,7 +116,7 @@ public void postCollection() throws IOException { } @Override - protected void prepareSubAggs(long[] ordsToCollect) throws IOException { + protected void prepareSubAggs(LongArray ordsToCollect) throws IOException { IndexReader indexReader = searcher().getIndexReader(); for (LeafReaderContext ctx : indexReader.leaves()) { Scorer 
childDocsScorer = outFilter.scorer(ctx); @@ -158,9 +159,10 @@ public int docID() { * structure that maps a primitive long to a list of primitive * longs. */ - for (long owningBucketOrd : ordsToCollect) { - if (collectionStrategy.exists(owningBucketOrd, globalOrdinal)) { - collectBucket(sub, docId, owningBucketOrd); + for (long ord = 0; ord < ordsToCollect.size(); ord++) { + long ordToCollect = ordsToCollect.get(ord); + if (collectionStrategy.exists(ordToCollect, globalOrdinal)) { + collectBucket(sub, docId, ordToCollect); } } } diff --git a/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentToChildrenAggregator.java b/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentToChildrenAggregator.java index d8a061a2de6d9..939107f87715d 100644 --- a/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentToChildrenAggregator.java +++ b/modules/parent-join/src/main/java/org/elasticsearch/join/aggregations/ParentToChildrenAggregator.java @@ -9,6 +9,7 @@ package org.elasticsearch.join.aggregations; import org.apache.lucene.search.Query; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.CardinalityUpperBound; @@ -40,7 +41,7 @@ public ParentToChildrenAggregator( } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { return buildAggregationsForSingleBucket( owningBucketOrds, (owningBucketOrd, subAggregationResults) -> new InternalChildren( diff --git a/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3RestIT.java b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3RestIT.java index ead2cb36ad150..dcd29c6d26c6e 100644 --- a/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3RestIT.java +++ b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3RestIT.java @@ -51,8 +51,9 @@ protected String getTestRestCluster() { return cluster.getHttpAddresses(); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/116811") public void testReloadCredentialsFromKeystore() throws IOException { + assumeFalse("doesn't work in a FIPS JVM, but that's ok", inFipsJvm()); + // Register repository (?verify=false because we don't have access to the blob store yet) final var repositoryName = randomIdentifier(); registerRepository( diff --git a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java index 4597f93d38b92..e7687b2d6774f 100644 --- a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java +++ b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java @@ -288,8 +288,7 @@ class S3Repository extends MeteredBlobStoreRepository { deprecationLogger.critical( DeprecationCategory.SECURITY, "s3_repository_secret_settings", - "Using s3 access/secret key from repository settings. Instead " - + "store these in named clients and the elasticsearch keystore for secure settings." 
+ INSECURE_CREDENTIALS_DEPRECATION_WARNING ); } @@ -306,6 +305,11 @@ class S3Repository extends MeteredBlobStoreRepository { ); } + static final String INSECURE_CREDENTIALS_DEPRECATION_WARNING = Strings.format(""" + This repository's settings include an S3 access key and secret key, but repository settings are stored in plaintext and must not be \ + used for security-sensitive information. Instead, store all secure settings in the keystore. See [%s] for more information.\ + """, ReferenceDocs.SECURE_SETTINGS); + private static Map buildLocation(RepositoryMetadata metadata) { return Map.of("base_path", BASE_PATH_SETTING.get(metadata.settings()), "bucket", BUCKET_SETTING.get(metadata.settings())); } diff --git a/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/RepositoryCredentialsTests.java b/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/RepositoryCredentialsTests.java index 52fe152ba41e3..8e5f6634372db 100644 --- a/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/RepositoryCredentialsTests.java +++ b/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/RepositoryCredentialsTests.java @@ -107,10 +107,9 @@ public void testRepositoryCredentialsOverrideSecureCredentials() { assertThat(credentials.getAWSSecretKey(), is("insecure_aws_secret")); assertCriticalWarnings( + "[access_key] setting was deprecated in Elasticsearch and will be removed in a future release.", "[secret_key] setting was deprecated in Elasticsearch and will be removed in a future release.", - "Using s3 access/secret key from repository settings. Instead store these in named clients and" - + " the elasticsearch keystore for secure settings.", - "[access_key] setting was deprecated in Elasticsearch and will be removed in a future release." + S3Repository.INSECURE_CREDENTIALS_DEPRECATION_WARNING ); } @@ -194,10 +193,9 @@ public void testReinitSecureCredentials() { if (hasInsecureSettings) { assertCriticalWarnings( + "[access_key] setting was deprecated in Elasticsearch and will be removed in a future release.", "[secret_key] setting was deprecated in Elasticsearch and will be removed in a future release.", - "Using s3 access/secret key from repository settings. Instead store these in named clients and" - + " the elasticsearch keystore for secure settings." + S3Repository.INSECURE_CREDENTIALS_DEPRECATION_WARNING ); } } @@ -238,10 +236,7 @@ public void sendResponse(RestResponse response) { throw error.get(); } - assertWarnings( - "Using s3 access/secret key from repository settings. Instead store these in named clients and" - + " the elasticsearch keystore for secure settings."
- ); + assertWarnings(S3Repository.INSECURE_CREDENTIALS_DEPRECATION_WARNING); } private void createRepository(final String name, final Settings repositorySettings) { diff --git a/muted-tests.yml b/muted-tests.yml index 43d389f265e8a..6a49e0626a6a2 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -17,9 +17,6 @@ tests: - class: org.elasticsearch.upgrades.SecurityIndexRolesMetadataMigrationIT method: testMetadataMigratedAfterUpgrade issue: https://github.com/elastic/elasticsearch/issues/110232 -- class: "org.elasticsearch.xpack.searchablesnapshots.FrozenSearchableSnapshotsIntegTests" - issue: "https://github.com/elastic/elasticsearch/issues/110408" - method: "testCreateAndRestorePartialSearchableSnapshot" - class: org.elasticsearch.xpack.security.authz.store.NativePrivilegeStoreCacheTests method: testPopulationOfCacheWhenLoadingPrivilegesForAllApplications issue: https://github.com/elastic/elasticsearch/issues/110789 @@ -205,6 +202,9 @@ tests: - class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT method: test {categorize.Categorize SYNC} issue: https://github.com/elastic/elasticsearch/issues/113722 +- class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT + method: test {categorize.Categorize ASYNC} + issue: https://github.com/elastic/elasticsearch/issues/116373 - class: org.elasticsearch.kibana.KibanaThreadPoolIT method: testBlockedThreadPoolsRejectUserRequests issue: https://github.com/elastic/elasticsearch/issues/113939 @@ -327,11 +327,33 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/116542 - class: org.elasticsearch.xpack.esql.ccq.MultiClusterSpecIT issue: https://github.com/elastic/elasticsearch/issues/116817 -- class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT - issue: https://github.com/elastic/elasticsearch/issues/116444 - class: org.elasticsearch.xpack.spatial.search.GeoGridAggAndQueryConsistencyIT method: testGeoShapeGeoHex issue: https://github.com/elastic/elasticsearch/issues/115705 +- class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT + method: test {spatial.CentroidFromAirportsAfterIntersectsCompoundPredicateNoDocValues SYNC} + issue: https://github.com/elastic/elasticsearch/issues/116945 +- class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT + method: test {spatial.CentroidFromAirportsAfterIntersectsCompoundPredicateNoDocValues ASYNC} + issue: https://github.com/elastic/elasticsearch/issues/116945 +- class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT + method: test {spatial.CentroidFromAirportsAfterIntersectsCompoundPredicateNotIndexedNorDocValues SYNC} + issue: https://github.com/elastic/elasticsearch/issues/116945 +- class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT + method: test {spatial.CentroidFromAirportsAfterIntersectsCompoundPredicateNotIndexedNorDocValues ASYNC} + issue: https://github.com/elastic/elasticsearch/issues/116945 +- class: org.elasticsearch.search.basic.SearchWithRandomIOExceptionsIT + method: testRandomDirectoryIOExceptions + issue: https://github.com/elastic/elasticsearch/issues/114824 +- class: org.elasticsearch.xpack.application.CohereServiceUpgradeIT + method: testRerank {upgradedNodes=1} + issue: https://github.com/elastic/elasticsearch/issues/116973 +- class: org.elasticsearch.xpack.application.CohereServiceUpgradeIT + method: testCohereEmbeddings {upgradedNodes=1} + issue: https://github.com/elastic/elasticsearch/issues/116974 +- class: org.elasticsearch.xpack.application.CohereServiceUpgradeIT + method: 
testCohereEmbeddings {upgradedNodes=2} + issue: https://github.com/elastic/elasticsearch/issues/116975 # Examples: # diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/search/TransportSearchIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/search/TransportSearchIT.java index d1a68c68e7de5..e5a17c2a482a2 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/action/search/TransportSearchIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/action/search/TransportSearchIT.java @@ -27,6 +27,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.common.util.concurrent.AtomicArray; import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.IndexSettings; @@ -669,7 +670,7 @@ public Aggregator subAggregator(String aggregatorName) { } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) { return new InternalAggregation[] { buildEmptyAggregation() }; } diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 6aa13508ec467..824a49a4372f5 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -199,6 +199,8 @@ static TransportVersion def(int id) { public static final TransportVersion VERTEX_AI_INPUT_TYPE_ADDED = def(8_790_00_0); public static final TransportVersion SKIP_INNER_HITS_SEARCH_SOURCE = def(8_791_00_0); public static final TransportVersion QUERY_RULES_LIST_INCLUDES_TYPES = def(8_792_00_0); + public static final TransportVersion INDEX_STATS_ADDITIONAL_FIELDS = def(8_793_00_0); + public static final TransportVersion INDEX_STATS_ADDITIONAL_FIELDS_REVERT = def(8_794_00_0); /* * STOP! READ THIS FIRST! 
No, really, diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/RepositoryMetadata.java b/server/src/main/java/org/elasticsearch/cluster/metadata/RepositoryMetadata.java index 9b3abf38c519b..0b9c359006b23 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/RepositoryMetadata.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/RepositoryMetadata.java @@ -46,7 +46,11 @@ public class RepositoryMetadata implements Writeable { * @param settings repository settings */ public RepositoryMetadata(String name, String type, Settings settings) { - this(name, RepositoryData.MISSING_UUID, type, settings, RepositoryData.UNKNOWN_REPO_GEN, RepositoryData.EMPTY_REPO_GEN); + this(name, RepositoryData.MISSING_UUID, type, settings); + } + + public RepositoryMetadata(String name, String uuid, String type, Settings settings) { + this(name, uuid, type, settings, RepositoryData.UNKNOWN_REPO_GEN, RepositoryData.EMPTY_REPO_GEN); } public RepositoryMetadata(RepositoryMetadata metadata, long generation, long pendingGeneration) { diff --git a/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java b/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java index f3fe488555ef5..e502e592dbf25 100644 --- a/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java +++ b/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java @@ -82,6 +82,7 @@ public enum ReferenceDocs { FORMING_SINGLE_NODE_CLUSTERS, JDK_LOCALE_DIFFERENCES, ALLOCATION_EXPLAIN_MAX_RETRY, + SECURE_SETTINGS, // this comment keeps the ';' on the next line so every entry above has a trailing ',' which makes the diff for adding new links cleaner ; diff --git a/server/src/main/java/org/elasticsearch/common/settings/SecureSetting.java b/server/src/main/java/org/elasticsearch/common/settings/SecureSetting.java index 67ac55f7b19eb..192d47589333d 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/SecureSetting.java +++ b/server/src/main/java/org/elasticsearch/common/settings/SecureSetting.java @@ -192,9 +192,7 @@ private InsecureStringSetting(String name) { @Override public SecureString get(Settings settings) { if (ALLOW_INSECURE_SETTINGS == false && exists(settings)) { - throw new IllegalArgumentException( - "Setting [" + name + "] is insecure, " + "but property [allow_insecure_settings] is not set" - ); + throw new IllegalArgumentException("Setting [" + name + "] is insecure, use the elasticsearch keystore instead"); } return super.get(settings); } diff --git a/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java b/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java index fc39d2d2d80a4..623401220ec68 100644 --- a/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java +++ b/server/src/main/java/org/elasticsearch/repositories/RepositoriesService.java @@ -283,12 +283,22 @@ public RegisterRepositoryTask(final RepositoriesService repositoriesService, fin @Override public ClusterState execute(ClusterState currentState) { - RepositoryMetadata newRepositoryMetadata = new RepositoryMetadata(request.name(), request.type(), request.settings()); Metadata.Builder mdBuilder = Metadata.builder(currentState.metadata()); RepositoriesMetadata repositories = RepositoriesMetadata.get(currentState); List repositoriesMetadata = new ArrayList<>(repositories.repositories().size() + 1); for (RepositoryMetadata repositoryMetadata : repositories.repositories()) { - if 
(repositoryMetadata.name().equals(newRepositoryMetadata.name())) { + if (repositoryMetadata.name().equals(request.name())) { + final RepositoryMetadata newRepositoryMetadata = new RepositoryMetadata( + request.name(), + // Copy the UUID from the existing instance rather than resetting it back to MISSING_UUID which would force us to + // re-read the RepositoryData to get it again. In principle the new RepositoryMetadata might point to a different + // underlying repository at this point, but if so that'll cause things to fail in clear ways and eventually (before + // writing anything) we'll read the RepositoryData again and update the UUID in the RepositoryMetadata to match. See + // also #109936. + repositoryMetadata.uuid(), + request.type(), + request.settings() + ); Repository existing = repositoriesService.repositories.get(request.name()); if (existing == null) { existing = repositoriesService.internalRepositories.get(request.name()); diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index 5df7a8ea20f54..240d41ea589a7 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -200,6 +200,8 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp public static final String STATELESS_SHARD_WRITE_THREAD_NAME = "stateless_shard_write"; public static final String STATELESS_CLUSTER_STATE_READ_WRITE_THREAD_NAME = "stateless_cluster_state"; public static final String STATELESS_SHARD_PREWARMING_THREAD_NAME = "stateless_prewarm"; + public static final String SEARCHABLE_SNAPSHOTS_CACHE_FETCH_ASYNC_THREAD_NAME = "searchable_snapshots_cache_fetch_async"; + public static final String SEARCHABLE_SNAPSHOTS_CACHE_PREWARMING_THREAD_NAME = "searchable_snapshots_cache_prewarming"; /** * Prefix for the name of the root {@link RepositoryData} blob. 
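Editor's note on the re-registration change above: the new `RepositoryMetadata(String name, String uuid, String type, Settings settings)` overload added in this diff is what lets `RegisterRepositoryTask` carry the existing UUID forward instead of resetting it to `MISSING_UUID`. Below is a minimal, test-style sketch of that behavior, assuming only the constructors and accessors shown in this diff (`name()`, `uuid()`) plus `Settings`; the repository name, UUID, and setting values are hypothetical.

[source,java]
----
import org.elasticsearch.cluster.metadata.RepositoryMetadata;
import org.elasticsearch.common.settings.Settings;

class RepositoryUuidPreservationSketch {
    public static void main(String[] args) {
        // Hypothetical existing repository entry whose UUID was already read from RepositoryData.
        RepositoryMetadata existing = new RepositoryMetadata("my-repo", "existing-uuid", "s3", Settings.EMPTY);

        // Re-registering under the same name: the three-argument constructor would reset the
        // UUID to MISSING_UUID, forcing a re-read of RepositoryData. Copying the UUID from the
        // existing instance (as RegisterRepositoryTask now does) avoids that round trip.
        RepositoryMetadata updated = new RepositoryMetadata(
            existing.name(),
            existing.uuid(), // carried forward; see the comment referencing #109936 above
            "s3",
            Settings.builder().put("bucket", "new-bucket").build()
        );

        assert updated.uuid().equals("existing-uuid");
    }
}
----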
@@ -2183,7 +2185,9 @@ private void assertSnapshotOrStatelessPermittedThreadPool() { STATELESS_TRANSLOG_THREAD_NAME, STATELESS_SHARD_WRITE_THREAD_NAME, STATELESS_CLUSTER_STATE_READ_WRITE_THREAD_NAME, - STATELESS_SHARD_PREWARMING_THREAD_NAME + STATELESS_SHARD_PREWARMING_THREAD_NAME, + SEARCHABLE_SNAPSHOTS_CACHE_FETCH_ASYNC_THREAD_NAME, + SEARCHABLE_SNAPSHOTS_CACHE_PREWARMING_THREAD_NAME ); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java index b4d5512331b42..d08a76e51c6bd 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/AdaptingAggregator.java @@ -10,6 +10,7 @@ package org.elasticsearch.search.aggregations; import org.apache.lucene.search.ScoreMode; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.core.CheckedFunction; import org.elasticsearch.search.profile.aggregation.InternalAggregationProfileTree; @@ -98,10 +99,10 @@ public final void postCollection() throws IOException { } @Override - public final InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public final InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { InternalAggregation[] delegateResults = delegate.buildAggregations(owningBucketOrds); - InternalAggregation[] result = new InternalAggregation[owningBucketOrds.length]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { + InternalAggregation[] result = new InternalAggregation[Math.toIntExact(owningBucketOrds.size())]; + for (int ordIdx = 0; ordIdx < result.length; ordIdx++) { result[ordIdx] = adapt(delegateResults[ordIdx]); } return result; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java index 0d36469dddfdc..aa8d9fba554c1 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/Aggregator.java @@ -13,6 +13,8 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.core.Releasable; import org.elasticsearch.search.aggregations.support.AggregationPath; import org.elasticsearch.search.sort.SortOrder; @@ -142,7 +144,7 @@ public interface BucketComparator { * @return the results for each ordinal, in the same order as the array * of ordinals */ - public abstract InternalAggregation[] buildAggregations(long[] ordsToCollect) throws IOException; + public abstract InternalAggregation[] buildAggregations(LongArray ordsToCollect) throws IOException; /** * Release this aggregation and its sub-aggregations. @@ -153,11 +155,11 @@ public interface BucketComparator { * Build the result of this aggregation if it is at the "top level" * of the aggregation tree. If, instead, it is a sub-aggregation of * another aggregation then the aggregation that contains it will call - * {@link #buildAggregations(long[])}. + * {@link #buildAggregations(LongArray)}. 
*/ public final InternalAggregation buildTopLevel() throws IOException { assert parent() == null; - return buildAggregations(new long[] { 0 })[0]; + return buildAggregations(BigArrays.NON_RECYCLING_INSTANCE.newLongArray(1, true))[0]; } /** diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/NonCollectingAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/NonCollectingAggregator.java index 8accc6b15d820..4da2d10cfc0c2 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/NonCollectingAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/NonCollectingAggregator.java @@ -9,6 +9,7 @@ package org.elasticsearch.search.aggregations; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.search.aggregations.support.AggregationContext; import java.io.IOException; @@ -39,9 +40,9 @@ public final LeafBucketCollector getLeafCollector(AggregationExecutionContext ag } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { - InternalAggregation[] results = new InternalAggregation[owningBucketOrds.length]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { + InternalAggregation[] results = new InternalAggregation[Math.toIntExact(owningBucketOrds.size())]; + for (int ordIdx = 0; ordIdx < results.length; ordIdx++) { results[ordIdx] = buildEmptyAggregation(); } return results; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BestBucketsDeferringCollector.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BestBucketsDeferringCollector.java index 231130c920349..44d76d31be0e7 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BestBucketsDeferringCollector.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BestBucketsDeferringCollector.java @@ -20,6 +20,7 @@ import org.apache.lucene.util.packed.PackedInts; import org.apache.lucene.util.packed.PackedLongValues; import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.common.util.LongHash; import org.elasticsearch.search.aggregations.AggregationExecutionContext; import org.elasticsearch.search.aggregations.Aggregator; @@ -146,7 +147,7 @@ public void postCollection() throws IOException { * Replay the wrapped collector, but only on a selection of buckets. */ @Override - public void prepareSelectedBuckets(long... selectedBuckets) throws IOException { + public void prepareSelectedBuckets(LongArray selectedBuckets) throws IOException { if (finished == false) { throw new IllegalStateException("Cannot replay yet, collection is not finished: postCollect() has not been called"); } @@ -154,9 +155,9 @@ public void prepareSelectedBuckets(long... 
selectedBuckets) throws IOException { throw new IllegalStateException("Already been replayed"); } - this.selectedBuckets = new LongHash(selectedBuckets.length, BigArrays.NON_RECYCLING_INSTANCE); - for (long ord : selectedBuckets) { - this.selectedBuckets.add(ord); + this.selectedBuckets = new LongHash(selectedBuckets.size(), BigArrays.NON_RECYCLING_INSTANCE); + for (long i = 0; i < selectedBuckets.size(); i++) { + this.selectedBuckets.add(selectedBuckets.get(i)); } boolean needsScores = scoreMode().needsScores(); @@ -232,21 +233,22 @@ private static void failInCaseOfBadScorer(String message) { * been collected directly. */ @Override - public Aggregator wrap(final Aggregator in) { + public Aggregator wrap(final Aggregator in, BigArrays bigArrays) { return new WrappedAggregator(in) { @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { if (selectedBuckets == null) { throw new IllegalStateException("Collection has not been replayed yet."); } - long[] rebasedOrds = new long[owningBucketOrds.length]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - rebasedOrds[ordIdx] = selectedBuckets.find(owningBucketOrds[ordIdx]); - if (rebasedOrds[ordIdx] == -1) { - throw new IllegalStateException("Cannot build for a bucket which has not been collected"); + try (LongArray rebasedOrds = bigArrays.newLongArray(owningBucketOrds.size())) { + for (long ordIdx = 0; ordIdx < owningBucketOrds.size(); ordIdx++) { + rebasedOrds.set(ordIdx, selectedBuckets.find(owningBucketOrds.get(ordIdx))); + if (rebasedOrds.get(ordIdx) == -1) { + throw new IllegalStateException("Cannot build for a bucket which has not been collected"); + } } + return in.buildAggregations(rebasedOrds); } - return in.buildAggregations(rebasedOrds); } }; } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java index e6c26c4278807..252eb0877d024 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java @@ -10,7 +10,9 @@ import org.apache.lucene.index.LeafReaderContext; import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.util.IntArray; import org.elasticsearch.common.util.LongArray; +import org.elasticsearch.common.util.ObjectArray; import org.elasticsearch.core.Releasable; import org.elasticsearch.search.aggregations.AggregationErrors; import org.elasticsearch.search.aggregations.Aggregator; @@ -155,22 +157,22 @@ public final long bucketDocCount(long bucketOrd) { /** * Hook to allow taking an action before building the sub agg results. */ - protected void prepareSubAggs(long[] ordsToCollect) throws IOException {} + protected void prepareSubAggs(LongArray ordsToCollect) throws IOException {} /** * Build the results of the sub-aggregations of the buckets at each of * the provided ordinals. *

* Most aggregations should probably use something like - * {@link #buildSubAggsForAllBuckets(Object[][], ToLongFunction, BiConsumer)} - * or {@link #buildAggregationsForVariableBuckets(long[], LongKeyedBucketOrds, BucketBuilderForVariable, ResultBuilderForVariable)} - * or {@link #buildAggregationsForFixedBucketCount(long[], int, BucketBuilderForFixedCount, Function)} - * or {@link #buildAggregationsForSingleBucket(long[], SingleBucketResultBuilder)} + * {@link #buildSubAggsForAllBuckets(ObjectArray, ToLongFunction, BiConsumer)} + * or {@link #buildAggregationsForVariableBuckets(LongArray, LongKeyedBucketOrds, BucketBuilderForVariable, ResultBuilderForVariable)} + * or {@link #buildAggregationsForFixedBucketCount(LongArray, int, BucketBuilderForFixedCount, Function)} + * or {@link #buildAggregationsForSingleBucket(LongArray, SingleBucketResultBuilder)} * instead of calling this directly. * @return the sub-aggregation results in the same order as the provided * array of ordinals */ - protected final IntFunction buildSubAggsForBuckets(long[] bucketOrdsToCollect) throws IOException { + protected final IntFunction buildSubAggsForBuckets(LongArray bucketOrdsToCollect) throws IOException { prepareSubAggs(bucketOrdsToCollect); InternalAggregation[][] aggregations = new InternalAggregation[subAggregators.length][]; for (int i = 0; i < subAggregators.length; i++) { @@ -204,26 +206,28 @@ public int size() { * @param setAggs how to set the sub-aggregation results on a bucket */ protected final void buildSubAggsForAllBuckets( - B[][] buckets, + ObjectArray buckets, ToLongFunction bucketToOrd, BiConsumer setAggs ) throws IOException { - int totalBucketOrdsToCollect = 0; - for (B[] bucketsForOneResult : buckets) { - totalBucketOrdsToCollect += bucketsForOneResult.length; + long totalBucketOrdsToCollect = 0; + for (long b = 0; b < buckets.size(); b++) { + totalBucketOrdsToCollect += buckets.get(b).length; } - long[] bucketOrdsToCollect = new long[totalBucketOrdsToCollect]; - int s = 0; - for (B[] bucketsForOneResult : buckets) { - for (B bucket : bucketsForOneResult) { - bucketOrdsToCollect[s++] = bucketToOrd.applyAsLong(bucket); + + try (LongArray bucketOrdsToCollect = bigArrays().newLongArray(totalBucketOrdsToCollect)) { + int s = 0; + for (long ord = 0; ord < buckets.size(); ord++) { + for (B bucket : buckets.get(ord)) { + bucketOrdsToCollect.set(s++, bucketToOrd.applyAsLong(bucket)); + } } - } - var results = buildSubAggsForBuckets(bucketOrdsToCollect); - s = 0; - for (B[] bucket : buckets) { - for (int b = 0; b < bucket.length; b++) { - setAggs.accept(bucket[b], results.apply(s++)); + var results = buildSubAggsForBuckets(bucketOrdsToCollect); + s = 0; + for (long ord = 0; ord < buckets.size(); ord++) { + for (B value : buckets.get(ord)) { + setAggs.accept(value, results.apply(s++)); + } } } } @@ -237,37 +241,38 @@ protected final void buildSubAggsForAllBuckets( * @param resultBuilder how to build a result from buckets */ protected final InternalAggregation[] buildAggregationsForFixedBucketCount( - long[] owningBucketOrds, + LongArray owningBucketOrds, int bucketsPerOwningBucketOrd, BucketBuilderForFixedCount bucketBuilder, Function, InternalAggregation> resultBuilder ) throws IOException { - int totalBuckets = owningBucketOrds.length * bucketsPerOwningBucketOrd; - long[] bucketOrdsToCollect = new long[totalBuckets]; - int bucketOrdIdx = 0; - for (long owningBucketOrd : owningBucketOrds) { - long ord = owningBucketOrd * bucketsPerOwningBucketOrd; - for (int offsetInOwningOrd = 0; offsetInOwningOrd < 
bucketsPerOwningBucketOrd; offsetInOwningOrd++) { - bucketOrdsToCollect[bucketOrdIdx++] = ord++; + try (LongArray bucketOrdsToCollect = bigArrays().newLongArray(owningBucketOrds.size() * bucketsPerOwningBucketOrd)) { + int bucketOrdIdx = 0; + for (long i = 0; i < owningBucketOrds.size(); i++) { + long ord = owningBucketOrds.get(i) * bucketsPerOwningBucketOrd; + for (int offsetInOwningOrd = 0; offsetInOwningOrd < bucketsPerOwningBucketOrd; offsetInOwningOrd++) { + bucketOrdsToCollect.set(bucketOrdIdx++, ord++); + } } - } - bucketOrdIdx = 0; - var subAggregationResults = buildSubAggsForBuckets(bucketOrdsToCollect); - InternalAggregation[] results = new InternalAggregation[owningBucketOrds.length]; - for (int owningOrdIdx = 0; owningOrdIdx < owningBucketOrds.length; owningOrdIdx++) { - List buckets = new ArrayList<>(bucketsPerOwningBucketOrd); - for (int offsetInOwningOrd = 0; offsetInOwningOrd < bucketsPerOwningBucketOrd; offsetInOwningOrd++) { - buckets.add( - bucketBuilder.build( - offsetInOwningOrd, - bucketDocCount(bucketOrdsToCollect[bucketOrdIdx]), - subAggregationResults.apply(bucketOrdIdx++) - ) - ); + bucketOrdIdx = 0; + var subAggregationResults = buildSubAggsForBuckets(bucketOrdsToCollect); + + InternalAggregation[] results = new InternalAggregation[Math.toIntExact(owningBucketOrds.size())]; + for (int owningOrdIdx = 0; owningOrdIdx < results.length; owningOrdIdx++) { + List buckets = new ArrayList<>(bucketsPerOwningBucketOrd); + for (int offsetInOwningOrd = 0; offsetInOwningOrd < bucketsPerOwningBucketOrd; offsetInOwningOrd++) { + buckets.add( + bucketBuilder.build( + offsetInOwningOrd, + bucketDocCount(bucketOrdsToCollect.get(bucketOrdIdx)), + subAggregationResults.apply(bucketOrdIdx++) + ) + ); + } + results[owningOrdIdx] = resultBuilder.apply(buckets); } - results[owningOrdIdx] = resultBuilder.apply(buckets); + return results; } - return results; } @FunctionalInterface @@ -280,17 +285,19 @@ protected interface BucketBuilderForFixedCount { * @param owningBucketOrds owning bucket ordinals for which to build the results * @param resultBuilder how to build a result from the sub aggregation results */ - protected final InternalAggregation[] buildAggregationsForSingleBucket(long[] owningBucketOrds, SingleBucketResultBuilder resultBuilder) - throws IOException { + protected final InternalAggregation[] buildAggregationsForSingleBucket( + LongArray owningBucketOrds, + SingleBucketResultBuilder resultBuilder + ) throws IOException { /* * It'd be entirely reasonable to call * `consumeBucketsAndMaybeBreak(owningBucketOrds.length)` * here but we don't because single bucket aggs never have. 
*/ var subAggregationResults = buildSubAggsForBuckets(owningBucketOrds); - InternalAggregation[] results = new InternalAggregation[owningBucketOrds.length]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - results[ordIdx] = resultBuilder.build(owningBucketOrds[ordIdx], subAggregationResults.apply(ordIdx)); + InternalAggregation[] results = new InternalAggregation[Math.toIntExact(owningBucketOrds.size())]; + for (int ordIdx = 0; ordIdx < results.length; ordIdx++) { + results[ordIdx] = resultBuilder.build(owningBucketOrds.get(ordIdx), subAggregationResults.apply(ordIdx)); } return results; } @@ -307,54 +314,60 @@ protected interface SingleBucketResultBuilder { * @param bucketOrds hash of values to the bucket ordinal */ protected final InternalAggregation[] buildAggregationsForVariableBuckets( - long[] owningBucketOrds, + LongArray owningBucketOrds, LongKeyedBucketOrds bucketOrds, BucketBuilderForVariable bucketBuilder, ResultBuilderForVariable resultBuilder ) throws IOException { long totalOrdsToCollect = 0; - final int[] bucketsInOrd = new int[owningBucketOrds.length]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - final long bucketCount = bucketOrds.bucketsInOrd(owningBucketOrds[ordIdx]); - bucketsInOrd[ordIdx] = (int) bucketCount; - totalOrdsToCollect += bucketCount; - } - if (totalOrdsToCollect > Integer.MAX_VALUE) { - // TODO: We should instrument this error. While it is correct for it to be a 400 class IllegalArgumentException, there is not - // much the user can do about that. If this occurs with any frequency, we should do something about it. - throw new IllegalArgumentException( - "Can't collect more than [" + Integer.MAX_VALUE + "] buckets but attempted [" + totalOrdsToCollect + "]" - ); - } - long[] bucketOrdsToCollect = new long[(int) totalOrdsToCollect]; - int b = 0; - for (long owningBucketOrd : owningBucketOrds) { - LongKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrd); - while (ordsEnum.next()) { - bucketOrdsToCollect[b++] = ordsEnum.ord(); + try (IntArray bucketsInOrd = bigArrays().newIntArray(owningBucketOrds.size())) { + for (long ordIdx = 0; ordIdx < owningBucketOrds.size(); ordIdx++) { + final long bucketCount = bucketOrds.bucketsInOrd(owningBucketOrds.get(ordIdx)); + bucketsInOrd.set(ordIdx, (int) bucketCount); + totalOrdsToCollect += bucketCount; } - } - var subAggregationResults = buildSubAggsForBuckets(bucketOrdsToCollect); - - InternalAggregation[] results = new InternalAggregation[owningBucketOrds.length]; - b = 0; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - List buckets = new ArrayList<>(bucketsInOrd[ordIdx]); - LongKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrds[ordIdx]); - while (ordsEnum.next()) { - if (bucketOrdsToCollect[b] != ordsEnum.ord()) { - // If we hit this, something has gone horribly wrong and we need to investigate - throw AggregationErrors.iterationOrderChangedWithoutMutating( - bucketOrds.toString(), - ordsEnum.ord(), - bucketOrdsToCollect[b] - ); + if (totalOrdsToCollect > Integer.MAX_VALUE) { + // TODO: We should instrument this error. While it is correct for it to be a 400 class IllegalArgumentException, there is + // not + // much the user can do about that. If this occurs with any frequency, we should do something about it. 
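Two sizing idioms recur throughout this refactor and both appear in this hunk. Counts that depend on the data, like `totalOrdsToCollect`, are accumulated in a `long` and checked against `Integer.MAX_VALUE` with a descriptive 400-class error, because the collected positions still feed int-indexed structures; sizes that are already known to fit are narrowed with `Math.toIntExact`, which throws instead of silently truncating. A small sketch of the difference (the values are made up):

class SizingSketch {
    public static void main(String[] args) {
        long totalOrdsToCollect = 3_000_000_000L; // data-dependent, can legitimately exceed an int
        if (totalOrdsToCollect > Integer.MAX_VALUE) {
            // the aggregator raises IllegalArgumentException with this style of message
            System.out.println("Can't collect more than [" + Integer.MAX_VALUE
                + "] buckets but attempted [" + totalOrdsToCollect + "]");
        }
        System.out.println(Math.toIntExact(1_024L)); // known-small: narrows safely
        try {
            Math.toIntExact(totalOrdsToCollect);     // would truncate, so it throws
        } catch (ArithmeticException e) {
            System.out.println("integer overflow");
        }
    }
}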
+ throw new IllegalArgumentException( + "Can't collect more than [" + Integer.MAX_VALUE + "] buckets but attempted [" + totalOrdsToCollect + "]" + ); + } + try (LongArray bucketOrdsToCollect = bigArrays().newLongArray(totalOrdsToCollect)) { + int b = 0; + for (long i = 0; i < owningBucketOrds.size(); i++) { + LongKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrds.get(i)); + while (ordsEnum.next()) { + bucketOrdsToCollect.set(b++, ordsEnum.ord()); + } } - buckets.add(bucketBuilder.build(ordsEnum.value(), bucketDocCount(ordsEnum.ord()), subAggregationResults.apply(b++))); + var subAggregationResults = buildSubAggsForBuckets(bucketOrdsToCollect); + + InternalAggregation[] results = new InternalAggregation[Math.toIntExact(owningBucketOrds.size())]; + b = 0; + for (int ordIdx = 0; ordIdx < results.length; ordIdx++) { + final long owningBucketOrd = owningBucketOrds.get(ordIdx); + List buckets = new ArrayList<>(bucketsInOrd.get(ordIdx)); + LongKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrd); + while (ordsEnum.next()) { + if (bucketOrdsToCollect.get(b) != ordsEnum.ord()) { + // If we hit this, something has gone horribly wrong and we need to investigate + throw AggregationErrors.iterationOrderChangedWithoutMutating( + bucketOrds.toString(), + ordsEnum.ord(), + bucketOrdsToCollect.get(b) + ); + } + buckets.add( + bucketBuilder.build(ordsEnum.value(), bucketDocCount(ordsEnum.ord()), subAggregationResults.apply(b++)) + ); + } + results[ordIdx] = resultBuilder.build(owningBucketOrd, buckets); + } + return results; } - results[ordIdx] = resultBuilder.build(owningBucketOrds[ordIdx], buckets); } - return results; } @FunctionalInterface diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DeferableBucketAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DeferableBucketAggregator.java index 84a15b6d1c0eb..64744b705e222 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DeferableBucketAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DeferableBucketAggregator.java @@ -9,6 +9,7 @@ package org.elasticsearch.search.aggregations.bucket; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.BucketCollector; @@ -65,7 +66,7 @@ protected void doPreCollection() throws IOException { } deferredAggregations.add(subAggregators[i]); deferredAggregationNames.add(subAggregators[i].name()); - subAggregators[i] = deferringCollector.wrap(subAggregators[i]); + subAggregators[i] = deferringCollector.wrap(subAggregators[i], bigArrays()); } else { collectors.add(subAggregators[i]); } @@ -87,7 +88,7 @@ protected DeferringBucketCollector deferringCollector() { /** * Build the {@link DeferringBucketCollector}. The default implementation * replays all hits against the buckets selected by - * {#link {@link DeferringBucketCollector#prepareSelectedBuckets(long...)}. + * {#link {@link DeferringBucketCollector#prepareSelectedBuckets(LongArray)}. 
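Earlier in this hunk, `buildAggregationsForVariableBuckets` still enumerates `bucketOrds` twice, and the `iterationOrderChangedWithoutMutating` check guards the contract both passes rely on: the ordinals recorded while requesting sub-aggregation results must come back in exactly the same sequence on the second enumeration, because position `b` is the only join key between a bucket and its results. A toy illustration of the invariant, with a plain `List` standing in for `BucketOrdsEnum` (all names here are hypothetical):

import java.util.List;

class TwoPassSketch {
    public static void main(String[] args) {
        List<Long> ordsEnum = List.of(7L, 3L, 9L);   // stands in for the bucket-ords enumeration
        long[] collected = new long[ordsEnum.size()];
        int b = 0;
        for (long ord : ordsEnum) {
            collected[b++] = ord;                    // pass 1: what we asked sub-aggs to build
        }
        b = 0;
        for (long ord : ordsEnum) {                  // pass 2: attach results by position
            if (collected[b++] != ord) {
                throw new IllegalStateException("iteration order changed without mutating");
            }
        }
        System.out.println("order stable, results line up");
    }
}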
*/ protected DeferringBucketCollector buildDeferringCollector() { return new BestBucketsDeferringCollector(topLevelQuery(), searcher(), descendsFromGlobalAggregator(parent())); @@ -107,7 +108,7 @@ protected boolean shouldDefer(Aggregator aggregator) { } @Override - protected final void prepareSubAggs(long[] bucketOrdsToCollect) throws IOException { + protected final void prepareSubAggs(LongArray bucketOrdsToCollect) throws IOException { if (deferringCollector != null) { deferringCollector.prepareSelectedBuckets(bucketOrdsToCollect); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DeferringBucketCollector.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DeferringBucketCollector.java index 44cff2651e273..468fec29a9420 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DeferringBucketCollector.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/DeferringBucketCollector.java @@ -10,6 +10,8 @@ package org.elasticsearch.search.aggregations.bucket; import org.apache.lucene.search.ScoreMode; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.search.aggregations.AggregationExecutionContext; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.BucketCollector; @@ -37,13 +39,13 @@ public DeferringBucketCollector() {} /** * Replay the deferred hits on the selected buckets. */ - public abstract void prepareSelectedBuckets(long... selectedBuckets) throws IOException; + public abstract void prepareSelectedBuckets(LongArray selectedBuckets) throws IOException; /** * Wrap the provided aggregator so that it behaves (almost) as if it had * been collected directly. 
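With the varargs signature gone, callers of the deferring collector now materialize the selected ordinals into a tracked `LongArray` before replaying deferred hits. A hypothetical call-site sketch (the helper, its arguments, and the 0..n-1 selection are assumptions for illustration, not code from this change):

import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.LongArray;
import org.elasticsearch.search.aggregations.bucket.DeferringBucketCollector;

import java.io.IOException;

final class ReplaySketch {
    // was: collector.prepareSelectedBuckets(0L, 1L, ..., n - 1)
    static void replayFirstN(DeferringBucketCollector collector, BigArrays bigArrays, int n) throws IOException {
        try (LongArray selected = bigArrays.newLongArray(n)) {
            for (int i = 0; i < n; i++) {
                selected.set(i, i); // select the first n owning ordinals
            }
            collector.prepareSelectedBuckets(selected);
        }
    }
}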
*/ - public Aggregator wrap(final Aggregator in) { + public Aggregator wrap(final Aggregator in, BigArrays bigArrays) { return new WrappedAggregator(in); } @@ -80,7 +82,7 @@ public Aggregator subAggregator(String name) { } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { return in.buildAggregations(owningBucketOrds); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java index 2e9e04eca4afc..67bf4bbd29eda 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeAggregator.java @@ -35,6 +35,7 @@ import org.apache.lucene.util.RoaringDocIdSet; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.Rounding; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.core.Releasables; import org.elasticsearch.core.Strings; import org.elasticsearch.index.IndexSortConfig; @@ -184,50 +185,51 @@ protected void doPostCollection() throws IOException { } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { // Composite aggregator must be at the top of the aggregation tree - assert owningBucketOrds.length == 1 && owningBucketOrds[0] == 0L; + assert owningBucketOrds.size() == 1 && owningBucketOrds.get(0) == 0L; if (deferredCollectors != NO_OP_BUCKET_COLLECTOR) { // Replay all documents that contain at least one top bucket (collected during the first pass). runDeferredCollections(); } - int num = Math.min(size, (int) queue.size()); + final int num = Math.min(size, (int) queue.size()); final InternalComposite.InternalBucket[] buckets = new InternalComposite.InternalBucket[num]; - long[] bucketOrdsToCollect = new long[(int) queue.size()]; - for (int i = 0; i < queue.size(); i++) { - bucketOrdsToCollect[i] = i; - } - var subAggsForBuckets = buildSubAggsForBuckets(bucketOrdsToCollect); - while (queue.size() > 0) { - int slot = queue.pop(); - CompositeKey key = queue.toCompositeKey(slot); - InternalAggregations aggs = subAggsForBuckets.apply(slot); - long docCount = queue.getDocCount(slot); - buckets[(int) queue.size()] = new InternalComposite.InternalBucket( - sourceNames, - formats, - key, - reverseMuls, - missingOrders, - docCount, - aggs - ); + try (LongArray bucketOrdsToCollect = bigArrays().newLongArray(queue.size())) { + for (int i = 0; i < queue.size(); i++) { + bucketOrdsToCollect.set(i, i); + } + var subAggsForBuckets = buildSubAggsForBuckets(bucketOrdsToCollect); + while (queue.size() > 0) { + int slot = queue.pop(); + CompositeKey key = queue.toCompositeKey(slot); + InternalAggregations aggs = subAggsForBuckets.apply(slot); + long docCount = queue.getDocCount(slot); + buckets[(int) queue.size()] = new InternalComposite.InternalBucket( + sourceNames, + formats, + key, + reverseMuls, + missingOrders, + docCount, + aggs + ); + } + CompositeKey lastBucket = num > 0 ? 
buckets[num - 1].getRawKey() : null; + return new InternalAggregation[] { + new InternalComposite( + name, + size, + sourceNames, + formats, + Arrays.asList(buckets), + lastBucket, + reverseMuls, + missingOrders, + earlyTerminated, + metadata() + ) }; } - CompositeKey lastBucket = num > 0 ? buckets[num - 1].getRawKey() : null; - return new InternalAggregation[] { - new InternalComposite( - name, - size, - sourceNames, - formats, - Arrays.asList(buckets), - lastBucket, - reverseMuls, - missingOrders, - earlyTerminated, - metadata() - ) }; } @Override @@ -244,6 +246,7 @@ public InternalAggregation buildEmptyAggregation() { false, metadata() ); + } private void finishLeaf() { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/countedterms/CountedTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/countedterms/CountedTermsAggregator.java index f774f67b3df8f..aaf852203dd7d 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/countedterms/CountedTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/countedterms/CountedTermsAggregator.java @@ -13,6 +13,8 @@ import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.util.LongArray; +import org.elasticsearch.common.util.ObjectArray; import org.elasticsearch.core.Releasables; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.AggregationExecutionContext; @@ -108,70 +110,80 @@ private void collectOrdinal(long bucketOrdinal, int doc, LeafBucketCollector sub } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { - StringTerms.Bucket[][] topBucketsPerOrd = new StringTerms.Bucket[owningBucketOrds.length][]; - long[] otherDocCounts = new long[owningBucketOrds.length]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize()); - - // as users can't control sort order, in practice we'll always sort by doc count descending - try ( - BucketPriorityQueue ordered = new BucketPriorityQueue<>( - size, - bigArrays(), - partiallyBuiltBucketComparator - ) - ) { - StringTerms.Bucket spare = null; - BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrds[ordIdx]); - Supplier emptyBucketBuilder = () -> new StringTerms.Bucket(new BytesRef(), 0, null, false, 0, format); - while (ordsEnum.next()) { - long docCount = bucketDocCount(ordsEnum.ord()); - otherDocCounts[ordIdx] += docCount; - if (spare == null) { - spare = emptyBucketBuilder.get(); + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { + try ( + LongArray otherDocCounts = bigArrays().newLongArray(owningBucketOrds.size()); + ObjectArray topBucketsPerOrd = bigArrays().newObjectArray(owningBucketOrds.size()) + ) { + for (long ordIdx = 0; ordIdx < topBucketsPerOrd.size(); ordIdx++) { + int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize()); + + // as users can't control sort order, in practice we'll always sort by doc count descending + try ( + BucketPriorityQueue ordered = new BucketPriorityQueue<>( + size, + bigArrays(), + partiallyBuiltBucketComparator + ) + ) { + StringTerms.Bucket spare = null; + BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum = 
bucketOrds.ordsEnum(owningBucketOrds.get(ordIdx)); + Supplier emptyBucketBuilder = () -> new StringTerms.Bucket( + new BytesRef(), + 0, + null, + false, + 0, + format + ); + while (ordsEnum.next()) { + long docCount = bucketDocCount(ordsEnum.ord()); + otherDocCounts.increment(ordIdx, docCount); + if (spare == null) { + spare = emptyBucketBuilder.get(); + } + ordsEnum.readValue(spare.getTermBytes()); + spare.setDocCount(docCount); + spare.setBucketOrd(ordsEnum.ord()); + spare = ordered.insertWithOverflow(spare); } - ordsEnum.readValue(spare.getTermBytes()); - spare.setDocCount(docCount); - spare.setBucketOrd(ordsEnum.ord()); - spare = ordered.insertWithOverflow(spare); - } - topBucketsPerOrd[ordIdx] = new StringTerms.Bucket[(int) ordered.size()]; - for (int i = (int) ordered.size() - 1; i >= 0; --i) { - topBucketsPerOrd[ordIdx][i] = ordered.pop(); - otherDocCounts[ordIdx] -= topBucketsPerOrd[ordIdx][i].getDocCount(); - topBucketsPerOrd[ordIdx][i].setTermBytes(BytesRef.deepCopyOf(topBucketsPerOrd[ordIdx][i].getTermBytes())); + topBucketsPerOrd.set(ordIdx, new StringTerms.Bucket[(int) ordered.size()]); + for (int i = (int) ordered.size() - 1; i >= 0; --i) { + topBucketsPerOrd.get(ordIdx)[i] = ordered.pop(); + otherDocCounts.increment(ordIdx, -topBucketsPerOrd.get(ordIdx)[i].getDocCount()); + topBucketsPerOrd.get(ordIdx)[i].setTermBytes(BytesRef.deepCopyOf(topBucketsPerOrd.get(ordIdx)[i].getTermBytes())); + } } } - } - buildSubAggsForAllBuckets(topBucketsPerOrd, InternalTerms.Bucket::getBucketOrd, InternalTerms.Bucket::setAggregations); - InternalAggregation[] result = new InternalAggregation[owningBucketOrds.length]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - final BucketOrder reduceOrder; - if (isKeyOrder(order) == false) { - reduceOrder = InternalOrder.key(true); - Arrays.sort(topBucketsPerOrd[ordIdx], reduceOrder.comparator()); - } else { - reduceOrder = order; + buildSubAggsForAllBuckets(topBucketsPerOrd, InternalTerms.Bucket::getBucketOrd, InternalTerms.Bucket::setAggregations); + InternalAggregation[] result = new InternalAggregation[Math.toIntExact(topBucketsPerOrd.size())]; + for (int ordIdx = 0; ordIdx < result.length; ordIdx++) { + final BucketOrder reduceOrder; + if (isKeyOrder(order) == false) { + reduceOrder = InternalOrder.key(true); + Arrays.sort(topBucketsPerOrd.get(ordIdx), reduceOrder.comparator()); + } else { + reduceOrder = order; + } + result[ordIdx] = new StringTerms( + name, + reduceOrder, + order, + bucketCountThresholds.getRequiredSize(), + bucketCountThresholds.getMinDocCount(), + metadata(), + format, + bucketCountThresholds.getShardSize(), + false, + otherDocCounts.get(ordIdx), + Arrays.asList(topBucketsPerOrd.get(ordIdx)), + null + ); } - result[ordIdx] = new StringTerms( - name, - reduceOrder, - order, - bucketCountThresholds.getRequiredSize(), - bucketCountThresholds.getMinDocCount(), - metadata(), - format, - bucketCountThresholds.getShardSize(), - false, - otherDocCounts[ordIdx], - Arrays.asList(topBucketsPerOrd[ordIdx]), - null - ); + return result; } - return result; } @Override diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java index fede97c7fddee..69eff3630a8f4 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/FiltersAggregator.java @@ 
-20,6 +20,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.core.CheckedFunction; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.search.aggregations.AggregationExecutionContext; @@ -208,7 +209,7 @@ List filters() { } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { return buildAggregationsForFixedBucketCount( owningBucketOrds, filters.size() + (otherBucketKey == null ? 0 : 1), diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoGridAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoGridAggregator.java index cde26bb2214ed..0e63e26e77a55 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoGridAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoGridAggregator.java @@ -12,6 +12,8 @@ import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.ScoreMode; +import org.elasticsearch.common.util.LongArray; +import org.elasticsearch.common.util.ObjectArray; import org.elasticsearch.core.Releasables; import org.elasticsearch.search.aggregations.AggregationExecutionContext; import org.elasticsearch.search.aggregations.Aggregator; @@ -132,39 +134,40 @@ public void collect(int doc, long owningBucketOrd) throws IOException { protected abstract InternalGeoGridBucket newEmptyBucket(); @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { - InternalGeoGridBucket[][] topBucketsPerOrd = new InternalGeoGridBucket[owningBucketOrds.length][]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - int size = (int) Math.min(bucketOrds.bucketsInOrd(owningBucketOrds[ordIdx]), shardSize); - - try (BucketPriorityQueue ordered = new BucketPriorityQueue<>(size, bigArrays())) { - InternalGeoGridBucket spare = null; - LongKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrds[ordIdx]); - while (ordsEnum.next()) { - if (spare == null) { - spare = newEmptyBucket(); - } + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { + try (ObjectArray topBucketsPerOrd = bigArrays().newObjectArray(owningBucketOrds.size())) { + for (long ordIdx = 0; ordIdx < topBucketsPerOrd.size(); ordIdx++) { + int size = (int) Math.min(bucketOrds.bucketsInOrd(owningBucketOrds.get(ordIdx)), shardSize); + + try (BucketPriorityQueue ordered = new BucketPriorityQueue<>(size, bigArrays())) { + InternalGeoGridBucket spare = null; + LongKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrds.get(ordIdx)); + while (ordsEnum.next()) { + if (spare == null) { + spare = newEmptyBucket(); + } - // need a special function to keep the source bucket - // up-to-date so it can get the appropriate key - spare.hashAsLong = ordsEnum.value(); - spare.docCount = bucketDocCount(ordsEnum.ord()); - spare.bucketOrd = ordsEnum.ord(); - spare = ordered.insertWithOverflow(spare); - } + // need a special function to keep the source bucket + // up-to-date so it can get the appropriate key + spare.hashAsLong = 
ordsEnum.value(); + spare.docCount = bucketDocCount(ordsEnum.ord()); + spare.bucketOrd = ordsEnum.ord(); + spare = ordered.insertWithOverflow(spare); + } - topBucketsPerOrd[ordIdx] = new InternalGeoGridBucket[(int) ordered.size()]; - for (int i = (int) ordered.size() - 1; i >= 0; --i) { - topBucketsPerOrd[ordIdx][i] = ordered.pop(); + topBucketsPerOrd.set(ordIdx, new InternalGeoGridBucket[(int) ordered.size()]); + for (int i = (int) ordered.size() - 1; i >= 0; --i) { + topBucketsPerOrd.get(ordIdx)[i] = ordered.pop(); + } } } + buildSubAggsForAllBuckets(topBucketsPerOrd, b -> b.bucketOrd, (b, aggs) -> b.aggregations = aggs); + InternalAggregation[] results = new InternalAggregation[Math.toIntExact(topBucketsPerOrd.size())]; + for (int ordIdx = 0; ordIdx < results.length; ordIdx++) { + results[ordIdx] = buildAggregation(name, requiredSize, Arrays.asList(topBucketsPerOrd.get(ordIdx)), metadata()); + } + return results; } - buildSubAggsForAllBuckets(topBucketsPerOrd, b -> b.bucketOrd, (b, aggs) -> b.aggregations = aggs); - InternalAggregation[] results = new InternalAggregation[owningBucketOrds.length]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - results[ordIdx] = buildAggregation(name, requiredSize, Arrays.asList(topBucketsPerOrd[ordIdx]), metadata()); - } - return results; } @Override diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/global/GlobalAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/global/GlobalAggregator.java index 4c87b5961ac1a..295512e09391b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/global/GlobalAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/global/GlobalAggregator.java @@ -13,6 +13,7 @@ import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Scorable; import org.apache.lucene.search.Weight; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.search.aggregations.AggregationExecutionContext; import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.CardinalityUpperBound; @@ -60,8 +61,8 @@ public void setScorer(Scorable scorer) throws IOException { } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { - assert owningBucketOrds.length == 1 && owningBucketOrds[0] == 0 : "global aggregator can only be a top level aggregator"; + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { + assert owningBucketOrds.size() == 1 && owningBucketOrds.get(0) == 0 : "global aggregator can only be a top level aggregator"; return buildAggregationsForSingleBucket( owningBucketOrds, (owningBucketOrd, subAggregationResults) -> new InternalGlobal( diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AbstractHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AbstractHistogramAggregator.java index b81d8b002b6b2..ed687df6377dd 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AbstractHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AbstractHistogramAggregator.java @@ -10,6 +10,7 @@ package org.elasticsearch.search.aggregations.bucket.histogram; import org.apache.lucene.util.CollectionUtil; +import org.elasticsearch.common.util.LongArray; import 
org.elasticsearch.core.Releasables; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.Aggregator; @@ -79,7 +80,7 @@ public AbstractHistogramAggregator( } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { return buildAggregationsForVariableBuckets(owningBucketOrds, bucketOrds, (bucketValue, docCount, subAggregationResults) -> { double roundKey = Double.longBitsToDouble(bucketValue); double key = roundKey * interval + offset; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 86c320d8dc319..cc2db63fa5ec5 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -17,6 +17,7 @@ import org.apache.lucene.util.CollectionUtil; import org.elasticsearch.common.Rounding; import org.elasticsearch.common.Rounding.DateTimeUnit; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.core.CheckedFunction; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.Releasables; @@ -337,7 +338,7 @@ private void addRoundedValue(long rounded, int doc, long owningBucketOrd, LeafBu } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { return buildAggregationsForVariableBuckets(owningBucketOrds, bucketOrds, (bucketValue, docCount, subAggregationResults) -> { return new InternalDateHistogram.Bucket(bucketValue, docCount, keyed, formatter, subAggregationResults); }, (owningBucketOrd, buckets) -> { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateRangeHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateRangeHistogramAggregator.java index 2bfd85e5fe03a..f385f7c34f6b7 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateRangeHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/DateRangeHistogramAggregator.java @@ -12,6 +12,7 @@ import org.apache.lucene.search.ScoreMode; import org.apache.lucene.util.CollectionUtil; import org.elasticsearch.common.Rounding; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.Releasables; import org.elasticsearch.index.fielddata.FieldData; @@ -163,7 +164,7 @@ public void collect(int doc, long owningBucketOrd) throws IOException { } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { return buildAggregationsForVariableBuckets( owningBucketOrds, bucketOrds, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/VariableWidthHistogramAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/VariableWidthHistogramAggregator.java index 1afb06067f770..86ec1666e2cea 100644 --- 
a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/VariableWidthHistogramAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/VariableWidthHistogramAggregator.java @@ -14,6 +14,7 @@ import org.apache.lucene.util.InPlaceMergeSorter; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.DoubleArray; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.Releasable; import org.elasticsearch.core.Releasables; @@ -565,34 +566,35 @@ public void collect(int doc, long bucket) throws IOException { } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { int numClusters = collector.finalNumBuckets(); - long[] bucketOrdsToCollect = new long[numClusters]; - for (int i = 0; i < numClusters; i++) { - bucketOrdsToCollect[i] = i; - } + try (LongArray bucketOrdsToCollect = bigArrays().newLongArray(numClusters)) { + for (int i = 0; i < numClusters; i++) { + bucketOrdsToCollect.set(i, i); + } - var subAggregationResults = buildSubAggsForBuckets(bucketOrdsToCollect); + var subAggregationResults = buildSubAggsForBuckets(bucketOrdsToCollect); - List buckets = new ArrayList<>(numClusters); - for (int bucketOrd = 0; bucketOrd < numClusters; bucketOrd++) { - buckets.add(collector.buildBucket(bucketOrd, subAggregationResults.apply(bucketOrd))); - } + List buckets = new ArrayList<>(numClusters); + for (int bucketOrd = 0; bucketOrd < numClusters; bucketOrd++) { + buckets.add(collector.buildBucket(bucketOrd, subAggregationResults.apply(bucketOrd))); + } - Function, InternalAggregation> resultBuilder = bucketsToFormat -> { - // The contract of the histogram aggregation is that shards must return - // buckets ordered by centroid in ascending order - CollectionUtil.introSort(bucketsToFormat, BucketOrder.key(true).comparator()); + Function, InternalAggregation> resultBuilder = bucketsToFormat -> { + // The contract of the histogram aggregation is that shards must return + // buckets ordered by centroid in ascending order + CollectionUtil.introSort(bucketsToFormat, BucketOrder.key(true).comparator()); - InternalVariableWidthHistogram.EmptyBucketInfo emptyBucketInfo = new InternalVariableWidthHistogram.EmptyBucketInfo( - buildEmptySubAggregations() - ); + InternalVariableWidthHistogram.EmptyBucketInfo emptyBucketInfo = new InternalVariableWidthHistogram.EmptyBucketInfo( + buildEmptySubAggregations() + ); - return new InternalVariableWidthHistogram(name, bucketsToFormat, emptyBucketInfo, numBuckets, formatter, metadata()); - }; + return new InternalVariableWidthHistogram(name, bucketsToFormat, emptyBucketInfo, numBuckets, formatter, metadata()); + }; - return new InternalAggregation[] { resultBuilder.apply(buckets) }; + return new InternalAggregation[] { resultBuilder.apply(buckets) }; + } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/missing/MissingAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/missing/MissingAggregator.java index 5c8f8ab9c562e..b49668e45b889 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/missing/MissingAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/missing/MissingAggregator.java @@ -8,6 +8,7 @@ */ package 
org.elasticsearch.search.aggregations.bucket.missing; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.index.fielddata.DocValueBits; import org.elasticsearch.search.aggregations.AggregationExecutionContext; import org.elasticsearch.search.aggregations.Aggregator; @@ -67,7 +68,7 @@ public void collect(int doc, long bucket) throws IOException { } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { return buildAggregationsForSingleBucket( owningBucketOrds, (owningBucketOrd, subAggregationResults) -> new InternalMissing( diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java index 12182a5931a4f..1aef1fbc43ac8 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/NestedAggregator.java @@ -21,6 +21,7 @@ import org.apache.lucene.search.join.BitSetProducer; import org.apache.lucene.util.BitSet; import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.index.mapper.NestedObjectMapper; import org.elasticsearch.search.aggregations.AggregationExecutionContext; import org.elasticsearch.search.aggregations.Aggregator; @@ -124,7 +125,7 @@ private void processBufferedDocs() throws IOException { } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { return buildAggregationsForSingleBucket( owningBucketOrds, (owningBucketOrd, subAggregationResults) -> new InternalNested( diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/ReverseNestedAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/ReverseNestedAggregator.java index 0e3e4679c7a2d..2477b67367e14 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/ReverseNestedAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/nested/ReverseNestedAggregator.java @@ -13,6 +13,7 @@ import org.apache.lucene.search.join.BitSetProducer; import org.apache.lucene.util.BitSet; import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.index.mapper.NestedObjectMapper; import org.elasticsearch.search.aggregations.AggregationExecutionContext; import org.elasticsearch.search.aggregations.Aggregator; @@ -86,7 +87,7 @@ public void collect(int childDoc, long bucket) throws IOException { } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { return buildAggregationsForSingleBucket( owningBucketOrds, (owningBucketOrd, subAggregationResults) -> new InternalReverseNested( diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/prefix/IpPrefixAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/prefix/IpPrefixAggregator.java index 9548cd871e161..e8ba0393208a0 100644 --- 
a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/prefix/IpPrefixAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/prefix/IpPrefixAggregator.java @@ -12,6 +12,8 @@ import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CollectionUtil; +import org.elasticsearch.common.util.IntArray; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.core.Releasables; import org.elasticsearch.index.fielddata.FieldData; import org.elasticsearch.index.fielddata.SortedBinaryDocValues; @@ -160,57 +162,63 @@ private static void maskIpAddress(final BytesRef ipAddress, final BytesRef subne } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { long totalOrdsToCollect = 0; - final int[] bucketsInOrd = new int[owningBucketOrds.length]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - final long bucketCount = bucketOrds.bucketsInOrd(owningBucketOrds[ordIdx]); - bucketsInOrd[ordIdx] = (int) bucketCount; - totalOrdsToCollect += bucketCount; - } - - long[] bucketOrdsToCollect = new long[(int) totalOrdsToCollect]; - int b = 0; - for (long owningBucketOrd : owningBucketOrds) { - BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrd); - while (ordsEnum.next()) { - bucketOrdsToCollect[b++] = ordsEnum.ord(); + try (IntArray bucketsInOrd = bigArrays().newIntArray(owningBucketOrds.size())) { + for (long ordIdx = 0; ordIdx < owningBucketOrds.size(); ordIdx++) { + final long bucketCount = bucketOrds.bucketsInOrd(owningBucketOrds.get(ordIdx)); + bucketsInOrd.set(ordIdx, (int) bucketCount); + totalOrdsToCollect += bucketCount; } - } - var subAggregationResults = buildSubAggsForBuckets(bucketOrdsToCollect); - InternalAggregation[] results = new InternalAggregation[owningBucketOrds.length]; - b = 0; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - List buckets = new ArrayList<>(bucketsInOrd[ordIdx]); - BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrds[ordIdx]); - while (ordsEnum.next()) { - long ordinal = ordsEnum.ord(); - if (bucketOrdsToCollect[b] != ordinal) { - throw AggregationErrors.iterationOrderChangedWithoutMutating(bucketOrds.toString(), ordinal, bucketOrdsToCollect[b]); + try (LongArray bucketOrdsToCollect = bigArrays().newLongArray(totalOrdsToCollect)) { + int b = 0; + for (long i = 0; i < owningBucketOrds.size(); i++) { + BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrds.get(i)); + while (ordsEnum.next()) { + bucketOrdsToCollect.set(b++, ordsEnum.ord()); + } + } + + var subAggregationResults = buildSubAggsForBuckets(bucketOrdsToCollect); + InternalAggregation[] results = new InternalAggregation[Math.toIntExact(owningBucketOrds.size())]; + b = 0; + for (int ordIdx = 0; ordIdx < results.length; ordIdx++) { + List buckets = new ArrayList<>(bucketsInOrd.get(ordIdx)); + BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrds.get(ordIdx)); + while (ordsEnum.next()) { + long ordinal = ordsEnum.ord(); + if (bucketOrdsToCollect.get(b) != ordinal) { + throw AggregationErrors.iterationOrderChangedWithoutMutating( + bucketOrds.toString(), + ordinal, + bucketOrdsToCollect.get(b) + ); + } + BytesRef ipAddress = new BytesRef(); + ordsEnum.readValue(ipAddress); + long docCount = 
bucketDocCount(ordinal); + buckets.add( + new InternalIpPrefix.Bucket( + config.format(), + BytesRef.deepCopyOf(ipAddress), + keyed, + ipPrefix.isIpv6, + ipPrefix.prefixLength, + ipPrefix.appendPrefixLength, + docCount, + subAggregationResults.apply(b++) + ) + ); + + // NOTE: the aggregator is expected to return sorted results + CollectionUtil.introSort(buckets, BucketOrder.key(true).comparator()); + } + results[ordIdx] = new InternalIpPrefix(name, config.format(), keyed, minDocCount, buckets, metadata()); } - BytesRef ipAddress = new BytesRef(); - ordsEnum.readValue(ipAddress); - long docCount = bucketDocCount(ordinal); - buckets.add( - new InternalIpPrefix.Bucket( - config.format(), - BytesRef.deepCopyOf(ipAddress), - keyed, - ipPrefix.isIpv6, - ipPrefix.prefixLength, - ipPrefix.appendPrefixLength, - docCount, - subAggregationResults.apply(b++) - ) - ); - - // NOTE: the aggregator is expected to return sorted results - CollectionUtil.introSort(buckets, BucketOrder.key(true).comparator()); + return results; } - results[ordIdx] = new InternalIpPrefix(name, config.format(), keyed, minDocCount, buckets, metadata()); } - return results; } @Override diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/BinaryRangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/BinaryRangeAggregator.java index 2f18d2dc1e42e..7bd817a489237 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/BinaryRangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/BinaryRangeAggregator.java @@ -14,6 +14,7 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.index.fielddata.FieldData; import org.elasticsearch.index.fielddata.SortedBinaryDocValues; import org.elasticsearch.search.DocValueFormat; @@ -358,7 +359,7 @@ private interface DocCollector { } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { return buildAggregationsForFixedBucketCount( owningBucketOrds, ranges.length, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java index 6d63bb786c29f..0654a788a10a9 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregator.java @@ -14,6 +14,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.core.CheckedFunction; import org.elasticsearch.index.fielddata.FieldData; import org.elasticsearch.index.fielddata.NumericDoubleValues; @@ -531,7 +532,7 @@ protected long subBucketOrdinal(long owningBucketOrdinal, int rangeOrd) { @Override @SuppressWarnings("unchecked") - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { return buildAggregationsForFixedBucketCount( owningBucketOrds, 
ranges.length, diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java index 0cfad5ba9e0c7..e4341a8135a4b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollector.java @@ -19,6 +19,7 @@ import org.apache.lucene.util.RamUsageEstimator; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.common.util.ObjectArray; import org.elasticsearch.core.Releasable; import org.elasticsearch.core.Releasables; @@ -120,7 +121,7 @@ public void postCollection() throws IOException { } @Override - public void prepareSelectedBuckets(long... selectedBuckets) throws IOException { + public void prepareSelectedBuckets(LongArray selectedBuckets) { // no-op - deferred aggs processed in postCollection call } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/SamplerAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/SamplerAggregator.java index 78b2cdfe7655d..a4c06a194fbf7 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/SamplerAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/SamplerAggregator.java @@ -11,6 +11,7 @@ import org.apache.lucene.misc.search.DiversifiedTopDocsCollector; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; import org.elasticsearch.core.Releasables; import org.elasticsearch.search.aggregations.AggregationExecutionContext; @@ -212,7 +213,7 @@ protected boolean shouldDefer(Aggregator aggregator) { } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { return buildAggregationsForSingleBucket( owningBucketOrds, (owningBucketOrd, subAggregationResults) -> new InternalSampler( diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/random/RandomSamplerAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/random/RandomSamplerAggregator.java index fc03786356f87..921cbb96385ad 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/random/RandomSamplerAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/sampler/random/RandomSamplerAggregator.java @@ -15,6 +15,7 @@ import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; import org.elasticsearch.common.CheckedSupplier; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.search.aggregations.AggregationExecutionContext; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; @@ -60,7 +61,7 @@ public class RandomSamplerAggregator extends BucketsAggregator implements Single } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] 
buildAggregations(LongArray owningBucketOrds) throws IOException { return buildAggregationsForSingleBucket( owningBucketOrds, (owningBucketOrd, subAggregationResults) -> new InternalRandomSampler( diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 1b0ec8e356082..fc3e7d092168d 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -23,6 +23,7 @@ import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.LongArray; import org.elasticsearch.common.util.LongHash; +import org.elasticsearch.common.util.ObjectArray; import org.elasticsearch.common.util.ObjectArrayPriorityQueue; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.Releasable; @@ -190,7 +191,7 @@ public void collect(int doc, long owningBucketOrd) throws IOException { } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { return resultStrategy.buildAggregations(owningBucketOrds); } @@ -692,61 +693,66 @@ abstract class ResultStrategy< B extends InternalMultiBucketAggregation.InternalBucket, TB extends InternalMultiBucketAggregation.InternalBucket> implements Releasable { - private InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { + if (valueCount == 0) { // no context in this reader - InternalAggregation[] results = new InternalAggregation[owningBucketOrds.length]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - results[ordIdx] = buildNoValuesResult(owningBucketOrds[ordIdx]); + InternalAggregation[] results = new InternalAggregation[Math.toIntExact(owningBucketOrds.size())]; + for (int ordIdx = 0; ordIdx < results.length; ordIdx++) { + results[ordIdx] = buildNoValuesResult(owningBucketOrds.get(ordIdx)); } return results; } - - B[][] topBucketsPreOrd = buildTopBucketsPerOrd(owningBucketOrds.length); - long[] otherDocCount = new long[owningBucketOrds.length]; - GlobalOrdLookupFunction lookupGlobalOrd = valuesSupplier.get()::lookupOrd; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - final int size; - if (bucketCountThresholds.getMinDocCount() == 0) { - // if minDocCount == 0 then we can end up with more buckets then maxBucketOrd() returns - size = (int) Math.min(valueCount, bucketCountThresholds.getShardSize()); - } else { - size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize()); - } - try (ObjectArrayPriorityQueue ordered = buildPriorityQueue(size)) { - final int finalOrdIdx = ordIdx; - BucketUpdater updater = bucketUpdater(owningBucketOrds[ordIdx], lookupGlobalOrd); - collectionStrategy.forEach(owningBucketOrds[ordIdx], new BucketInfoConsumer() { - TB spare = null; - - @Override - public void accept(long globalOrd, long bucketOrd, long docCount) throws IOException { - otherDocCount[finalOrdIdx] += docCount; - if (docCount >= bucketCountThresholds.getShardMinDocCount()) { - if (spare == null) { - spare = buildEmptyTemporaryBucket(); + try ( + LongArray otherDocCount 
= bigArrays().newLongArray(owningBucketOrds.size(), true); + ObjectArray topBucketsPreOrd = buildTopBucketsPerOrd(owningBucketOrds.size()) + ) { + GlobalOrdLookupFunction lookupGlobalOrd = valuesSupplier.get()::lookupOrd; + for (long ordIdx = 0; ordIdx < topBucketsPreOrd.size(); ordIdx++) { + final int size; + if (bucketCountThresholds.getMinDocCount() == 0) { + // if minDocCount == 0 then we can end up with more buckets then maxBucketOrd() returns + size = (int) Math.min(valueCount, bucketCountThresholds.getShardSize()); + } else { + size = (int) Math.min(maxBucketOrd(), bucketCountThresholds.getShardSize()); + } + try (ObjectArrayPriorityQueue ordered = buildPriorityQueue(size)) { + final long finalOrdIdx = ordIdx; + final long owningBucketOrd = owningBucketOrds.get(ordIdx); + BucketUpdater updater = bucketUpdater(owningBucketOrd, lookupGlobalOrd); + collectionStrategy.forEach(owningBucketOrd, new BucketInfoConsumer() { + TB spare = null; + + @Override + public void accept(long globalOrd, long bucketOrd, long docCount) throws IOException { + otherDocCount.increment(finalOrdIdx, docCount); + if (docCount >= bucketCountThresholds.getShardMinDocCount()) { + if (spare == null) { + spare = buildEmptyTemporaryBucket(); + } + updater.updateBucket(spare, globalOrd, bucketOrd, docCount); + spare = ordered.insertWithOverflow(spare); } - updater.updateBucket(spare, globalOrd, bucketOrd, docCount); - spare = ordered.insertWithOverflow(spare); } + }); + + // Get the top buckets + topBucketsPreOrd.set(ordIdx, buildBuckets((int) ordered.size())); + for (int i = (int) ordered.size() - 1; i >= 0; --i) { + B bucket = convertTempBucketToRealBucket(ordered.pop(), lookupGlobalOrd); + topBucketsPreOrd.get(ordIdx)[i] = bucket; + otherDocCount.increment(ordIdx, -bucket.getDocCount()); } - }); - - // Get the top buckets - topBucketsPreOrd[ordIdx] = buildBuckets((int) ordered.size()); - for (int i = (int) ordered.size() - 1; i >= 0; --i) { - topBucketsPreOrd[ordIdx][i] = convertTempBucketToRealBucket(ordered.pop(), lookupGlobalOrd); - otherDocCount[ordIdx] -= topBucketsPreOrd[ordIdx][i].getDocCount(); } } - } - buildSubAggs(topBucketsPreOrd); + buildSubAggs(topBucketsPreOrd); - InternalAggregation[] results = new InternalAggregation[owningBucketOrds.length]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - results[ordIdx] = buildResult(owningBucketOrds[ordIdx], otherDocCount[ordIdx], topBucketsPreOrd[ordIdx]); + InternalAggregation[] results = new InternalAggregation[Math.toIntExact(topBucketsPreOrd.size())]; + for (int ordIdx = 0; ordIdx < results.length; ordIdx++) { + results[ordIdx] = buildResult(owningBucketOrds.get(ordIdx), otherDocCount.get(ordIdx), topBucketsPreOrd.get(ordIdx)); + } + return results; } - return results; } /** @@ -781,7 +787,7 @@ public void accept(long globalOrd, long bucketOrd, long docCount) throws IOExcep /** * Build an array to hold the "top" buckets for each ordinal. */ - abstract B[][] buildTopBucketsPerOrd(int size); + abstract ObjectArray buildTopBucketsPerOrd(long size); /** * Build an array of buckets for a particular ordinal to collect the @@ -798,7 +804,7 @@ public void accept(long globalOrd, long bucketOrd, long docCount) throws IOExcep * Build the sub-aggregations into the buckets. This will usually * delegate to {@link #buildSubAggsForAllBuckets}. */ - abstract void buildSubAggs(B[][] topBucketsPreOrd) throws IOException; + abstract void buildSubAggs(ObjectArray topBucketsPreOrd) throws IOException; /** * Turn the buckets into an aggregation result. 
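`ObjectArray` plays the same role for the per-ordinal bucket arrays that `LongArray` plays for ordinals: `B[][] topBucketsPreOrd` becomes `ObjectArray<B[]>`, one slot per owning ordinal, allocated and tracked through `bigArrays()`. A minimal sketch of that shape, with `String[]` standing in for the bucket array type:

import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.ObjectArray;

class TopBucketsSketch {
    public static void main(String[] args) {
        BigArrays bigArrays = BigArrays.NON_RECYCLING_INSTANCE; // untracked instance for the demo
        try (ObjectArray<String[]> topBucketsPerOrd = bigArrays.newObjectArray(2)) {
            topBucketsPerOrd.set(0, new String[] { "a", "b" }); // top buckets of owning ordinal 0
            topBucketsPerOrd.set(1, new String[] { "c" });      // top buckets of owning ordinal 1
            for (long ord = 0; ord < topBucketsPerOrd.size(); ord++) {
                System.out.println(ord + " -> " + topBucketsPerOrd.get(ord).length + " buckets");
            }
        }
    }
}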
@@ -837,8 +843,8 @@ LeafBucketCollector wrapCollector(LeafBucketCollector primary) {
         }

         @Override
-        StringTerms.Bucket[][] buildTopBucketsPerOrd(int size) {
-            return new StringTerms.Bucket[size][];
+        ObjectArray<StringTerms.Bucket[]> buildTopBucketsPerOrd(long size) {
+            return bigArrays().newObjectArray(size);
         }

         @Override
@@ -875,7 +881,7 @@ StringTerms.Bucket convertTempBucketToRealBucket(OrdBucket temp, GlobalOrdLookup
         }

         @Override
-        void buildSubAggs(StringTerms.Bucket[][] topBucketsPreOrd) throws IOException {
+        void buildSubAggs(ObjectArray<StringTerms.Bucket[]> topBucketsPreOrd) throws IOException {
             buildSubAggsForAllBuckets(topBucketsPreOrd, b -> b.bucketOrd, (b, aggs) -> b.aggregations = aggs);
         }

@@ -969,8 +975,8 @@ public void collect(int doc, long owningBucketOrd) throws IOException {
         }

         @Override
-        SignificantStringTerms.Bucket[][] buildTopBucketsPerOrd(int size) {
-            return new SignificantStringTerms.Bucket[size][];
+        ObjectArray<SignificantStringTerms.Bucket[]> buildTopBucketsPerOrd(long size) {
+            return bigArrays().newObjectArray(size);
         }

         @Override
@@ -1022,7 +1028,7 @@ SignificantStringTerms.Bucket convertTempBucketToRealBucket(
         }

         @Override
-        void buildSubAggs(SignificantStringTerms.Bucket[][] topBucketsPreOrd) throws IOException {
+        void buildSubAggs(ObjectArray<SignificantStringTerms.Bucket[]> topBucketsPreOrd) throws IOException {
             buildSubAggsForAllBuckets(topBucketsPreOrd, b -> b.bucketOrd, (b, aggs) -> b.aggregations = aggs);
         }

diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTerms.java
index a60911b466847..eeb7305ac51fa 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTerms.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTerms.java
@@ -62,7 +62,7 @@ public interface Reader<B extends Bucket<B>> {
     long supersetSize;
     /**
      * Ordinal of the bucket while it is being built. Not used after it is
-     * returned from {@link Aggregator#buildAggregations(long[])} and not
+     * returned from {@link Aggregator#buildAggregations(org.elasticsearch.common.util.LongArray)} and not
      * serialized.
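The `bucketOrd` field that javadoc describes is only a temporary join key: `buildSubAggsForAllBuckets` reads it through a `ToLongFunction` to know which sub-aggregation results belong to a bucket, then hands the results back through a `BiConsumer`, after which the ordinal is dead weight. A self-contained sketch of that plumbing with a hypothetical bucket class (the `String` payload stands in for `InternalAggregations`):

import java.util.function.BiConsumer;
import java.util.function.ToLongFunction;

class BucketOrdSketch {
    static class Bucket {
        long bucketOrd;      // transient join key, never serialized
        String aggregations; // stands in for the attached sub-aggregation results
    }

    static void wire(Bucket[] buckets, ToLongFunction<Bucket> toOrd, BiConsumer<Bucket, String> setAggs) {
        for (Bucket b : buckets) {
            setAggs.accept(b, "sub-aggs for ord " + toOrd.applyAsLong(b));
        }
    }

    public static void main(String[] args) {
        Bucket b = new Bucket();
        b.bucketOrd = 42;
        wire(new Bucket[] { b }, x -> x.bucketOrd, (x, aggs) -> x.aggregations = aggs);
        System.out.println(b.aggregations); // sub-aggs for ord 42
    }
}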
*/ transient long bucketOrd; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongRareTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongRareTermsAggregator.java index 651705bd71ef8..877bd2cac4b05 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongRareTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongRareTermsAggregator.java @@ -12,7 +12,9 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SortedNumericDocValues; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.common.util.LongHash; +import org.elasticsearch.common.util.ObjectArray; import org.elasticsearch.common.util.SetBackedScalingCuckooFilter; import org.elasticsearch.core.Releasables; import org.elasticsearch.search.DocValueFormat; @@ -118,70 +120,75 @@ private void collectValue(long val, int docId, long owningBucketOrd, LeafBucketC } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { /* * Collect the list of buckets, populate the filter with terms * that are too frequent, and figure out how to merge sub-buckets. */ - LongRareTerms.Bucket[][] rarestPerOrd = new LongRareTerms.Bucket[owningBucketOrds.length][]; - SetBackedScalingCuckooFilter[] filters = new SetBackedScalingCuckooFilter[owningBucketOrds.length]; - long keepCount = 0; - long[] mergeMap = new long[(int) bucketOrds.size()]; - Arrays.fill(mergeMap, -1); - long offset = 0; - for (int owningOrdIdx = 0; owningOrdIdx < owningBucketOrds.length; owningOrdIdx++) { - try (LongHash bucketsInThisOwningBucketToCollect = new LongHash(1, bigArrays())) { - filters[owningOrdIdx] = newFilter(); - List builtBuckets = new ArrayList<>(); - LongKeyedBucketOrds.BucketOrdsEnum collectedBuckets = bucketOrds.ordsEnum(owningBucketOrds[owningOrdIdx]); - while (collectedBuckets.next()) { - long docCount = bucketDocCount(collectedBuckets.ord()); - // if the key is below threshold, reinsert into the new ords - if (docCount <= maxDocCount) { - LongRareTerms.Bucket bucket = new LongRareTerms.Bucket(collectedBuckets.value(), docCount, null, format); - bucket.bucketOrd = offset + bucketsInThisOwningBucketToCollect.add(collectedBuckets.value()); - mergeMap[(int) collectedBuckets.ord()] = bucket.bucketOrd; - builtBuckets.add(bucket); - keepCount++; - } else { - filters[owningOrdIdx].add(collectedBuckets.value()); + try ( + ObjectArray rarestPerOrd = bigArrays().newObjectArray(owningBucketOrds.size()); + ObjectArray filters = bigArrays().newObjectArray(owningBucketOrds.size()) + ) { + try (LongArray mergeMap = bigArrays().newLongArray(bucketOrds.size())) { + mergeMap.fill(0, mergeMap.size(), -1); + long keepCount = 0; + long offset = 0; + for (long owningOrdIdx = 0; owningOrdIdx < owningBucketOrds.size(); owningOrdIdx++) { + try (LongHash bucketsInThisOwningBucketToCollect = new LongHash(1, bigArrays())) { + filters.set(owningOrdIdx, newFilter()); + List builtBuckets = new ArrayList<>(); + LongKeyedBucketOrds.BucketOrdsEnum collectedBuckets = bucketOrds.ordsEnum(owningBucketOrds.get(owningOrdIdx)); + while (collectedBuckets.next()) { + long docCount = bucketDocCount(collectedBuckets.ord()); + // if the key is below threshold, reinsert into the new ords + if (docCount <= maxDocCount) 
{ + LongRareTerms.Bucket bucket = new LongRareTerms.Bucket(collectedBuckets.value(), docCount, null, format); + bucket.bucketOrd = offset + bucketsInThisOwningBucketToCollect.add(collectedBuckets.value()); + mergeMap.set(collectedBuckets.ord(), bucket.bucketOrd); + builtBuckets.add(bucket); + keepCount++; + } else { + filters.get(owningOrdIdx).add(collectedBuckets.value()); + } + } + rarestPerOrd.set(owningOrdIdx, builtBuckets.toArray(LongRareTerms.Bucket[]::new)); + offset += bucketsInThisOwningBucketToCollect.size(); } } - rarestPerOrd[owningOrdIdx] = builtBuckets.toArray(LongRareTerms.Bucket[]::new); - offset += bucketsInThisOwningBucketToCollect.size(); - } - } - /* - * Only merge/delete the ordinals if we have actually deleted one, - * to save on some redundant work. - */ - if (keepCount != mergeMap.length) { - LongUnaryOperator howToMerge = b -> mergeMap[(int) b]; - rewriteBuckets(offset, howToMerge); - if (deferringCollector() != null) { - ((BestBucketsDeferringCollector) deferringCollector()).rewriteBuckets(howToMerge); + /* + * Only merge/delete the ordinals if we have actually deleted one, + * to save on some redundant work. + */ + if (keepCount != mergeMap.size()) { + LongUnaryOperator howToMerge = mergeMap::get; + rewriteBuckets(offset, howToMerge); + if (deferringCollector() != null) { + ((BestBucketsDeferringCollector) deferringCollector()).rewriteBuckets(howToMerge); + } + } } - } - /* - * Now build the results! - */ - buildSubAggsForAllBuckets(rarestPerOrd, b -> b.bucketOrd, (b, aggs) -> b.aggregations = aggs); - InternalAggregation[] result = new InternalAggregation[owningBucketOrds.length]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - Arrays.sort(rarestPerOrd[ordIdx], ORDER.comparator()); - result[ordIdx] = new LongRareTerms( - name, - ORDER, - metadata(), - format, - Arrays.asList(rarestPerOrd[ordIdx]), - maxDocCount, - filters[ordIdx] - ); + /* + * Now build the results! 
+ */ + buildSubAggsForAllBuckets(rarestPerOrd, b -> b.bucketOrd, (b, aggs) -> b.aggregations = aggs); + InternalAggregation[] result = new InternalAggregation[Math.toIntExact(owningBucketOrds.size())]; + for (int ordIdx = 0; ordIdx < result.length; ordIdx++) { + LongRareTerms.Bucket[] buckets = rarestPerOrd.get(ordIdx); + Arrays.sort(buckets, ORDER.comparator()); + result[ordIdx] = new LongRareTerms( + name, + ORDER, + metadata(), + format, + Arrays.asList(buckets), + maxDocCount, + filters.get(ordIdx) + ); + } + return result; } - return result; } @Override diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java index 76202b6386a73..c02ed5509e6ae 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java @@ -18,6 +18,7 @@ import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.PriorityQueue; import org.elasticsearch.common.util.LongArray; +import org.elasticsearch.common.util.ObjectArray; import org.elasticsearch.common.util.ObjectArrayPriorityQueue; import org.elasticsearch.core.Releasable; import org.elasticsearch.core.Releasables; @@ -117,7 +118,7 @@ public LeafBucketCollector getLeafCollector(AggregationExecutionContext aggCtx, } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { return resultStrategy.buildAggregations(owningBucketOrds); } @@ -282,45 +283,49 @@ abstract class ResultStrategy implements Releasable { - private InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { - B[][] topBucketsPerOrd = buildTopBucketsPerOrd(owningBucketOrds.length); - long[] otherDocCounts = new long[owningBucketOrds.length]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - collectZeroDocEntriesIfNeeded(owningBucketOrds[ordIdx], excludeDeletedDocs); - int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize()); - try (ObjectArrayPriorityQueue<B> ordered = buildPriorityQueue(size)) { - B spare = null; - BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrds[ordIdx]); - Supplier<B> emptyBucketBuilder = emptyBucketBuilder(owningBucketOrds[ordIdx]); - while (ordsEnum.next()) { - long docCount = bucketDocCount(ordsEnum.ord()); - otherDocCounts[ordIdx] += docCount; - if (docCount < bucketCountThresholds.getShardMinDocCount()) { - continue; - } - if (spare == null) { - spare = emptyBucketBuilder.get(); + private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { + try ( + LongArray otherDocCounts = bigArrays().newLongArray(owningBucketOrds.size(), true); + ObjectArray<B[]> topBucketsPerOrd = buildTopBucketsPerOrd(Math.toIntExact(owningBucketOrds.size())) + ) { + for (long ordIdx = 0; ordIdx < topBucketsPerOrd.size(); ordIdx++) { + long owningOrd = owningBucketOrds.get(ordIdx); + collectZeroDocEntriesIfNeeded(owningOrd, excludeDeletedDocs); + int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize()); + + try (ObjectArrayPriorityQueue<B> ordered = buildPriorityQueue(size)) { + B spare = null; + BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningOrd); + Supplier<B> emptyBucketBuilder = emptyBucketBuilder(owningOrd); + while (ordsEnum.next()) { + long docCount = bucketDocCount(ordsEnum.ord()); + otherDocCounts.increment(ordIdx, docCount); + if (docCount < bucketCountThresholds.getShardMinDocCount()) { + continue; + } + if (spare == null) { + spare = emptyBucketBuilder.get(); + } + updateBucket(spare, ordsEnum, docCount); + spare = ordered.insertWithOverflow(spare); } - updateBucket(spare, ordsEnum, docCount); - spare = ordered.insertWithOverflow(spare); - } - 
topBucketsPerOrd[ordIdx] = buildBuckets((int) ordered.size()); - for (int i = (int) ordered.size() - 1; i >= 0; --i) { - topBucketsPerOrd[ordIdx][i] = ordered.pop(); - otherDocCounts[ordIdx] -= topBucketsPerOrd[ordIdx][i].getDocCount(); - finalizeBucket(topBucketsPerOrd[ordIdx][i]); + topBucketsPerOrd.set(ordIdx, buildBuckets((int) ordered.size())); + for (int i = (int) ordered.size() - 1; i >= 0; --i) { + topBucketsPerOrd.get(ordIdx)[i] = ordered.pop(); + otherDocCounts.increment(ordIdx, -topBucketsPerOrd.get(ordIdx)[i].getDocCount()); + finalizeBucket(topBucketsPerOrd.get(ordIdx)[i]); + } } } - } - buildSubAggs(topBucketsPerOrd); - InternalAggregation[] result = new InternalAggregation[owningBucketOrds.length]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - result[ordIdx] = buildResult(owningBucketOrds[ordIdx], otherDocCounts[ordIdx], topBucketsPerOrd[ordIdx]); + buildSubAggs(topBucketsPerOrd); + InternalAggregation[] result = new InternalAggregation[Math.toIntExact(topBucketsPerOrd.size())]; + for (int ordIdx = 0; ordIdx < result.length; ordIdx++) { + result[ordIdx] = buildResult(owningBucketOrds.get(ordIdx), otherDocCounts.get(ordIdx), topBucketsPerOrd.get(ordIdx)); + } + return result; } - return result; } /** @@ -361,7 +366,7 @@ private InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws /** * Build an array to hold the "top" buckets for each ordinal. */ - abstract B[][] buildTopBucketsPerOrd(int size); + abstract ObjectArray<B[]> buildTopBucketsPerOrd(long size); /** * Build an array of buckets for a particular ordinal to collect the @@ -379,7 +384,7 @@ private InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws * Build the sub-aggregations into the buckets. This will usually * delegate to {@link #buildSubAggsForAllBuckets}. */ - abstract void buildSubAggs(B[][] topBucketsPerOrd) throws IOException; + abstract void buildSubAggs(ObjectArray<B[]> topBucketsPerOrd) throws IOException; /** * Turn the buckets into an aggregation result. 
@@ -501,8 +506,8 @@ void updateBucket(StringTerms.Bucket spare, BytesKeyedBucketOrds.BucketOrdsEnum } @Override - StringTerms.Bucket[][] buildTopBucketsPerOrd(int size) { - return new StringTerms.Bucket[size][]; + ObjectArray<StringTerms.Bucket[]> buildTopBucketsPerOrd(long size) { + return bigArrays().newObjectArray(size); } @Override @@ -521,7 +526,7 @@ void finalizeBucket(StringTerms.Bucket bucket) { } @Override - void buildSubAggs(StringTerms.Bucket[][] topBucketsPerOrd) throws IOException { + void buildSubAggs(ObjectArray<StringTerms.Bucket[]> topBucketsPerOrd) throws IOException { buildSubAggsForAllBuckets(topBucketsPerOrd, b -> b.bucketOrd, (b, a) -> b.aggregations = a); } @@ -637,8 +642,8 @@ void updateBucket(SignificantStringTerms.Bucket spare, BytesKeyedBucketOrds.Buck } @Override - SignificantStringTerms.Bucket[][] buildTopBucketsPerOrd(int size) { - return new SignificantStringTerms.Bucket[size][]; + ObjectArray<SignificantStringTerms.Bucket[]> buildTopBucketsPerOrd(long size) { + return bigArrays().newObjectArray(size); } @Override @@ -657,7 +662,7 @@ void finalizeBucket(SignificantStringTerms.Bucket bucket) { } @Override - void buildSubAggs(SignificantStringTerms.Bucket[][] topBucketsPerOrd) throws IOException { + void buildSubAggs(ObjectArray<SignificantStringTerms.Bucket[]> topBucketsPerOrd) throws IOException { buildSubAggsForAllBuckets(topBucketsPerOrd, b -> b.bucketOrd, (b, a) -> b.aggregations = a); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java index d39348d80df14..e10f0b8944027 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java @@ -15,6 +15,7 @@ import org.apache.lucene.search.ScoreMode; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.common.util.LongArray; +import org.elasticsearch.common.util.ObjectArray; import org.elasticsearch.common.util.ObjectArrayPriorityQueue; import org.elasticsearch.core.Releasable; import org.elasticsearch.core.Releasables; @@ -39,7 +40,6 @@ import java.io.IOException; import java.util.Arrays; -import java.util.List; import java.util.Map; import java.util.function.BiConsumer; import java.util.function.Function; @@ -136,7 +136,7 @@ private void collectValue(long val, int doc, long owningBucketOrd, LeafBucketCol } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { return resultStrategy.buildAggregations(owningBucketOrds); } @@ -163,48 +163,52 @@ public void collectDebugInfo(BiConsumer<String, Object> add) { abstract class ResultStrategy implements Releasable { - private InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { - B[][] topBucketsPerOrd = buildTopBucketsPerOrd(owningBucketOrds.length); - long[] otherDocCounts = new long[owningBucketOrds.length]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - collectZeroDocEntriesIfNeeded(owningBucketOrds[ordIdx], excludeDeletedDocs); - long bucketsInOrd = bucketOrds.bucketsInOrd(owningBucketOrds[ordIdx]); - - int size = (int) Math.min(bucketsInOrd, bucketCountThresholds.getShardSize()); - try (ObjectArrayPriorityQueue<B> ordered = buildPriorityQueue(size)) { - B spare = null; - BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrds[ordIdx]); - Supplier<B> 
emptyBucketBuilder = emptyBucketBuilder(owningBucketOrds[ordIdx]); - while (ordsEnum.next()) { - long docCount = bucketDocCount(ordsEnum.ord()); - otherDocCounts[ordIdx] += docCount; - if (docCount < bucketCountThresholds.getShardMinDocCount()) { - continue; - } - if (spare == null) { - spare = emptyBucketBuilder.get(); + private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { + try ( + LongArray otherDocCounts = bigArrays().newLongArray(owningBucketOrds.size(), true); + ObjectArray<B[]> topBucketsPerOrd = buildTopBucketsPerOrd(owningBucketOrds.size()) + ) { + for (long ordIdx = 0; ordIdx < topBucketsPerOrd.size(); ordIdx++) { + final long owningBucketOrd = owningBucketOrds.get(ordIdx); + collectZeroDocEntriesIfNeeded(owningBucketOrd, excludeDeletedDocs); + long bucketsInOrd = bucketOrds.bucketsInOrd(owningBucketOrd); + + int size = (int) Math.min(bucketsInOrd, bucketCountThresholds.getShardSize()); + try (ObjectArrayPriorityQueue<B> ordered = buildPriorityQueue(size)) { + B spare = null; + BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrd); + Supplier<B> emptyBucketBuilder = emptyBucketBuilder(owningBucketOrd); + while (ordsEnum.next()) { + long docCount = bucketDocCount(ordsEnum.ord()); + otherDocCounts.increment(ordIdx, docCount); + if (docCount < bucketCountThresholds.getShardMinDocCount()) { + continue; + } + if (spare == null) { + spare = emptyBucketBuilder.get(); + } + updateBucket(spare, ordsEnum, docCount); + spare = ordered.insertWithOverflow(spare); } - updateBucket(spare, ordsEnum, docCount); - spare = ordered.insertWithOverflow(spare); - } - // Get the top buckets - B[] bucketsForOrd = buildBuckets((int) ordered.size()); - topBucketsPerOrd[ordIdx] = bucketsForOrd; - for (int b = (int) ordered.size() - 1; b >= 0; --b) { - topBucketsPerOrd[ordIdx][b] = ordered.pop(); - otherDocCounts[ordIdx] -= topBucketsPerOrd[ordIdx][b].getDocCount(); + // Get the top buckets + B[] bucketsForOrd = buildBuckets((int) ordered.size()); + topBucketsPerOrd.set(ordIdx, bucketsForOrd); + for (int b = (int) ordered.size() - 1; b >= 0; --b) { + topBucketsPerOrd.get(ordIdx)[b] = ordered.pop(); + otherDocCounts.increment(ordIdx, -topBucketsPerOrd.get(ordIdx)[b].getDocCount()); + } } } - } - buildSubAggs(topBucketsPerOrd); + buildSubAggs(topBucketsPerOrd); - InternalAggregation[] result = new InternalAggregation[owningBucketOrds.length]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - result[ordIdx] = buildResult(owningBucketOrds[ordIdx], otherDocCounts[ordIdx], topBucketsPerOrd[ordIdx]); + InternalAggregation[] result = new InternalAggregation[Math.toIntExact(topBucketsPerOrd.size())]; + for (int ordIdx = 0; ordIdx < result.length; ordIdx++) { + result[ordIdx] = buildResult(owningBucketOrds.get(ordIdx), otherDocCounts.get(ordIdx), topBucketsPerOrd.get(ordIdx)); + } + return result; } - return result; } /** @@ -227,7 +231,7 @@ private InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws /** * Build an array to hold the "top" buckets for each ordinal. */ - abstract B[][] buildTopBucketsPerOrd(int size); + abstract ObjectArray<B[]> buildTopBucketsPerOrd(long size); /** * Build an array of buckets for a particular ordinal. These arrays @@ -258,7 +262,7 @@ private InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws * Build the sub-aggregations into the buckets. This will usually * delegate to {@link #buildSubAggsForAllBuckets}. 
*/ - abstract void buildSubAggs(B[][] topBucketsPerOrd) throws IOException; + abstract void buildSubAggs(ObjectArray<B[]> topBucketsPerOrd) throws IOException; /** * Collect extra entries for "zero" hit documents if they were requested @@ -297,7 +301,7 @@ final ObjectArrayPriorityQueue<B> buildPriorityQueue(int size) { } @Override - final void buildSubAggs(B[][] topBucketsPerOrd) throws IOException { + final void buildSubAggs(ObjectArray<B[]> topBucketsPerOrd) throws IOException { buildSubAggsForAllBuckets(topBucketsPerOrd, b -> b.bucketOrd, (b, aggs) -> b.aggregations = aggs); } @@ -356,8 +360,8 @@ SortedNumericDocValues getValues(LeafReaderContext ctx) throws IOException { } @Override - LongTerms.Bucket[][] buildTopBucketsPerOrd(int size) { - return new LongTerms.Bucket[size][]; + ObjectArray<LongTerms.Bucket[]> buildTopBucketsPerOrd(long size) { + return bigArrays().newObjectArray(size); } @Override @@ -397,7 +401,7 @@ LongTerms buildResult(long owningBucketOrd, long otherDocCount, LongTerms.Bucket bucketCountThresholds.getShardSize(), showTermDocCountError, otherDocCount, - List.of(topBuckets), + Arrays.asList(topBuckets), null ); } @@ -438,8 +442,8 @@ SortedNumericDocValues getValues(LeafReaderContext ctx) throws IOException { } @Override - DoubleTerms.Bucket[][] buildTopBucketsPerOrd(int size) { - return new DoubleTerms.Bucket[size][]; + ObjectArray<DoubleTerms.Bucket[]> buildTopBucketsPerOrd(long size) { + return bigArrays().newObjectArray(size); } @Override @@ -479,7 +483,7 @@ DoubleTerms buildResult(long owningBucketOrd, long otherDocCount, DoubleTerms.Bu bucketCountThresholds.getShardSize(), showTermDocCountError, otherDocCount, - List.of(topBuckets), + Arrays.asList(topBuckets), null ); } @@ -551,8 +555,8 @@ public void collect(int doc, long owningBucketOrd) throws IOException { } @Override - SignificantLongTerms.Bucket[][] buildTopBucketsPerOrd(int size) { - return new SignificantLongTerms.Bucket[size][]; + ObjectArray<SignificantLongTerms.Bucket[]> buildTopBucketsPerOrd(long size) { + return bigArrays().newObjectArray(size); } @Override @@ -583,7 +587,7 @@ ObjectArrayPriorityQueue buildPriorityQueue(int siz } @Override - void buildSubAggs(SignificantLongTerms.Bucket[][] topBucketsPerOrd) throws IOException { + void buildSubAggs(ObjectArray<SignificantLongTerms.Bucket[]> topBucketsPerOrd) throws IOException { buildSubAggsForAllBuckets(topBucketsPerOrd, b -> b.bucketOrd, (b, aggs) -> b.aggregations = aggs); } @@ -601,7 +605,7 @@ SignificantLongTerms buildResult(long owningBucketOrd, long otherDocCoun, Signif subsetSizes.get(owningBucketOrd), supersetSize, significanceHeuristic, - List.of(topBuckets) + Arrays.asList(topBuckets) ); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringRareTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringRareTermsAggregator.java index 2bc2833f0ddce..7200c33c71f70 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringRareTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringRareTermsAggregator.java @@ -12,6 +12,8 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import org.elasticsearch.common.util.BytesRefHash; +import org.elasticsearch.common.util.LongArray; +import org.elasticsearch.common.util.ObjectArray; import org.elasticsearch.common.util.SetBackedScalingCuckooFilter; import org.elasticsearch.core.Releasables; import org.elasticsearch.index.fielddata.FieldData; @@ -119,72 +121,82 @@ private void collectValue(BytesRef val, int doc, long 
owningBucketOrd, LeafBucke } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { /* * Collect the list of buckets, populate the filter with terms * that are too frequent, and figure out how to merge sub-buckets. */ - StringRareTerms.Bucket[][] rarestPerOrd = new StringRareTerms.Bucket[owningBucketOrds.length][]; - SetBackedScalingCuckooFilter[] filters = new SetBackedScalingCuckooFilter[owningBucketOrds.length]; - long keepCount = 0; - long[] mergeMap = new long[(int) bucketOrds.size()]; - Arrays.fill(mergeMap, -1); - long offset = 0; - for (int owningOrdIdx = 0; owningOrdIdx < owningBucketOrds.length; owningOrdIdx++) { - try (BytesRefHash bucketsInThisOwningBucketToCollect = new BytesRefHash(1, bigArrays())) { - filters[owningOrdIdx] = newFilter(); - List<StringRareTerms.Bucket> builtBuckets = new ArrayList<>(); - BytesKeyedBucketOrds.BucketOrdsEnum collectedBuckets = bucketOrds.ordsEnum(owningBucketOrds[owningOrdIdx]); - BytesRef scratch = new BytesRef(); - while (collectedBuckets.next()) { - collectedBuckets.readValue(scratch); - long docCount = bucketDocCount(collectedBuckets.ord()); - // if the key is below threshold, reinsert into the new ords - if (docCount <= maxDocCount) { - StringRareTerms.Bucket bucket = new StringRareTerms.Bucket(BytesRef.deepCopyOf(scratch), docCount, null, format); - bucket.bucketOrd = offset + bucketsInThisOwningBucketToCollect.add(scratch); - mergeMap[(int) collectedBuckets.ord()] = bucket.bucketOrd; - builtBuckets.add(bucket); - keepCount++; - } else { - filters[owningOrdIdx].add(scratch); + try ( + ObjectArray<StringRareTerms.Bucket[]> rarestPerOrd = bigArrays().newObjectArray(owningBucketOrds.size()); + ObjectArray<SetBackedScalingCuckooFilter> filters = bigArrays().newObjectArray(owningBucketOrds.size()) + ) { + try (LongArray mergeMap = bigArrays().newLongArray(bucketOrds.size())) { + mergeMap.fill(0, mergeMap.size(), -1); + long keepCount = 0; + long offset = 0; + for (long owningOrdIdx = 0; owningOrdIdx < owningBucketOrds.size(); owningOrdIdx++) { + try (BytesRefHash bucketsInThisOwningBucketToCollect = new BytesRefHash(1, bigArrays())) { + filters.set(owningOrdIdx, newFilter()); + List<StringRareTerms.Bucket> builtBuckets = new ArrayList<>(); + BytesKeyedBucketOrds.BucketOrdsEnum collectedBuckets = bucketOrds.ordsEnum(owningBucketOrds.get(owningOrdIdx)); + BytesRef scratch = new BytesRef(); + while (collectedBuckets.next()) { + collectedBuckets.readValue(scratch); + long docCount = bucketDocCount(collectedBuckets.ord()); + // if the key is below threshold, reinsert into the new ords + if (docCount <= maxDocCount) { + StringRareTerms.Bucket bucket = new StringRareTerms.Bucket( + BytesRef.deepCopyOf(scratch), + docCount, + null, + format + ); + bucket.bucketOrd = offset + bucketsInThisOwningBucketToCollect.add(scratch); + mergeMap.set(collectedBuckets.ord(), bucket.bucketOrd); + builtBuckets.add(bucket); + keepCount++; + } else { + filters.get(owningOrdIdx).add(scratch); + } + } + rarestPerOrd.set(owningOrdIdx, builtBuckets.toArray(StringRareTerms.Bucket[]::new)); + offset += bucketsInThisOwningBucketToCollect.size(); + } } - rarestPerOrd[owningOrdIdx] = builtBuckets.toArray(StringRareTerms.Bucket[]::new); - offset += bucketsInThisOwningBucketToCollect.size(); - } - } - /* - * Only merge/delete the ordinals if we have actually deleted one, - * to save on some redundant work. 
- */ - if (keepCount != mergeMap.length) { - LongUnaryOperator howToMerge = b -> mergeMap[(int) b]; - rewriteBuckets(offset, howToMerge); - if (deferringCollector() != null) { - ((BestBucketsDeferringCollector) deferringCollector()).rewriteBuckets(howToMerge); + /* + * Only merge/delete the ordinals if we have actually deleted one, + * to save on some redundant work. + */ + if (keepCount != mergeMap.size()) { + LongUnaryOperator howToMerge = mergeMap::get; + rewriteBuckets(offset, howToMerge); + if (deferringCollector() != null) { + ((BestBucketsDeferringCollector) deferringCollector()).rewriteBuckets(howToMerge); + } + } } - } - /* - * Now build the results! - */ - buildSubAggsForAllBuckets(rarestPerOrd, b -> b.bucketOrd, (b, aggs) -> b.aggregations = aggs); - InternalAggregation[] result = new InternalAggregation[owningBucketOrds.length]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - Arrays.sort(rarestPerOrd[ordIdx], ORDER.comparator()); - result[ordIdx] = new StringRareTerms( - name, - ORDER, - metadata(), - format, - Arrays.asList(rarestPerOrd[ordIdx]), - maxDocCount, - filters[ordIdx] - ); + /* + * Now build the results! + */ + buildSubAggsForAllBuckets(rarestPerOrd, b -> b.bucketOrd, (b, aggs) -> b.aggregations = aggs); + InternalAggregation[] result = new InternalAggregation[Math.toIntExact(owningBucketOrds.size())]; + for (int ordIdx = 0; ordIdx < result.length; ordIdx++) { + StringRareTerms.Bucket[] buckets = rarestPerOrd.get(ordIdx); + Arrays.sort(buckets, ORDER.comparator()); + result[ordIdx] = new StringRareTerms( + name, + ORDER, + metadata(), + format, + Arrays.asList(buckets), + maxDocCount, + filters.get(ordIdx) + ); + } + return result; } - return result; } @Override diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/MetricsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/MetricsAggregator.java index 8742136c86ec6..0d767e356108a 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/MetricsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/MetricsAggregator.java @@ -9,6 +9,7 @@ package org.elasticsearch.search.aggregations.metrics; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorBase; import org.elasticsearch.search.aggregations.AggregatorFactories; @@ -36,10 +37,10 @@ protected MetricsAggregator(String name, AggregationContext context, Aggregator public abstract InternalAggregation buildAggregation(long owningBucketOrd) throws IOException; @Override - public final InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { - InternalAggregation[] results = new InternalAggregation[owningBucketOrds.length]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - results[ordIdx] = buildAggregation(owningBucketOrds[ordIdx]); + public final InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { + InternalAggregation[] results = new InternalAggregation[Math.toIntExact(owningBucketOrds.size())]; + for (int ordIdx = 0; ordIdx < results.length; ordIdx++) { + results[ordIdx] = buildAggregation(owningBucketOrds.get(ordIdx)); } return results; } diff --git a/server/src/main/java/org/elasticsearch/search/profile/aggregation/ProfilingAggregator.java b/server/src/main/java/org/elasticsearch/search/profile/aggregation/ProfilingAggregator.java index 
fff1990c29750..90e84acc7cad5 100644 --- a/server/src/main/java/org/elasticsearch/search/profile/aggregation/ProfilingAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/profile/aggregation/ProfilingAggregator.java @@ -10,6 +10,7 @@ package org.elasticsearch.search.profile.aggregation; import org.apache.lucene.search.ScoreMode; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.search.aggregations.AggregationExecutionContext; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.InternalAggregation; @@ -68,7 +69,7 @@ public BucketComparator bucketComparator(String key, SortOrder order) { } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { Timer timer = profileBreakdown.getNewTimer(AggregationTimingType.BUILD_AGGREGATION); InternalAggregation[] result; timer.start(); diff --git a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.txt b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.txt index 86a99bca5ee5a..fd4827b8fa60f 100644 --- a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.txt +++ b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.txt @@ -44,3 +44,4 @@ X_OPAQUE_ID api-conventions. FORMING_SINGLE_NODE_CLUSTERS modules-discovery-bootstrap-cluster.html#modules-discovery-bootstrap-cluster-joining JDK_LOCALE_DIFFERENCES mapping-date-format.html#custom-date-format-locales ALLOCATION_EXPLAIN_MAX_RETRY cluster-allocation-explain.html#maximum-number-of-retries-exceeded +SECURE_SETTINGS secure-settings.html diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/MultiDenseVectorScriptDocValuesTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/MultiDenseVectorScriptDocValuesTests.java index a9bfb0d883ba7..435baa477e740 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/MultiDenseVectorScriptDocValuesTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/MultiDenseVectorScriptDocValuesTests.java @@ -18,6 +18,7 @@ import org.elasticsearch.script.field.vectors.MultiDenseVector; import org.elasticsearch.script.field.vectors.MultiDenseVectorDocValuesField; import org.elasticsearch.test.ESTestCase; +import org.junit.BeforeClass; import java.io.IOException; import java.nio.ByteBuffer; @@ -28,13 +29,17 @@ public class MultiDenseVectorScriptDocValuesTests extends ESTestCase { + @BeforeClass + public static void setup() { + assumeTrue("Requires multi-dense vector support", MultiDenseVectorFieldMapper.FEATURE_FLAG.isEnabled()); + } + public void testFloatGetVectorValueAndGetMagnitude() throws IOException { int dims = 3; float[][][] vectors = { { { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 3 } }, { { 1, 0, 2 } } }; float[][] expectedMagnitudes = { { 1.7320f, 2.4495f, 3.3166f }, { 2.2361f } }; - IndexVersion indexVersion = IndexVersion.current(); - BinaryDocValues docValues = wrap(vectors, ElementType.FLOAT, indexVersion); + BinaryDocValues docValues = wrap(vectors, ElementType.FLOAT); BinaryDocValues magnitudeValues = wrap(expectedMagnitudes); MultiDenseVectorDocValuesField field = new FloatMultiDenseVectorDocValuesField( docValues, @@ -64,7 +69,7 @@ public void testByteGetVectorValueAndGetMagnitude() throws IOException { float[][][] vectors = { { { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 3 } }, { { 1, 0, 2 } } }; 
float[][] expectedMagnitudes = { { 1.7320f, 2.4495f, 3.3166f }, { 2.2361f } }; - BinaryDocValues docValues = wrap(vectors, ElementType.BYTE, IndexVersion.current()); + BinaryDocValues docValues = wrap(vectors, ElementType.BYTE); BinaryDocValues magnitudeValues = wrap(expectedMagnitudes); MultiDenseVectorDocValuesField field = new ByteMultiDenseVectorDocValuesField( docValues, @@ -91,10 +96,9 @@ public void testByteGetVectorValueAndGetMagnitude() throws IOException { public void testFloatMetadataAndIterator() throws IOException { int dims = 3; - IndexVersion indexVersion = IndexVersion.current(); float[][][] vectors = new float[][][] { fill(new float[3][dims], ElementType.FLOAT), fill(new float[2][dims], ElementType.FLOAT) }; float[][] magnitudes = new float[][] { new float[3], new float[2] }; - BinaryDocValues docValues = wrap(vectors, ElementType.FLOAT, indexVersion); + BinaryDocValues docValues = wrap(vectors, ElementType.FLOAT); BinaryDocValues magnitudeValues = wrap(magnitudes); MultiDenseVectorDocValuesField field = new FloatMultiDenseVectorDocValuesField( @@ -120,10 +124,9 @@ public void testFloatMetadataAndIterator() throws IOException { public void testByteMetadataAndIterator() throws IOException { int dims = 3; - IndexVersion indexVersion = IndexVersion.current(); float[][][] vectors = new float[][][] { fill(new float[3][dims], ElementType.BYTE), fill(new float[2][dims], ElementType.BYTE) }; float[][] magnitudes = new float[][] { new float[3], new float[2] }; - BinaryDocValues docValues = wrap(vectors, ElementType.BYTE, indexVersion); + BinaryDocValues docValues = wrap(vectors, ElementType.BYTE); BinaryDocValues magnitudeValues = wrap(magnitudes); MultiDenseVectorDocValuesField field = new ByteMultiDenseVectorDocValuesField( docValues, @@ -159,7 +162,7 @@ public void testFloatMissingValues() throws IOException { int dims = 3; float[][][] vectors = { { { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 3 } }, { { 1, 0, 2 } } }; float[][] magnitudes = { { 1.7320f, 2.4495f, 3.3166f }, { 2.2361f } }; - BinaryDocValues docValues = wrap(vectors, ElementType.FLOAT, IndexVersion.current()); + BinaryDocValues docValues = wrap(vectors, ElementType.FLOAT); BinaryDocValues magnitudeValues = wrap(magnitudes); MultiDenseVectorDocValuesField field = new FloatMultiDenseVectorDocValuesField( docValues, @@ -183,7 +186,7 @@ public void testByteMissingValues() throws IOException { int dims = 3; float[][][] vectors = { { { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 3 } }, { { 1, 0, 2 } } }; float[][] magnitudes = { { 1.7320f, 2.4495f, 3.3166f }, { 2.2361f } }; - BinaryDocValues docValues = wrap(vectors, ElementType.BYTE, IndexVersion.current()); + BinaryDocValues docValues = wrap(vectors, ElementType.BYTE); BinaryDocValues magnitudeValues = wrap(magnitudes); MultiDenseVectorDocValuesField field = new ByteMultiDenseVectorDocValuesField( docValues, @@ -207,7 +210,7 @@ public void testFloatGetFunctionIsNotAccessible() throws IOException { int dims = 3; float[][][] vectors = { { { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 3 } }, { { 1, 0, 2 } } }; float[][] magnitudes = { { 1.7320f, 2.4495f, 3.3166f }, { 2.2361f } }; - BinaryDocValues docValues = wrap(vectors, ElementType.FLOAT, IndexVersion.current()); + BinaryDocValues docValues = wrap(vectors, ElementType.FLOAT); BinaryDocValues magnitudeValues = wrap(magnitudes); MultiDenseVectorDocValuesField field = new FloatMultiDenseVectorDocValuesField( docValues, @@ -233,7 +236,7 @@ public void testByteGetFunctionIsNotAccessible() throws IOException { int dims = 3; float[][][] vectors = { { { 1, 1, 
1 }, { 1, 1, 2 }, { 1, 1, 3 } }, { { 1, 0, 2 } } }; float[][] magnitudes = { { 1.7320f, 2.4495f, 3.3166f }, { 2.2361f } }; - BinaryDocValues docValues = wrap(vectors, ElementType.BYTE, IndexVersion.current()); + BinaryDocValues docValues = wrap(vectors, ElementType.BYTE); BinaryDocValues magnitudeValues = wrap(magnitudes); MultiDenseVectorDocValuesField field = new ByteMultiDenseVectorDocValuesField( docValues, @@ -303,7 +306,7 @@ public long cost() { }; } - public static BinaryDocValues wrap(float[][][] vectors, ElementType elementType, IndexVersion indexVersion) { + public static BinaryDocValues wrap(float[][][] vectors, ElementType elementType) { return new BinaryDocValues() { int idx = -1; int maxIdx = vectors.length; @@ -313,7 +316,7 @@ public BytesRef binaryValue() { if (idx >= maxIdx) { throw new IllegalStateException("max index exceeded"); } - return mockEncodeDenseVector(vectors[idx], elementType, indexVersion); + return mockEncodeDenseVector(vectors[idx], elementType, IndexVersion.current()); } @Override diff --git a/server/src/test/java/org/elasticsearch/ingest/ConfigurationUtilsTests.java b/server/src/test/java/org/elasticsearch/ingest/ConfigurationUtilsTests.java index dabc8672733e2..0e8c7e0857251 100644 --- a/server/src/test/java/org/elasticsearch/ingest/ConfigurationUtilsTests.java +++ b/server/src/test/java/org/elasticsearch/ingest/ConfigurationUtilsTests.java @@ -65,11 +65,12 @@ public void testReadStringProperty() { } public void testReadStringPropertyInvalidType() { - try { - ConfigurationUtils.readStringProperty(null, null, config, "arr"); - } catch (ElasticsearchParseException e) { - assertThat(e.getMessage(), equalTo("[arr] property isn't a string, but of type [java.util.Arrays$ArrayList]")); - } + ElasticsearchParseException caught = assertThrows( + ElasticsearchParseException.class, + () -> ConfigurationUtils.readStringProperty(null, null, config, "arr") + ); + assertThat(caught.getMessage(), equalTo("[arr] property isn't a string, but of type [java.util.Arrays$ArrayList]")); + } public void testReadBooleanProperty() { @@ -83,11 +84,11 @@ public void testReadNullBooleanProperty() { } public void testReadBooleanPropertyInvalidType() { - try { - ConfigurationUtils.readBooleanProperty(null, null, config, "arr", true); - } catch (ElasticsearchParseException e) { - assertThat(e.getMessage(), equalTo("[arr] property isn't a boolean, but of type [java.util.Arrays$ArrayList]")); - } + ElasticsearchParseException caught = assertThrows( + ElasticsearchParseException.class, + () -> ConfigurationUtils.readBooleanProperty(null, null, config, "arr", true) + ); + assertThat(caught.getMessage(), equalTo("[arr] property isn't a boolean, but of type [java.util.Arrays$ArrayList]")); } public void testReadStringOrIntProperty() { @@ -98,11 +99,11 @@ public void testReadStringOrIntProperty() { } public void testReadStringOrIntPropertyInvalidType() { - try { - ConfigurationUtils.readStringOrIntProperty(null, null, config, "arr", null); - } catch (ElasticsearchParseException e) { - assertThat(e.getMessage(), equalTo("[arr] property isn't a string or int, but of type [java.util.Arrays$ArrayList]")); - } + ElasticsearchParseException caught = assertThrows( + ElasticsearchParseException.class, + () -> ConfigurationUtils.readStringOrIntProperty(null, null, config, "arr", null) + ); + assertThat(caught.getMessage(), equalTo("[arr] property isn't a string or int, but of type [java.util.Arrays$ArrayList]")); } public void testReadMediaProperty() { diff --git 
a/server/src/test/java/org/elasticsearch/search/aggregations/AdaptingAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/AdaptingAggregatorTests.java index 125b2d20cf9f3..6e9bb596e944b 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/AdaptingAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/AdaptingAggregatorTests.java @@ -9,6 +9,7 @@ package org.elasticsearch.search.aggregations; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.core.CheckedFunction; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MapperServiceTestCase; @@ -113,7 +114,7 @@ protected LeafBucketCollector getLeafCollector(AggregationExecutionContext aggCt } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) { return new InternalAggregation[] { null }; } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/AggregatorBaseTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/AggregatorBaseTests.java index 8d3fe0f7f6e79..2d0622dbb6322 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/AggregatorBaseTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/AggregatorBaseTests.java @@ -15,6 +15,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperService; @@ -47,7 +48,7 @@ protected LeafBucketCollector getLeafCollector(AggregationExecutionContext aggCt } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) { throw new UnsupportedOperationException(); } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/BestBucketsDeferringCollectorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/BestBucketsDeferringCollectorTests.java index 9b6ea7272d0f9..e796cee92c0dc 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/BestBucketsDeferringCollectorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/BestBucketsDeferringCollectorTests.java @@ -28,6 +28,8 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; import org.elasticsearch.common.CheckedBiConsumer; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.search.aggregations.AggregationExecutionContext; import org.elasticsearch.search.aggregations.AggregatorTestCase; import org.elasticsearch.search.aggregations.BucketCollector; @@ -77,7 +79,7 @@ public ScoreMode scoreMode() { collector.preCollection(); indexSearcher.search(termQuery, collector.asCollector()); collector.postCollection(); - collector.prepareSelectedBuckets(0); + collector.prepareSelectedBuckets(BigArrays.NON_RECYCLING_INSTANCE.newLongArray(1, true)); assertEquals(topDocs.scoreDocs.length, deferredCollectedDocIds.size()); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { @@ -91,7 +93,7 @@ public ScoreMode scoreMode() { 
collector.preCollection(); indexSearcher.search(new MatchAllDocsQuery(), collector.asCollector()); collector.postCollection(); - collector.prepareSelectedBuckets(0); + collector.prepareSelectedBuckets(BigArrays.NON_RECYCLING_INSTANCE.newLongArray(1, true)); assertEquals(topDocs.scoreDocs.length, deferredCollectedDocIds.size()); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { @@ -141,7 +143,7 @@ public void collect(int doc, long owningBucketOrd) throws IOException { } } }, (deferringCollector, finalCollector) -> { - deferringCollector.prepareSelectedBuckets(0, 8, 9); + deferringCollector.prepareSelectedBuckets(toLongArray(0, 8, 9)); equalTo(Map.of(0L, List.of(0, 1, 2, 3, 4, 5, 6, 7), 1L, List.of(8), 2L, List.of(9))); }); @@ -158,7 +160,7 @@ public void collect(int doc, long owningBucketOrd) throws IOException { } } }, (deferringCollector, finalCollector) -> { - deferringCollector.prepareSelectedBuckets(0, 8, 9); + deferringCollector.prepareSelectedBuckets(toLongArray(0, 8, 9)); assertThat(finalCollector.collection, equalTo(Map.of(0L, List.of(4, 5, 6, 7), 1L, List.of(8), 2L, List.of(9)))); }); @@ -176,12 +178,20 @@ public void collect(int doc, long owningBucketOrd) throws IOException { } } }, (deferringCollector, finalCollector) -> { - deferringCollector.prepareSelectedBuckets(0, 8, 9); + deferringCollector.prepareSelectedBuckets(toLongArray(0, 8, 9)); assertThat(finalCollector.collection, equalTo(Map.of(0L, List.of(0, 1, 2, 3), 1L, List.of(8), 2L, List.of(9)))); }); } + private LongArray toLongArray(long... longs) { + LongArray longArray = BigArrays.NON_RECYCLING_INSTANCE.newLongArray(longs.length); + for (int i = 0; i < longs.length; i++) { + longArray.set(i, longs[i]); + } + return longArray; + } + private void testCase( BiFunction leafCollector, CheckedBiConsumer verify diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregatorTests.java index 80f27b31ca65b..fb4c62ad66f19 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregatorTests.java @@ -16,6 +16,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.core.Releasables; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.NumberFieldMapper; @@ -72,7 +73,7 @@ protected LeafBucketCollector getLeafCollector(AggregationExecutionContext aggCt } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) { return new InternalAggregation[0]; } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollectorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollectorTests.java index 2df6a0cfb91ca..a0a24e98ae721 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollectorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/sampler/BestDocsDeferringCollectorTests.java @@ -22,6 +22,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; 
import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.MockBigArrays; import org.elasticsearch.common.util.MockPageCacheRecycler; import org.elasticsearch.indices.breaker.NoneCircuitBreakerService; @@ -68,7 +69,7 @@ public void testReplay() throws Exception { collector.preCollection(); indexSearcher.search(termQuery, collector.asCollector()); collector.postCollection(); - collector.prepareSelectedBuckets(0); + collector.prepareSelectedBuckets(BigArrays.NON_RECYCLING_INSTANCE.newLongArray(1, true)); assertEquals(topDocs.scoreDocs.length, deferredCollectedDocIds.size()); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { diff --git a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java index ee671651b8cff..11787866af0d7 100644 --- a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java +++ b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java @@ -18,8 +18,7 @@ public enum FeatureFlag { TIME_SERIES_MODE("es.index_mode_feature_flag_registered=true", Version.fromString("8.0.0"), null), FAILURE_STORE_ENABLED("es.failure_store_feature_flag_enabled=true", Version.fromString("8.12.0"), null), - SUB_OBJECTS_AUTO_ENABLED("es.sub_objects_auto_feature_flag_enabled=true", Version.fromString("8.16.0"), null), - INFERENCE_DEFAULT_ELSER("es.inference_default_elser_feature_flag_enabled=true", Version.fromString("8.16.0"), null); + SUB_OBJECTS_AUTO_ENABLED("es.sub_objects_auto_feature_flag_enabled=true", Version.fromString("8.16.0"), null); public final String systemProperty; public final Version from; diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/multiterms/MultiTermsAggregator.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/multiterms/MultiTermsAggregator.java index 85882a5c56851..0c6e94a15ec36 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/multiterms/MultiTermsAggregator.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/multiterms/MultiTermsAggregator.java @@ -20,6 +20,8 @@ import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.util.LongArray; +import org.elasticsearch.common.util.ObjectArray; import org.elasticsearch.common.util.ObjectArrayPriorityQueue; import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.core.Releasables; @@ -235,57 +237,62 @@ protected void doClose() { } @Override - public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { - InternalMultiTerms.Bucket[][] topBucketsPerOrd = new InternalMultiTerms.Bucket[owningBucketOrds.length][]; - long[] otherDocCounts = new long[owningBucketOrds.length]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - long bucketsInOrd = bucketOrds.bucketsInOrd(owningBucketOrds[ordIdx]); - - int size = (int) Math.min(bucketsInOrd, bucketCountThresholds.getShardSize()); - try ( - ObjectArrayPriorityQueue<InternalMultiTerms.Bucket> ordered = new BucketPriorityQueue<>( - size, - bigArrays(), - partiallyBuiltBucketComparator - ) - ) { - InternalMultiTerms.Bucket spare = null; - BytesRef spareKey = null; - BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum = 
bucketOrds.ordsEnum(owningBucketOrds[ordIdx]); - while (ordsEnum.next()) { - long docCount = bucketDocCount(ordsEnum.ord()); - otherDocCounts[ordIdx] += docCount; - if (docCount < bucketCountThresholds.getShardMinDocCount()) { - continue; - } - if (spare == null) { - spare = new InternalMultiTerms.Bucket(null, 0, null, showTermDocCountError, 0, formats, keyConverters); - spareKey = new BytesRef(); + public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { + try ( + LongArray otherDocCounts = bigArrays().newLongArray(owningBucketOrds.size(), true); + ObjectArray<InternalMultiTerms.Bucket[]> topBucketsPerOrd = bigArrays().newObjectArray(owningBucketOrds.size()) + ) { + for (long ordIdx = 0; ordIdx < owningBucketOrds.size(); ordIdx++) { + final long owningBucketOrd = owningBucketOrds.get(ordIdx); + long bucketsInOrd = bucketOrds.bucketsInOrd(owningBucketOrd); + + int size = (int) Math.min(bucketsInOrd, bucketCountThresholds.getShardSize()); + try ( + ObjectArrayPriorityQueue<InternalMultiTerms.Bucket> ordered = new BucketPriorityQueue<>( + size, + bigArrays(), + partiallyBuiltBucketComparator + ) + ) { + InternalMultiTerms.Bucket spare = null; + BytesRef spareKey = null; + BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrd); + while (ordsEnum.next()) { + long docCount = bucketDocCount(ordsEnum.ord()); + otherDocCounts.increment(ordIdx, docCount); + if (docCount < bucketCountThresholds.getShardMinDocCount()) { + continue; + } + if (spare == null) { + spare = new InternalMultiTerms.Bucket(null, 0, null, showTermDocCountError, 0, formats, keyConverters); + spareKey = new BytesRef(); + } + ordsEnum.readValue(spareKey); + spare.terms = unpackTerms(spareKey); + spare.docCount = docCount; + spare.bucketOrd = ordsEnum.ord(); + spare = ordered.insertWithOverflow(spare); } - ordsEnum.readValue(spareKey); - spare.terms = unpackTerms(spareKey); - spare.docCount = docCount; - spare.bucketOrd = ordsEnum.ord(); - spare = ordered.insertWithOverflow(spare); - } - // Get the top buckets - InternalMultiTerms.Bucket[] bucketsForOrd = new InternalMultiTerms.Bucket[(int) ordered.size()]; - topBucketsPerOrd[ordIdx] = bucketsForOrd; - for (int b = (int) ordered.size() - 1; b >= 0; --b) { - topBucketsPerOrd[ordIdx][b] = ordered.pop(); - otherDocCounts[ordIdx] -= topBucketsPerOrd[ordIdx][b].getDocCount(); + // Get the top buckets + InternalMultiTerms.Bucket[] bucketsForOrd = new InternalMultiTerms.Bucket[(int) ordered.size()]; + topBucketsPerOrd.set(ordIdx, bucketsForOrd); + for (int b = (int) ordered.size() - 1; b >= 0; --b) { + InternalMultiTerms.Bucket[] buckets = topBucketsPerOrd.get(ordIdx); + buckets[b] = ordered.pop(); + otherDocCounts.increment(ordIdx, -buckets[b].getDocCount()); + } } } - } - buildSubAggsForAllBuckets(topBucketsPerOrd, b -> b.bucketOrd, (b, a) -> b.aggregations = a); + buildSubAggsForAllBuckets(topBucketsPerOrd, b -> b.bucketOrd, (b, a) -> b.aggregations = a); - InternalAggregation[] result = new InternalAggregation[owningBucketOrds.length]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { - result[ordIdx] = buildResult(otherDocCounts[ordIdx], topBucketsPerOrd[ordIdx]); + InternalAggregation[] result = new InternalAggregation[Math.toIntExact(owningBucketOrds.size())]; + for (int ordIdx = 0; ordIdx < result.length; ordIdx++) { + result[ordIdx] = buildResult(otherDocCounts.get(ordIdx), topBucketsPerOrd.get(ordIdx)); + } + return result; } - return result; } InternalMultiTerms buildResult(long otherDocCount, InternalMultiTerms.Bucket[] topBuckets) { @@ -305,7 
+312,7 @@ InternalMultiTerms buildResult(long otherDocCount, InternalMultiTerms.Bucket[] t bucketCountThresholds.getShardSize(), showTermDocCountError, otherDocCount, - List.of(topBuckets), + Arrays.asList(topBuckets), 0, formats, keyConverters, diff --git a/x-pack/plugin/apm-data/src/main/resources/component-templates/apm@mappings.yaml b/x-pack/plugin/apm-data/src/main/resources/component-templates/apm@mappings.yaml index ac6462c86676c..a5a3a7433f4c1 100644 --- a/x-pack/plugin/apm-data/src/main/resources/component-templates/apm@mappings.yaml +++ b/x-pack/plugin/apm-data/src/main/resources/component-templates/apm@mappings.yaml @@ -4,6 +4,7 @@ _meta: managed: true template: mappings: + date_detection: false dynamic: true dynamic_templates: - numeric_labels: diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/function/Function.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/function/Function.java index cad5c631088f2..a1afcdbf1f77c 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/function/Function.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/function/Function.java @@ -6,6 +6,7 @@ */ package org.elasticsearch.xpack.esql.core.expression.function; +import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Expressions; import org.elasticsearch.xpack.esql.core.expression.Nullability; @@ -42,6 +43,11 @@ public Nullability nullable() { return Expressions.nullable(children()); } + /** Return true if this function can be executed under the provided {@link XPackLicenseState}, otherwise false.*/ + public boolean checkLicense(XPackLicenseState state) { + return true; + } + @Override public int hashCode() { return Objects.hash(getClass(), children()); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java index 2913401d8aab3..d6715a932c075 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java @@ -29,6 +29,7 @@ import org.elasticsearch.geo.GeometryTestUtils; import org.elasticsearch.geo.ShapeTestUtils; import org.elasticsearch.index.IndexMode; +import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xcontent.json.JsonXContent; import org.elasticsearch.xpack.esql.action.EsqlQueryResponse; @@ -342,7 +343,7 @@ public String toString() { public static final Configuration TEST_CFG = configuration(new QueryPragmas(Settings.EMPTY)); - public static final Verifier TEST_VERIFIER = new Verifier(new Metrics(new EsqlFunctionRegistry())); + public static final Verifier TEST_VERIFIER = new Verifier(new Metrics(new EsqlFunctionRegistry()), new XPackLicenseState(() -> 0L)); private EsqlTestUtils() {} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index c86c72ed34421..b2333c077400d 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -2468,6 +2468,7 @@ 
count:long |values:keyword |job_positions:keyword ; prunedStatsFollowedByStats +required_capability: per_agg_filtering from employees | eval my_length = length(concat(first_name, null)) | stats count = count(my_length) where false, diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java index d399c826e0bf2..0641a03c88b69 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.esql.analysis; +import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import org.elasticsearch.xpack.esql.common.Failure; import org.elasticsearch.xpack.esql.core.capabilities.Unresolvable; @@ -82,9 +83,11 @@ public class Verifier { private final Metrics metrics; + private final XPackLicenseState licenseState; - public Verifier(Metrics metrics) { + public Verifier(Metrics metrics, XPackLicenseState licenseState) { this.metrics = metrics; + this.licenseState = licenseState; } /** @@ -201,6 +204,10 @@ else if (p instanceof Lookup lookup) { }); checkRemoteEnrich(plan, failures); + if (failures.isEmpty()) { + checkLicense(plan, licenseState, failures); + } + // gather metrics if (failures.isEmpty()) { gatherMetrics(plan, partialMetrics); @@ -546,6 +553,14 @@ private static void checkBinaryComparison(LogicalPlan p, Set<Failure> failures) }); } + private void checkLicense(LogicalPlan plan, XPackLicenseState licenseState, Set<Failure> failures) { + plan.forEachExpressionDown(Function.class, p -> { + if (p.checkLicense(licenseState) == false) { + failures.add(new Failure(p, "current license is non-compliant for function [" + p.sourceText() + "]")); + } + }); + } + private void gatherMetrics(LogicalPlan plan, BitSet b) { plan.forEachDown(p -> FeatureMetric.set(p, b)); for (int i = b.nextSetBit(0); i >= 0; i = b.nextSetBit(i + 1)) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/execution/PlanExecutor.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/execution/PlanExecutor.java index 816388193c5f6..c1269009c6a41 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/execution/PlanExecutor.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/execution/PlanExecutor.java @@ -9,6 +9,7 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.indices.IndicesExpressionGrouper; +import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.telemetry.metric.MeterRegistry; import org.elasticsearch.xpack.esql.action.EsqlExecutionInfo; import org.elasticsearch.xpack.esql.action.EsqlQueryRequest; @@ -40,13 +41,13 @@ public class PlanExecutor { private final Verifier verifier; private final PlanningMetricsManager planningMetricsManager; - public PlanExecutor(IndexResolver indexResolver, MeterRegistry meterRegistry) { + public PlanExecutor(IndexResolver indexResolver, MeterRegistry meterRegistry, XPackLicenseState licenseState) { this.indexResolver = indexResolver; this.preAnalyzer = new PreAnalyzer(); this.functionRegistry = new EsqlFunctionRegistry(); this.mapper = new Mapper(); this.metrics = new Metrics(functionRegistry); - this.verifier = new Verifier(metrics); + this.verifier = new Verifier(metrics, licenseState); this.planningMetricsManager = new 
PlanningMetricsManager(meterRegistry); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Count.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Count.java index fa8a9e7d8c837..3a0d616d407a3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Count.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Count.java @@ -58,7 +58,9 @@ public class Count extends AggregateFunction implements ToAggregator, SurrogateExpression { ), @Example( description = "To count the same stream of data based on two different expressions " - + "use the pattern `COUNT(<expression> OR NULL)`", + + "use the pattern `COUNT(<expression> OR NULL)`. This builds on the three-valued logic " + + "({wikipedia}/Three-valued_logic[3VL]) of the language: `TRUE OR NULL` is `TRUE`, but `FALSE OR NULL` is `NULL`, " + + "plus the way COUNT handles `NULL`s: `COUNT(TRUE)` and `COUNT(FALSE)` are both 1, but `COUNT(NULL)` is 0.", file = "stats", tag = "count-or-null" ) } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java index 9b4d51af244b8..ccea9ad010d99 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java @@ -38,6 +38,7 @@ import org.elasticsearch.compute.operator.exchange.ExchangeSourceOperator; import org.elasticsearch.compute.operator.topn.TopNOperatorStatus; import org.elasticsearch.features.NodeFeature; +import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.plugins.ActionPlugin; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.rest.RestController; @@ -45,6 +46,7 @@ import org.elasticsearch.threadpool.ExecutorBuilder; import org.elasticsearch.threadpool.FixedExecutorBuilder; import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xpack.core.XPackPlugin; import org.elasticsearch.xpack.core.action.XPackInfoFeatureAction; import org.elasticsearch.xpack.core.action.XPackUsageFeatureAction; import org.elasticsearch.xpack.esql.EsqlInfoTransportAction; @@ -116,7 +118,7 @@ public Collection<?> createComponents(PluginServices services) { BlockFactory blockFactory = new BlockFactory(circuitBreaker, bigArrays, maxPrimitiveArrayBlockSize); setupSharedSecrets(); return List.of( - new PlanExecutor(new IndexResolver(services.client()), services.telemetryProvider().getMeterRegistry()), + new PlanExecutor(new IndexResolver(services.client()), services.telemetryProvider().getMeterRegistry(), getLicenseState()), new ExchangeService(services.clusterService().getSettings(), services.threadPool(), ThreadPool.Names.SEARCH, blockFactory), blockFactory ); @@ -131,6 +133,11 @@ private void setupSharedSecrets() { } } + // to be overridden by tests + protected XPackLicenseState getLicenseState() { + return XPackPlugin.getSharedLicenseState(); + } + /** * The settings defined by the ESQL plugin. 
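(Aside: the `getLicenseState()` hook added above is the seam that lets tests pin the license seen by `PlanExecutor` and, through it, the `Verifier`. A minimal sketch of a test-only override follows; the subclass name `FixedLicenseEsqlPlugin` is hypothetical, while `TestUtils.UpdatableLicenseState` and `XPackLicenseStatus` are the same types used by `CheckLicenseTests` later in this diff.)

[source,java]
----
import org.elasticsearch.license.License;
import org.elasticsearch.license.TestUtils;
import org.elasticsearch.license.XPackLicenseState;
import org.elasticsearch.license.internal.XPackLicenseStatus;
import org.elasticsearch.xpack.esql.plugin.EsqlPlugin;

// Hypothetical test-only plugin: pins the license state instead of reading
// the shared XPackPlugin state, so license-gated functions can be verified
// against an arbitrary operation mode.
public class FixedLicenseEsqlPlugin extends EsqlPlugin {
    private final XPackLicenseState fixedState;

    public FixedLicenseEsqlPlugin(License.OperationMode mode) {
        TestUtils.UpdatableLicenseState state = new TestUtils.UpdatableLicenseState();
        state.update(new XPackLicenseStatus(mode, true, null)); // active license of the given mode
        this.fixedState = state;
    }

    @Override
    protected XPackLicenseState getLicenseState() {
        return fixedState; // flows into new PlanExecutor(..., getLicenseState())
    }
}
----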
* diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/CheckLicenseTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/CheckLicenseTests.java new file mode 100644 index 0000000000000..98f36d339976c --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/CheckLicenseTests.java @@ -0,0 +1,138 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function; + +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.license.License; +import org.elasticsearch.license.LicensedFeature; +import org.elasticsearch.license.TestUtils; +import org.elasticsearch.license.XPackLicenseState; +import org.elasticsearch.license.internal.XPackLicenseStatus; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.esql.EsqlTestUtils; +import org.elasticsearch.xpack.esql.VerificationException; +import org.elasticsearch.xpack.esql.analysis.Analyzer; +import org.elasticsearch.xpack.esql.analysis.AnalyzerContext; +import org.elasticsearch.xpack.esql.analysis.Verifier; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.function.Function; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.parser.EsqlParser; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.stats.Metrics; + +import java.util.List; + +import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.analyzerDefaultMapping; +import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.defaultEnrichResolution; +import static org.hamcrest.Matchers.containsString; + +public class CheckLicenseTests extends ESTestCase { + + private final EsqlParser parser = new EsqlParser(); + private final String esql = "from tests | eval license() | LIMIT 10"; + + public void testLicense() { + for (License.OperationMode functionLicense : License.OperationMode.values()) { + final LicensedFeature functionLicenseFeature = random().nextBoolean() + ? 
LicensedFeature.momentary("test", "license", functionLicense) + : LicensedFeature.persistent("test", "license", functionLicense); + final EsqlFunctionRegistry.FunctionBuilder builder = (source, expression, cfg) -> { + final LicensedFunction licensedFunction = new LicensedFunction(source); + licensedFunction.setLicensedFeature(functionLicenseFeature); + return licensedFunction; + }; + for (License.OperationMode operationMode : License.OperationMode.values()) { + if (License.OperationMode.TRIAL != operationMode && License.OperationMode.compare(operationMode, functionLicense) < 0) { + // non-compliant license + final VerificationException ex = expectThrows(VerificationException.class, () -> analyze(builder, operationMode)); + assertThat(ex.getMessage(), containsString("current license is non-compliant for function [license()]")); + } else { + // compliant license + assertNotNull(analyze(builder, operationMode)); + } + } + } + } + + private LogicalPlan analyze(EsqlFunctionRegistry.FunctionBuilder builder, License.OperationMode operationMode) { + final FunctionDefinition def = EsqlFunctionRegistry.def(LicensedFunction.class, builder, "license"); + final EsqlFunctionRegistry registry = new EsqlFunctionRegistry(def) { + @Override + public EsqlFunctionRegistry snapshotRegistry() { + return this; + } + }; + return analyzer(registry, operationMode).analyze(parser.createStatement(esql)); + } + + private static Analyzer analyzer(EsqlFunctionRegistry registry, License.OperationMode operationMode) { + return new Analyzer( + new AnalyzerContext(EsqlTestUtils.TEST_CFG, registry, analyzerDefaultMapping(), defaultEnrichResolution()), + new Verifier(new Metrics(new EsqlFunctionRegistry()), getLicenseState(operationMode)) + ); + } + + private static XPackLicenseState getLicenseState(License.OperationMode operationMode) { + final TestUtils.UpdatableLicenseState licenseState = new TestUtils.UpdatableLicenseState(); + licenseState.update(new XPackLicenseStatus(operationMode, true, null)); + return licenseState; + } + + // It needs to be public because we run validation on it via reflection in org.elasticsearch.xpack.esql.tree.EsqlNodeSubclassTests. + // This test prevents to add the license as constructor parameter too. 
+ public static class LicensedFunction extends Function { + + private LicensedFeature licensedFeature; + + public LicensedFunction(Source source) { + super(source, List.of()); + } + + void setLicensedFeature(LicensedFeature licensedFeature) { + this.licensedFeature = licensedFeature; + } + + @Override + public boolean checkLicense(XPackLicenseState state) { + if (licensedFeature instanceof LicensedFeature.Momentary momentary) { + return momentary.check(state); + } else { + return licensedFeature.checkWithoutTracking(state); + } + } + + @Override + public DataType dataType() { + return DataType.KEYWORD; + } + + @Override + public Expression replaceChildren(List newChildren) { + throw new UnsupportedOperationException("this type of node doesn't have any children to replace"); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this); + } + + @Override + public String getWriteableName() { + throw new UnsupportedOperationException(); + } + + @Override + public void writeTo(StreamOutput out) { + throw new UnsupportedOperationException(); + } + } + +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index c849b08d64ab0..edf678ed0b385 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -21,6 +21,7 @@ import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.xpack.core.enrich.EnrichPolicy; import org.elasticsearch.xpack.esql.EsqlTestUtils; import org.elasticsearch.xpack.esql.EsqlTestUtils.TestSearchStats; @@ -144,7 +145,7 @@ private Analyzer makeAnalyzer(String mappingFileName, EnrichResolution enrichRes return new Analyzer( new AnalyzerContext(config, new EsqlFunctionRegistry(), getIndexResult, enrichResolution), - new Verifier(new Metrics(new EsqlFunctionRegistry())) + new Verifier(new Metrics(new EsqlFunctionRegistry()), new XPackLicenseState(() -> 0L)) ); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/QueryTranslatorTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/QueryTranslatorTests.java index cf90cf96fe683..57210fda07f2b 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/QueryTranslatorTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/QueryTranslatorTests.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.esql.planner; import org.elasticsearch.index.IndexMode; +import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.esql.EsqlTestUtils; import org.elasticsearch.xpack.esql.analysis.Analyzer; @@ -46,7 +47,7 @@ private static Analyzer makeAnalyzer(String mappingFileName) { return new Analyzer( new AnalyzerContext(EsqlTestUtils.TEST_CFG, new EsqlFunctionRegistry(), getIndexResult, new EnrichResolution()), - new Verifier(new Metrics(new EsqlFunctionRegistry())) + new Verifier(new Metrics(new EsqlFunctionRegistry()), new XPackLicenseState(() -> 0L)) ); } diff --git 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/PlanExecutorMetricsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/PlanExecutorMetricsTests.java index 116df21a33ac0..b323efad2b4c3 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/PlanExecutorMetricsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/PlanExecutorMetricsTests.java @@ -17,6 +17,7 @@ import org.elasticsearch.client.internal.Client; import org.elasticsearch.index.IndexMode; import org.elasticsearch.indices.IndicesExpressionGrouper; +import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.telemetry.metric.MeterRegistry; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.TestThreadPool; @@ -102,7 +103,7 @@ public void testFailedMetric() { return null; }).when(esqlClient).execute(eq(EsqlResolveFieldsAction.TYPE), any(), any()); - var planExecutor = new PlanExecutor(indexResolver, MeterRegistry.NOOP); + var planExecutor = new PlanExecutor(indexResolver, MeterRegistry.NOOP, new XPackLicenseState(() -> 0L)); var enrichResolver = mockEnrichResolver(); var request = new EsqlQueryRequest(); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/VerifierMetricsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/VerifierMetricsTests.java index 5e6588d2295f9..eda906b147956 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/VerifierMetricsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/stats/VerifierMetricsTests.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.esql.stats; +import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.core.watcher.common.stats.Counters; import org.elasticsearch.xpack.esql.analysis.Verifier; @@ -205,7 +206,7 @@ public void testTwoWhereQuery() { public void testTwoQueriesExecuted() { Metrics metrics = new Metrics(new EsqlFunctionRegistry()); - Verifier verifier = new Verifier(metrics); + Verifier verifier = new Verifier(metrics, new XPackLicenseState(() -> 0L)); esqlWithVerifier(""" from employees | where languages > 2 @@ -252,7 +253,7 @@ public void testTwoQueriesExecuted() { public void testMultipleFunctions() { Metrics metrics = new Metrics(new EsqlFunctionRegistry()); - Verifier verifier = new Verifier(metrics); + Verifier verifier = new Verifier(metrics, new XPackLicenseState(() -> 0L)); esqlWithVerifier(""" from employees | where languages > 2 @@ -526,7 +527,7 @@ private Counters esql(String esql, Verifier v) { Metrics metrics = null; if (v == null) { metrics = new Metrics(new EsqlFunctionRegistry()); - verifier = new Verifier(metrics); + verifier = new Verifier(metrics, new XPackLicenseState(() -> 0L)); } analyzer(verifier).analyze(parser.createStatement(esql)); diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/DefaultEndPointsIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/DefaultEndPointsIT.java index 0594975064c8f..285ddc9c5829c 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/DefaultEndPointsIT.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/DefaultEndPointsIT.java @@ -7,6 +7,9 @@ 
package org.elasticsearch.xpack.inference; +import org.elasticsearch.client.Response; +import org.elasticsearch.client.ResponseListener; +import org.elasticsearch.common.Strings; import org.elasticsearch.inference.TaskType; import org.elasticsearch.threadpool.TestThreadPool; import org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService; @@ -15,9 +18,12 @@ import org.junit.Before; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.concurrent.CountDownLatch; +import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.oneOf; @@ -39,7 +45,6 @@ public void tearDown() throws Exception { @SuppressWarnings("unchecked") public void testInferDeploysDefaultElser() throws IOException { - assumeTrue("Default config requires a feature flag", DefaultElserFeatureFlag.isEnabled()); var model = getModel(ElasticsearchInternalService.DEFAULT_ELSER_ID); assertDefaultElserConfig(model); @@ -70,7 +75,6 @@ private static void assertDefaultElserConfig(Map modelConfig) { @SuppressWarnings("unchecked") public void testInferDeploysDefaultE5() throws IOException { - assumeTrue("Default config requires a feature flag", DefaultElserFeatureFlag.isEnabled()); var model = getModel(ElasticsearchInternalService.DEFAULT_E5_ID); assertDefaultE5Config(model); @@ -102,4 +106,37 @@ private static void assertDefaultE5Config(Map modelConfig) { Matchers.is(Map.of("enabled", true, "min_number_of_allocations", 0, "max_number_of_allocations", 32)) ); } + + public void testMultipleInferencesTriggeringDownloadAndDeploy() throws InterruptedException { + int numParallelRequests = 4; + var latch = new CountDownLatch(numParallelRequests); + var errors = new ArrayList(); + + var listener = new ResponseListener() { + @Override + public void onSuccess(Response response) { + latch.countDown(); + } + + @Override + public void onFailure(Exception exception) { + errors.add(exception); + latch.countDown(); + } + }; + + var inputs = List.of("Hello World", "Goodnight moon"); + var queryParams = Map.of("timeout", "120s"); + for (int i = 0; i < numParallelRequests; i++) { + var request = createInferenceRequest( + Strings.format("_inference/%s", ElasticsearchInternalService.DEFAULT_ELSER_ID), + inputs, + queryParams + ); + client().performRequestAsync(request, listener); + } + + latch.await(); + assertThat(errors.toString(), errors, empty()); + } } diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceBaseRestTest.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceBaseRestTest.java index 6790b9bb14c5a..4e32ef99d06dd 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceBaseRestTest.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceBaseRestTest.java @@ -373,12 +373,17 @@ protected Map infer(String modelId, TaskType taskType, List inferInternal(String endpoint, List input, Map queryParameters) throws IOException { + protected Request createInferenceRequest(String endpoint, List input, Map queryParameters) { var request = new Request("POST", endpoint); request.setJsonEntity(jsonBody(input)); if (queryParameters.isEmpty() == false) { 
request.addParameters(queryParameters); } + return request; + } + + private Map inferInternal(String endpoint, List input, Map queryParameters) throws IOException { + var request = createInferenceRequest(endpoint, input, queryParameters); var response = client().performRequest(request); assertOkOrCreated(response); return entityAsMap(response); diff --git a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceCrudIT.java b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceCrudIT.java index 53c82219e2f12..f1831acbcc40f 100644 --- a/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceCrudIT.java +++ b/x-pack/plugin/inference/qa/inference-service-tests/src/javaRestTest/java/org/elasticsearch/xpack/inference/InferenceCrudIT.java @@ -41,18 +41,18 @@ public void testCRUD() throws IOException { } var getAllModels = getAllModels(); - int numModels = DefaultElserFeatureFlag.isEnabled() ? 11 : 9; + int numModels = 11; assertThat(getAllModels, hasSize(numModels)); var getSparseModels = getModels("_all", TaskType.SPARSE_EMBEDDING); - int numSparseModels = DefaultElserFeatureFlag.isEnabled() ? 6 : 5; + int numSparseModels = 6; assertThat(getSparseModels, hasSize(numSparseModels)); for (var sparseModel : getSparseModels) { assertEquals("sparse_embedding", sparseModel.get("task_type")); } var getDenseModels = getModels("_all", TaskType.TEXT_EMBEDDING); - int numDenseModels = DefaultElserFeatureFlag.isEnabled() ? 5 : 4; + int numDenseModels = 5; assertThat(getDenseModels, hasSize(numDenseModels)); for (var denseModel : getDenseModels) { assertEquals("text_embedding", denseModel.get("task_type")); diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterIT.java index 73c0f6d4c7685..24248f832a8dd 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterIT.java @@ -11,6 +11,7 @@ import org.elasticsearch.action.bulk.BulkItemResponse; import org.elasticsearch.action.bulk.BulkRequestBuilder; import org.elasticsearch.action.bulk.BulkResponse; +import org.elasticsearch.action.delete.DeleteRequestBuilder; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; @@ -30,8 +31,10 @@ import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.Locale; import java.util.Map; +import java.util.Set; import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.randomSemanticTextInput; import static org.hamcrest.Matchers.equalTo; @@ -87,30 +90,38 @@ public void testBulkOperations() throws Exception { int totalBulkReqs = randomIntBetween(2, 100); long totalDocs = 0; + Set ids = new HashSet<>(); for (int bulkReqs = 0; bulkReqs < totalBulkReqs; bulkReqs++) { BulkRequestBuilder bulkReqBuilder = client().prepareBulk(); int totalBulkSize = randomIntBetween(1, 100); for (int bulkSize = 0; bulkSize 
< totalBulkSize; bulkSize++) { - String id = Long.toString(totalDocs); + if (ids.size() > 0 && rarely(random())) { + String id = randomFrom(ids); + ids.remove(id); + DeleteRequestBuilder request = new DeleteRequestBuilder(client(), INDEX_NAME).setId(id); + bulkReqBuilder.add(request); + continue; + } + String id = Long.toString(totalDocs++); boolean isIndexRequest = randomBoolean(); Map source = new HashMap<>(); source.put("sparse_field", isIndexRequest && rarely() ? null : randomSemanticTextInput()); source.put("dense_field", isIndexRequest && rarely() ? null : randomSemanticTextInput()); if (isIndexRequest) { bulkReqBuilder.add(new IndexRequestBuilder(client()).setIndex(INDEX_NAME).setId(id).setSource(source)); - totalDocs++; + ids.add(id); } else { boolean isUpsert = randomBoolean(); UpdateRequestBuilder request = new UpdateRequestBuilder(client()).setIndex(INDEX_NAME).setDoc(source); - if (isUpsert || totalDocs == 0) { + if (isUpsert || ids.size() == 0) { request.setDocAsUpsert(true); - totalDocs++; } else { // Update already existing document - id = Long.toString(randomLongBetween(0, totalDocs - 1)); + id = randomFrom(ids); } request.setId(id); bulkReqBuilder.add(request); + ids.add(id); } } BulkResponse bulkResponse = bulkReqBuilder.get(); @@ -135,7 +146,7 @@ public void testBulkOperations() throws Exception { SearchSourceBuilder sourceBuilder = new SearchSourceBuilder().size(0).trackTotalHits(true); SearchResponse searchResponse = client().search(new SearchRequest(INDEX_NAME).source(sourceBuilder)).get(); try { - assertThat(searchResponse.getHits().getTotalHits().value, equalTo(totalDocs)); + assertThat(searchResponse.getHits().getTotalHits().value, equalTo((long) ids.size())); } finally { searchResponse.decRef(); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/DefaultElserFeatureFlag.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/DefaultElserFeatureFlag.java deleted file mode 100644 index 2a764dabd62ae..0000000000000 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/DefaultElserFeatureFlag.java +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- */ - -package org.elasticsearch.xpack.inference; - -import org.elasticsearch.common.util.FeatureFlag; - -public class DefaultElserFeatureFlag { - - private DefaultElserFeatureFlag() {} - - private static final FeatureFlag FEATURE_FLAG = new FeatureFlag("inference_default_elser"); - - public static boolean isEnabled() { - return FEATURE_FLAG.isEnabled(); - } -} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java index 9d3a263b506c9..adf0abf237df5 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceFeatures.java @@ -13,7 +13,6 @@ import org.elasticsearch.xpack.inference.rank.random.RandomRankRetrieverBuilder; import org.elasticsearch.xpack.inference.rank.textsimilarity.TextSimilarityRankRetrieverBuilder; -import java.util.HashSet; import java.util.Set; /** @@ -23,22 +22,22 @@ public class InferenceFeatures implements FeatureSpecification { @Override public Set getFeatures() { - var features = new HashSet(); - features.add(TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_RETRIEVER_SUPPORTED); - features.add(RandomRankRetrieverBuilder.RANDOM_RERANKER_RETRIEVER_SUPPORTED); - features.add(SemanticTextFieldMapper.SEMANTIC_TEXT_SEARCH_INFERENCE_ID); - features.add(TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_COMPOSITION_SUPPORTED); - if (DefaultElserFeatureFlag.isEnabled()) { - features.add(SemanticTextFieldMapper.SEMANTIC_TEXT_DEFAULT_ELSER_2); - } - return Set.copyOf(features); + return Set.of( + TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_RETRIEVER_SUPPORTED, + RandomRankRetrieverBuilder.RANDOM_RERANKER_RETRIEVER_SUPPORTED, + SemanticTextFieldMapper.SEMANTIC_TEXT_SEARCH_INFERENCE_ID, + TextSimilarityRankRetrieverBuilder.TEXT_SIMILARITY_RERANKER_COMPOSITION_SUPPORTED, + SemanticTextFieldMapper.SEMANTIC_TEXT_DEFAULT_ELSER_2 + ); } @Override public Set getTestFeatures() { return Set.of( SemanticTextFieldMapper.SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX, - SemanticTextFieldMapper.SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX + SemanticTextFieldMapper.SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX, + SemanticTextFieldMapper.SEMANTIC_TEXT_DELETE_FIX, + SemanticTextFieldMapper.SEMANTIC_TEXT_ZERO_SIZE_FIX ); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java index 19106fdd5f19a..b3ab421e71e9a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java @@ -226,10 +226,8 @@ public Collection createComponents(PluginServices services) { // reference correctly var registry = new InferenceServiceRegistry(inferenceServices, factoryContext); registry.init(services.client()); - if (DefaultElserFeatureFlag.isEnabled()) { - for (var service : registry.getServices().values()) { - service.defaultConfigIds().forEach(modelRegistry::addDefaultIds); - } + for (var service : registry.getServices().values()) { + service.defaultConfigIds().forEach(modelRegistry::addDefaultIds); } inferenceServiceRegistry.set(registry); diff --git 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java index b3bbe3a7df9bc..dd59230e575c4 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java @@ -413,8 +413,8 @@ private void applyInferenceResponses(BulkItemRequest item, FieldInferenceRespons */ private Map> createFieldInferenceRequests(BulkShardRequest bulkShardRequest) { Map> fieldRequestsMap = new LinkedHashMap<>(); - int itemIndex = 0; - for (var item : bulkShardRequest.items()) { + for (int itemIndex = 0; itemIndex < bulkShardRequest.items().length; itemIndex++) { + var item = bulkShardRequest.items()[itemIndex]; if (item.getPrimaryResponse() != null) { // item was already aborted/processed by a filter in the chain upstream (e.g. security) continue; @@ -441,6 +441,7 @@ private Map> createFieldInferenceRequests(Bu // ignore delete request continue; } + final Map docMap = indexRequest.sourceAsMap(); for (var entry : fieldInferenceMap.values()) { String field = entry.getName(); @@ -483,7 +484,6 @@ private Map> createFieldInferenceRequests(Bu } } } - itemIndex++; } return fieldRequestsMap; } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index d70931a85c82e..4ea2a86123829 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -57,7 +57,6 @@ import org.elasticsearch.xcontent.XContentParserConfiguration; import org.elasticsearch.xpack.core.ml.inference.results.MlTextEmbeddingResults; import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; -import org.elasticsearch.xpack.inference.DefaultElserFeatureFlag; import java.io.IOException; import java.util.ArrayList; @@ -70,6 +69,7 @@ import java.util.Set; import java.util.function.Function; +import static org.elasticsearch.search.SearchService.DEFAULT_SIZE; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKED_EMBEDDINGS_FIELD; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKED_TEXT_FIELD; import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.CHUNKS_FIELD; @@ -90,8 +90,9 @@ public class SemanticTextFieldMapper extends FieldMapper implements InferenceFie public static final NodeFeature SEMANTIC_TEXT_SEARCH_INFERENCE_ID = new NodeFeature("semantic_text.search_inference_id"); public static final NodeFeature SEMANTIC_TEXT_DEFAULT_ELSER_2 = new NodeFeature("semantic_text.default_elser_2"); public static final NodeFeature SEMANTIC_TEXT_IN_OBJECT_FIELD_FIX = new NodeFeature("semantic_text.in_object_field_fix"); - public static final NodeFeature SEMANTIC_TEXT_SINGLE_FIELD_UPDATE_FIX = new NodeFeature("semantic_text.single_field_update_fix"); + public static final NodeFeature SEMANTIC_TEXT_DELETE_FIX = new NodeFeature("semantic_text.delete_fix"); + public static final NodeFeature SEMANTIC_TEXT_ZERO_SIZE_FIX = new 
NodeFeature("semantic_text.zero_size_fix"); public static final String CONTENT_TYPE = "semantic_text"; public static final String DEFAULT_ELSER_2_INFERENCE_ID = DEFAULT_ELSER_ID; @@ -111,16 +112,12 @@ public static class Builder extends FieldMapper.Builder { INFERENCE_ID_FIELD, false, mapper -> ((SemanticTextFieldType) mapper.fieldType()).inferenceId, - DefaultElserFeatureFlag.isEnabled() ? DEFAULT_ELSER_2_INFERENCE_ID : null + DEFAULT_ELSER_2_INFERENCE_ID ).addValidator(v -> { if (Strings.isEmpty(v)) { - // If the default ELSER feature flag is enabled, the only way we get here is if the user explicitly sets the param to an - // empty value. However, if the feature flag is disabled, we can get here if the user didn't set the param. - // Adjust the error message appropriately. - String message = DefaultElserFeatureFlag.isEnabled() - ? "[" + INFERENCE_ID_FIELD + "] on mapper [" + leafName() + "] of type [" + CONTENT_TYPE + "] must not be empty" - : "[" + INFERENCE_ID_FIELD + "] on mapper [" + leafName() + "] of type [" + CONTENT_TYPE + "] must be specified"; - throw new IllegalArgumentException(message); + throw new IllegalArgumentException( + "[" + INFERENCE_ID_FIELD + "] on mapper [" + leafName() + "] of type [" + CONTENT_TYPE + "] must not be empty" + ); } }); @@ -512,7 +509,7 @@ public boolean fieldHasValue(FieldInfos fieldInfos) { return fieldInfos.fieldInfo(getEmbeddingsFieldName(name())) != null; } - public QueryBuilder semanticQuery(InferenceResults inferenceResults, float boost, String queryName) { + public QueryBuilder semanticQuery(InferenceResults inferenceResults, Integer requestSize, float boost, String queryName) { String nestedFieldPath = getChunksFieldName(name()); String inferenceResultsFieldName = getEmbeddingsFieldName(name()); QueryBuilder childQueryBuilder; @@ -556,7 +553,13 @@ public QueryBuilder semanticQuery(InferenceResults inferenceResults, float boost ); } - yield new KnnVectorQueryBuilder(inferenceResultsFieldName, inference, null, null, null); + Integer k = requestSize; + if (k != null) { + // Ensure that k is at least the default size so that aggregations work when size is set to 0 in the request + k = Math.max(k, DEFAULT_SIZE); + } + + yield new KnnVectorQueryBuilder(inferenceResultsFieldName, inference, k, null, null); } default -> throw new IllegalStateException( "Field [" diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java index 75b255df4271c..501b6e6c2bfe2 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java @@ -166,7 +166,7 @@ private QueryBuilder doRewriteBuildSemanticQuery(SearchExecutionContext searchEx ); } - return semanticTextFieldType.semanticQuery(inferenceResults, boost(), queryName()); + return semanticTextFieldType.semanticQuery(inferenceResults, searchExecutionContext.requestSize(), boost(), queryName()); } else { throw new IllegalArgumentException( "Field [" + fieldName + "] of type [" + fieldType.typeName() + "] does not support " + NAME + " queries" diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestGetInferenceModelAction.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestGetInferenceModelAction.java index 
967ad4b46dcb3..83b2a8a0f5182 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestGetInferenceModelAction.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestGetInferenceModelAction.java @@ -15,10 +15,7 @@ import org.elasticsearch.rest.ServerlessScope; import org.elasticsearch.rest.action.RestToXContentListener; import org.elasticsearch.xpack.core.inference.action.GetInferenceModelAction; -import org.elasticsearch.xpack.inference.DefaultElserFeatureFlag; -import java.util.Collections; -import java.util.HashSet; import java.util.List; import java.util.Set; @@ -69,11 +66,6 @@ protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient @Override public Set supportedCapabilities() { - Set capabilities = new HashSet<>(); - if (DefaultElserFeatureFlag.isEnabled()) { - capabilities.add(DEFAULT_ELSER_2_CAPABILITY); - } - - return Collections.unmodifiableSet(capabilities); + return Set.of(DEFAULT_ELSER_2_CAPABILITY); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java index 922b366498c27..fc070965f29c2 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java @@ -35,7 +35,6 @@ import org.elasticsearch.xpack.core.ml.inference.TrainedModelPrefixStrings; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.InferenceConfigUpdate; import org.elasticsearch.xpack.core.ml.utils.MlPlatformArchitecturesUtil; -import org.elasticsearch.xpack.inference.DefaultElserFeatureFlag; import org.elasticsearch.xpack.inference.InferencePlugin; import java.io.IOException; @@ -296,11 +295,6 @@ protected void maybeStartDeployment( InferModelAction.Request request, ActionListener listener ) { - if (DefaultElserFeatureFlag.isEnabled() == false) { - listener.onFailure(e); - return; - } - if (isDefaultId(model.getInferenceEntityId()) && ExceptionsHelper.unwrapCause(e) instanceof ResourceNotFoundException) { this.start(model, request.getInferenceTimeout(), listener.delegateFailureAndWrap((l, started) -> { client.execute(InferModelAction.INSTANCE, request, listener); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandModel.java index b710b24cbda31..b76de5eeedbfc 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/CustomElandModel.java @@ -7,14 +7,9 @@ package org.elasticsearch.xpack.inference.services.elasticsearch; -import org.elasticsearch.ResourceNotFoundException; -import org.elasticsearch.action.ActionListener; import org.elasticsearch.inference.ChunkingSettings; -import org.elasticsearch.inference.Model; import org.elasticsearch.inference.TaskSettings; import org.elasticsearch.inference.TaskType; -import org.elasticsearch.xpack.core.ml.action.CreateTrainedModelAssignmentAction; -import 
org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; public class CustomElandModel extends ElasticsearchInternalModel { @@ -39,31 +34,10 @@ public CustomElandModel( } @Override - public ActionListener getCreateTrainedModelAssignmentActionListener( - Model model, - ActionListener listener - ) { - - return new ActionListener<>() { - @Override - public void onResponse(CreateTrainedModelAssignmentAction.Response response) { - listener.onResponse(Boolean.TRUE); - } - - @Override - public void onFailure(Exception e) { - if (ExceptionsHelper.unwrapCause(e) instanceof ResourceNotFoundException) { - listener.onFailure( - new ResourceNotFoundException( - "Could not start the inference as the custom eland model [{0}] for this platform cannot be found." - + " Custom models need to be loaded into the cluster with eland before they can be started.", - internalServiceSettings.modelId() - ) - ); - return; - } - listener.onFailure(e); - } - }; + protected String modelNotFoundErrorMessage(String modelId) { + return "Could not deploy model [" + + modelId + + "] as the model cannot be found." + + " Custom models need to be loaded into the cluster with Eland before they can be started."; } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticDeployedModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticDeployedModel.java index 724c7a8f0a166..ce6c6258d0393 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticDeployedModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticDeployedModel.java @@ -36,6 +36,11 @@ public StartTrainedModelDeploymentAction.Request getStartTrainedModelDeploymentA throw new IllegalStateException("cannot start model that uses an existing deployment"); } + @Override + protected String modelNotFoundErrorMessage(String modelId) { + throw new IllegalStateException("cannot start model [" + modelId + "] that uses an existing deployment"); + } + @Override public ActionListener getCreateTrainedModelAssignmentActionListener( Model model, diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalModel.java index 2405243f302bc..aa12bf0c645c3 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalModel.java @@ -7,6 +7,9 @@ package org.elasticsearch.xpack.inference.services.elasticsearch; +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.ResourceAlreadyExistsException; +import org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.action.ActionListener; import org.elasticsearch.common.Strings; import org.elasticsearch.core.TimeValue; @@ -15,8 +18,10 @@ import org.elasticsearch.inference.ModelConfigurations; import org.elasticsearch.inference.TaskSettings; import org.elasticsearch.inference.TaskType; +import org.elasticsearch.rest.RestStatus; import org.elasticsearch.xpack.core.ml.action.CreateTrainedModelAssignmentAction; import org.elasticsearch.xpack.core.ml.action.StartTrainedModelDeploymentAction; 
+import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; import static org.elasticsearch.xpack.core.ml.inference.assignment.AllocationStatus.State.STARTED; @@ -79,10 +84,38 @@ public StartTrainedModelDeploymentAction.Request getStartTrainedModelDeploymentA return startRequest; } - public abstract ActionListener<CreateTrainedModelAssignmentAction.Response> getCreateTrainedModelAssignmentActionListener( + public ActionListener<CreateTrainedModelAssignmentAction.Response> getCreateTrainedModelAssignmentActionListener( Model model, ActionListener<Boolean> listener - ); + ) { + return new ActionListener<>() { + @Override + public void onResponse(CreateTrainedModelAssignmentAction.Response response) { + listener.onResponse(Boolean.TRUE); + } + + @Override + public void onFailure(Exception e) { + var cause = ExceptionsHelper.unwrapCause(e); + if (cause instanceof ResourceNotFoundException) { + listener.onFailure(new ResourceNotFoundException(modelNotFoundErrorMessage(internalServiceSettings.modelId()))); + return; + } else if (cause instanceof ElasticsearchStatusException statusException) { + if (statusException.status() == RestStatus.CONFLICT + && statusException.getRootCause() instanceof ResourceAlreadyExistsException) { + // Deployment is already started + listener.onResponse(Boolean.TRUE); + } else { + listener.onFailure(e); + } + return; + } + listener.onFailure(e); + } + }; + } + + protected String modelNotFoundErrorMessage(String modelId) { + return "Could not deploy model [" + modelId + "] as the model cannot be found."; + } public boolean usesExistingDeployment() { return internalServiceSettings.getDeploymentId() != null; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserInternalModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserInternalModel.java index 8d2f59171a601..2594f18db3fb5 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserInternalModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserInternalModel.java @@ -7,13 +7,8 @@ package org.elasticsearch.xpack.inference.services.elasticsearch; -import org.elasticsearch.ResourceNotFoundException; -import org.elasticsearch.action.ActionListener; import org.elasticsearch.inference.ChunkingSettings; -import org.elasticsearch.inference.Model; import org.elasticsearch.inference.TaskType; -import org.elasticsearch.xpack.core.ml.action.CreateTrainedModelAssignmentAction; -import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; public class ElserInternalModel extends ElasticsearchInternalModel { @@ -37,31 +32,4 @@ public ElserInternalServiceSettings getServiceSettings() { public ElserMlNodeTaskSettings getTaskSettings() { return (ElserMlNodeTaskSettings) super.getTaskSettings(); } - - @Override - public ActionListener<CreateTrainedModelAssignmentAction.Response> getCreateTrainedModelAssignmentActionListener( - Model model, - ActionListener<Boolean> listener - ) { - return new ActionListener<>() { - @Override - public void onResponse(CreateTrainedModelAssignmentAction.Response response) { - listener.onResponse(Boolean.TRUE); - } - - @Override - public void onFailure(Exception e) { - if (ExceptionsHelper.unwrapCause(e) instanceof ResourceNotFoundException) { - listener.onFailure( - new ResourceNotFoundException( - "Could not start the ELSER service as the ELSER model for this platform cannot be found." - + " ELSER needs to be downloaded before it can be started." 
- ) - ); - return; - } - listener.onFailure(e); - } - }; - } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallModel.java index fee00d04d940b..2dcf91140c995 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/MultilingualE5SmallModel.java @@ -7,13 +7,8 @@ package org.elasticsearch.xpack.inference.services.elasticsearch; -import org.elasticsearch.ResourceNotFoundException; -import org.elasticsearch.action.ActionListener; import org.elasticsearch.inference.ChunkingSettings; -import org.elasticsearch.inference.Model; import org.elasticsearch.inference.TaskType; -import org.elasticsearch.xpack.core.ml.action.CreateTrainedModelAssignmentAction; -import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; public class MultilingualE5SmallModel extends ElasticsearchInternalModel { @@ -31,34 +26,4 @@ public MultilingualE5SmallModel( public MultilingualE5SmallInternalServiceSettings getServiceSettings() { return (MultilingualE5SmallInternalServiceSettings) super.getServiceSettings(); } - - @Override - public ActionListener getCreateTrainedModelAssignmentActionListener( - Model model, - ActionListener listener - ) { - - return new ActionListener<>() { - @Override - public void onResponse(CreateTrainedModelAssignmentAction.Response response) { - listener.onResponse(Boolean.TRUE); - } - - @Override - public void onFailure(Exception e) { - if (ExceptionsHelper.unwrapCause(e) instanceof ResourceNotFoundException) { - listener.onFailure( - new ResourceNotFoundException( - "Could not start the TextEmbeddingService service as the " - + "Multilingual-E5-Small model for this platform cannot be found." 
- + " Multilingual-E5-Small needs to be downloaded before it can be started" - ) - ); - return; - } - listener.onFailure(e); - } - }; - } - } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java index a6015a47a2cf2..4acca8bf30716 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java @@ -61,7 +61,6 @@ import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xcontent.json.JsonXContent; -import org.elasticsearch.xpack.inference.DefaultElserFeatureFlag; import org.elasticsearch.xpack.inference.InferencePlugin; import org.elasticsearch.xpack.inference.model.TestModel; import org.junit.AssumptionViolatedException; @@ -103,9 +102,6 @@ protected Collection getPlugins() { @Override protected void minimalMapping(XContentBuilder b) throws IOException { b.field("type", "semantic_text"); - if (DefaultElserFeatureFlag.isEnabled() == false) { - b.field("inference_id", "test_model"); - } } @Override @@ -175,9 +171,7 @@ public void testDefaults() throws Exception { DocumentMapper mapper = mapperService.documentMapper(); assertEquals(Strings.toString(fieldMapping), mapper.mappingSource().toString()); assertSemanticTextField(mapperService, fieldName, false); - if (DefaultElserFeatureFlag.isEnabled()) { - assertInferenceEndpoints(mapperService, fieldName, DEFAULT_ELSER_2_INFERENCE_ID, DEFAULT_ELSER_2_INFERENCE_ID); - } + assertInferenceEndpoints(mapperService, fieldName, DEFAULT_ELSER_2_INFERENCE_ID, DEFAULT_ELSER_2_INFERENCE_ID); ParsedDocument doc1 = mapper.parse(source(this::writeField)); List fields = doc1.rootDoc().getFields("field"); @@ -211,15 +205,13 @@ public void testSetInferenceEndpoints() throws IOException { assertSerialization.accept(fieldMapping, mapperService); } { - if (DefaultElserFeatureFlag.isEnabled()) { - final XContentBuilder fieldMapping = fieldMapping( - b -> b.field("type", "semantic_text").field(SEARCH_INFERENCE_ID_FIELD, searchInferenceId) - ); - final MapperService mapperService = createMapperService(fieldMapping); - assertSemanticTextField(mapperService, fieldName, false); - assertInferenceEndpoints(mapperService, fieldName, DEFAULT_ELSER_2_INFERENCE_ID, searchInferenceId); - assertSerialization.accept(fieldMapping, mapperService); - } + final XContentBuilder fieldMapping = fieldMapping( + b -> b.field("type", "semantic_text").field(SEARCH_INFERENCE_ID_FIELD, searchInferenceId) + ); + final MapperService mapperService = createMapperService(fieldMapping); + assertSemanticTextField(mapperService, fieldName, false); + assertInferenceEndpoints(mapperService, fieldName, DEFAULT_ELSER_2_INFERENCE_ID, searchInferenceId); + assertSerialization.accept(fieldMapping, mapperService); } { final XContentBuilder fieldMapping = fieldMapping( @@ -246,26 +238,18 @@ public void testInvalidInferenceEndpoints() { ); } { - final String expectedMessage = DefaultElserFeatureFlag.isEnabled() - ? 
"[inference_id] on mapper [field] of type [semantic_text] must not be empty" - : "[inference_id] on mapper [field] of type [semantic_text] must be specified"; Exception e = expectThrows( MapperParsingException.class, () -> createMapperService(fieldMapping(b -> b.field("type", "semantic_text").field(INFERENCE_ID_FIELD, ""))) ); - assertThat(e.getMessage(), containsString(expectedMessage)); + assertThat(e.getMessage(), containsString("[inference_id] on mapper [field] of type [semantic_text] must not be empty")); } { - if (DefaultElserFeatureFlag.isEnabled()) { - Exception e = expectThrows( - MapperParsingException.class, - () -> createMapperService(fieldMapping(b -> b.field("type", "semantic_text").field(SEARCH_INFERENCE_ID_FIELD, ""))) - ); - assertThat( - e.getMessage(), - containsString("[search_inference_id] on mapper [field] of type [semantic_text] must not be empty") - ); - } + Exception e = expectThrows( + MapperParsingException.class, + () -> createMapperService(fieldMapping(b -> b.field("type", "semantic_text").field(SEARCH_INFERENCE_ID_FIELD, ""))) + ); + assertThat(e.getMessage(), containsString("[search_inference_id] on mapper [field] of type [semantic_text] must not be empty")); } } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference.yml index 445df1dc302b9..534e4831c4a0a 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference.yml @@ -551,7 +551,7 @@ setup: --- "Calculates embeddings using the default ELSER 2 endpoint": - requires: - reason: "default ELSER 2 inference ID is behind a feature flag" + reason: "default ELSER 2 inference ID is enabled via a capability" test_runner_features: [capabilities] capabilities: - method: GET @@ -624,3 +624,55 @@ setup: - match: { _source.level_1.dense_field.text: "another inference test" } - exists: _source.level_1.dense_field.inference.chunks.0.embeddings - match: { _source.level_1.dense_field.inference.chunks.0.text: "another inference test" } + +--- +"Deletes on bulk operation": + - requires: + cluster_features: semantic_text.delete_fix + reason: Delete operations are properly applied when subsequent operations include a semantic text field. 
+ + - do: + bulk: + index: test-index + refresh: true + body: | + {"index":{"_id": "1"}} + {"dense_field": ["you know, for testing", "now with chunks"]} + {"index":{"_id": "2"}} + {"dense_field": ["some more tests", "that include chunks"]} + + - do: + search: + index: test-index + body: + query: + semantic: + field: dense_field + query: "you know, for testing" + + - match: { hits.total.value: 2 } + - match: { hits.total.relation: eq } + - match: { hits.hits.0._source.dense_field.text: ["you know, for testing", "now with chunks"] } + - match: { hits.hits.1._source.dense_field.text: ["some more tests", "that include chunks"] } + + - do: + bulk: + index: test-index + refresh: true + body: | + {"delete":{ "_id": "2"}} + {"update":{"_id": "1"}} + {"doc":{"dense_field": "updated text"}} + + - do: + search: + index: test-index + body: + query: + semantic: + field: dense_field + query: "you know, for testing" + + - match: { hits.total.value: 1 } + - match: { hits.total.relation: eq } + - match: { hits.hits.0._source.dense_field.text: "updated text" } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/40_semantic_text_query.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/40_semantic_text_query.yml index 17938f3b61a41..c2704a4c22914 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/40_semantic_text_query.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/40_semantic_text_query.yml @@ -843,7 +843,7 @@ setup: --- "Query a field that uses the default ELSER 2 endpoint": - requires: - reason: "default ELSER 2 inference ID is behind a feature flag" + reason: "default ELSER 2 inference ID is enabled via a capability" test_runner_features: [capabilities] capabilities: - method: GET @@ -878,3 +878,117 @@ setup: - match: { hits.total.value: 1 } - match: { hits.hits.0._id: "doc_1" } + +--- +"Query using a sparse embedding model with size set to zero": + - requires: + cluster_features: "semantic_text.zero_size_fix" + reason: zero size fix added in 8.16.1 & 8.15.5 + + - do: + indices.create: + index: test-sparse-index-with-agg-id + body: + mappings: + properties: + inference_field: + type: semantic_text + inference_id: sparse-inference-id + non_inference_field: + type: text + agg_id: + type: keyword + + - do: + index: + index: test-sparse-index-with-agg-id + id: doc_1 + body: + inference_field: "inference test" + agg_id: "doc_1" + + - do: + index: + index: test-sparse-index-with-agg-id + id: doc_2 + body: + non_inference_field: "non-inference test" + agg_id: "doc_2" + refresh: true + + - do: + search: + index: test-sparse-index-with-agg-id + body: + size: 0 + query: + semantic: + field: "inference_field" + query: "inference test" + aggs: + agg_ids: + terms: + field: agg_id + + - match: { hits.total.value: 1 } + - length: { hits.hits: 0 } + - length: { aggregations.agg_ids.buckets: 1 } + - match: { aggregations.agg_ids.buckets.0.key: "doc_1" } + - match: { aggregations.agg_ids.buckets.0.doc_count: 1 } + +--- +"Query using a dense embedding model with size set to zero": + - requires: + cluster_features: "semantic_text.zero_size_fix" + reason: zero size fix added in 8.16.1 & 8.15.5 + + - do: + indices.create: + index: test-dense-index-with-agg-id + body: + mappings: + properties: + inference_field: + type: semantic_text + inference_id: dense-inference-id + non_inference_field: + type: text + agg_id: + type: keyword + + - do: + index: + index: 
test-dense-index-with-agg-id + id: doc_1 + body: + inference_field: "inference test" + agg_id: "doc_1" + + - do: + index: + index: test-dense-index-with-agg-id + id: doc_2 + body: + non_inference_field: "non-inference test" + agg_id: "doc_2" + refresh: true + + - do: + search: + index: test-dense-index-with-agg-id + body: + size: 0 + query: + semantic: + field: "inference_field" + query: "inference test" + aggs: + agg_ids: + terms: + field: agg_id + + - match: { hits.total.value: 1 } + - length: { hits.hits: 0 } + - length: { aggregations.agg_ids.buckets: 1 } + - match: { aggregations.agg_ids.buckets.0.key: "doc_1" } + - match: { aggregations.agg_ids.buckets.0.doc_count: 1 } diff --git a/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/DownloadTaskRemovedListener.java b/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/DownloadTaskRemovedListener.java new file mode 100644 index 0000000000000..929dac6ee357a --- /dev/null +++ b/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/DownloadTaskRemovedListener.java @@ -0,0 +1,29 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.ml.packageloader.action; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.tasks.RemovedTaskListener; +import org.elasticsearch.tasks.Task; + +public record DownloadTaskRemovedListener(ModelDownloadTask trackedTask, ActionListener listener) + implements + RemovedTaskListener { + + @Override + public void onRemoved(Task task) { + if (task.getId() == trackedTask.getId()) { + if (trackedTask.getTaskException() == null) { + listener.onResponse(AcknowledgedResponse.TRUE); + } else { + listener.onFailure(trackedTask.getTaskException()); + } + } + } +} diff --git a/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/ModelDownloadTask.java b/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/ModelDownloadTask.java index 59977bd418e11..dd09c3cf65fec 100644 --- a/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/ModelDownloadTask.java +++ b/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/ModelDownloadTask.java @@ -13,6 +13,7 @@ import org.elasticsearch.tasks.Task; import org.elasticsearch.tasks.TaskId; import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xpack.core.ml.MlTasks; import java.io.IOException; import java.util.Map; @@ -51,9 +52,12 @@ public void writeTo(StreamOutput out) throws IOException { } private final AtomicReference downloadProgress = new AtomicReference<>(new DownLoadProgress(0, 0)); + private final String modelId; + private volatile Exception taskException; - public ModelDownloadTask(long id, String type, String action, String description, TaskId parentTaskId, Map headers) { - super(id, type, action, description, parentTaskId, headers); + public ModelDownloadTask(long id, String type, String action, String modelId, TaskId parentTaskId, Map headers) { + super(id, type, action, taskDescription(modelId), parentTaskId, headers); + this.modelId = modelId; 
} void setProgress(int totalParts, int downloadedParts) { @@ -65,4 +69,19 @@ public DownloadStatus getStatus() { return new DownloadStatus(downloadProgress.get()); } + public String getModelId() { + return modelId; + } + + public void setTaskException(Exception exception) { + this.taskException = exception; + } + + public Exception getTaskException() { + return taskException; + } + + public static String taskDescription(String modelId) { + return MlTasks.downloadModelTaskDescription(modelId); + } } diff --git a/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/TransportLoadTrainedModelPackage.java b/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/TransportLoadTrainedModelPackage.java index 76b7781b1cffe..2a14a8761e357 100644 --- a/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/TransportLoadTrainedModelPackage.java +++ b/x-pack/plugin/ml-package-loader/src/main/java/org/elasticsearch/xpack/ml/packageloader/action/TransportLoadTrainedModelPackage.java @@ -30,7 +30,6 @@ import org.elasticsearch.tasks.TaskAwareRequest; import org.elasticsearch.tasks.TaskCancelledException; import org.elasticsearch.tasks.TaskId; -import org.elasticsearch.tasks.TaskManager; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; import org.elasticsearch.xpack.core.common.notifications.Level; @@ -42,6 +41,9 @@ import java.io.IOException; import java.net.MalformedURLException; import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; @@ -49,7 +51,6 @@ import static org.elasticsearch.xpack.core.ClientHelper.ML_ORIGIN; import static org.elasticsearch.xpack.core.ml.MlTasks.MODEL_IMPORT_TASK_ACTION; import static org.elasticsearch.xpack.core.ml.MlTasks.MODEL_IMPORT_TASK_TYPE; -import static org.elasticsearch.xpack.core.ml.MlTasks.downloadModelTaskDescription; public class TransportLoadTrainedModelPackage extends TransportMasterNodeAction<Request, AcknowledgedResponse> { @@ -57,6 +58,7 @@ public class TransportLoadTrainedModelPackage extends TransportMasterNodeAction< private final Client client; private final CircuitBreakerService circuitBreakerService; + final Map<String, List<DownloadTaskRemovedListener>> taskRemovedListenersByModelId; @Inject public TransportLoadTrainedModelPackage( @@ -81,6 +83,7 @@ public TransportLoadTrainedModelPackage( ); this.client = new OriginSettingClient(client, ML_ORIGIN); this.circuitBreakerService = circuitBreakerService; + taskRemovedListenersByModelId = new HashMap<>(); } @Override @@ -91,6 +94,12 @@ protected ClusterBlockException checkBlock(Request request, ClusterState state) @Override protected void masterOperation(Task task, Request request, ClusterState state, ActionListener<AcknowledgedResponse> listener) throws Exception { + if (handleDownloadInProgress(request.getModelId(), request.isWaitForCompletion(), listener)) { + logger.debug("Existing download of model [{}] in progress", request.getModelId()); + // download in progress, nothing to do + return; + } + ModelDownloadTask downloadTask = createDownloadTask(request); try { @@ -107,7 +116,7 @@ protected void masterOperation(Task task, Request request, ClusterState state, A var downloadCompleteListener = request.isWaitForCompletion() ?
listener : ActionListener.noop(); - importModel(client, taskManager, request, modelImporter, downloadCompleteListener, downloadTask); + importModel(client, () -> unregisterTask(downloadTask), request, modelImporter, downloadTask, downloadCompleteListener); } catch (Exception e) { taskManager.unregister(downloadTask); listener.onFailure(e); @@ -124,22 +133,91 @@ private ParentTaskAssigningClient getParentTaskAssigningClient(Task originTask) return new ParentTaskAssigningClient(client, parentTaskId); } + /** + * Look for a current download task of the model and optionally wait + * for that task to complete if there is one. + * Synchronized with {@code unregisterTask} to prevent the task being + * removed before the removed task listener is added. + * @param modelId Model being downloaded + * @param isWaitForCompletion Wait until the download completes before + * calling the listener + * @param listener Model download listener + * @return True if a download task is in progress + */ + synchronized boolean handleDownloadInProgress( + String modelId, + boolean isWaitForCompletion, + ActionListener<AcknowledgedResponse> listener + ) { + var description = ModelDownloadTask.taskDescription(modelId); + var tasks = taskManager.getCancellableTasks().values(); + + ModelDownloadTask inProgress = null; + for (var task : tasks) { + if (description.equals(task.getDescription()) && task instanceof ModelDownloadTask downloadTask) { + inProgress = downloadTask; + break; + } + } + + if (inProgress != null) { + if (isWaitForCompletion == false) { + // Not waiting for the download to complete; it is enough that the download is in progress. + // Respond now, not when the download completes + listener.onResponse(AcknowledgedResponse.TRUE); + return true; + } + // Otherwise register a task removed listener which is called + // once the task is complete and unregistered + var tracker = new DownloadTaskRemovedListener(inProgress, listener); + taskRemovedListenersByModelId.computeIfAbsent(modelId, s -> new ArrayList<>()).add(tracker); + taskManager.registerRemovedTaskListener(tracker); + return true; + } + + return false; + } + + /** + * Unregister the completed task, triggering any removed task listeners. + * This method is synchronized to prevent the task being removed while + * {@code handleDownloadInProgress} is in progress. + * @param task The completed task + */ + synchronized void unregisterTask(ModelDownloadTask task) { + taskManager.unregister(task); // unregister will call the on remove function + + var trackers = taskRemovedListenersByModelId.remove(task.getModelId()); + if (trackers != null) { + for (var tracker : trackers) { + taskManager.unregisterRemovedTaskListener(tracker); + } + } + } + + /** + * This is package scope so that we can test the logic directly. + * This should only be called from the masterOperation method and the tests. + * This method is static for testing. + * + * @param auditClient a client which should only be used to send audit notifications. This client cannot be associated with the passed + * in task, that way when the task is cancelled the notification requests can + * still be performed. If it is associated with the task (i.e. via ParentTaskAssigningClient), + * then the requests will throw a TaskCancelledException. + * @param unregisterTaskFn Runnable to unregister the task. Because this is a static method + * a lambda is used rather than the instance method.
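+ *                         Unregistering the task is what fires any {@code DownloadTaskRemovedListener}
+ *                         registered by {@code handleDownloadInProgress} for the same model.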
+ * @param request The download request + * @param modelImporter The importer + * @param task Download task + * @param listener Listener notified when the download completes or fails */ static void importModel( Client auditClient, - TaskManager taskManager, + Runnable unregisterTaskFn, Request request, ModelImporter modelImporter, - ActionListener<AcknowledgedResponse> listener, - Task task + ModelDownloadTask task, + ActionListener<AcknowledgedResponse> listener ) { final String modelId = request.getModelId(); final long relativeStartNanos = System.nanoTime(); @@ -155,9 +233,12 @@ static void importModel( Level.INFO ); listener.onResponse(AcknowledgedResponse.TRUE); - }, exception -> listener.onFailure(processException(auditClient, modelId, exception))); + }, exception -> { + task.setTaskException(exception); + listener.onFailure(processException(auditClient, modelId, exception)); + }); - modelImporter.doImport(ActionListener.runAfter(finishListener, () -> taskManager.unregister(task))); + modelImporter.doImport(ActionListener.runAfter(finishListener, unregisterTaskFn)); } static Exception processException(Client auditClient, String modelId, Exception e) { @@ -197,14 +278,7 @@ public TaskId getParentTask() { @Override public ModelDownloadTask createTask(long id, String type, String action, TaskId parentTaskId, Map<String, String> headers) { - return new ModelDownloadTask( - id, - type, - action, - downloadModelTaskDescription(request.getModelId()), - parentTaskId, - headers - ); + return new ModelDownloadTask(id, type, action, request.getModelId(), parentTaskId, headers); } }, false); } diff --git a/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/action/TransportLoadTrainedModelPackageTests.java b/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/action/TransportLoadTrainedModelPackageTests.java index cbcfd5b760779..3486ce6af0db5 100644 --- a/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/action/TransportLoadTrainedModelPackageTests.java +++ b/x-pack/plugin/ml-package-loader/src/test/java/org/elasticsearch/xpack/ml/packageloader/action/TransportLoadTrainedModelPackageTests.java @@ -10,13 +10,19 @@ import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.client.internal.Client; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.indices.breaker.CircuitBreakerService; import org.elasticsearch.rest.RestStatus; -import org.elasticsearch.tasks.Task; import org.elasticsearch.tasks.TaskCancelledException; +import org.elasticsearch.tasks.TaskId; import org.elasticsearch.tasks.TaskManager; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportService; import org.elasticsearch.xpack.core.common.notifications.Level; import org.elasticsearch.xpack.core.ml.action.AuditMlNotificationAction; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ModelPackageConfig; @@ -27,9 +33,13 @@ import java.io.IOException; import java.net.MalformedURLException; import java.net.URISyntaxException; +import java.util.Map; import java.util.concurrent.atomic.AtomicReference; import static org.elasticsearch.core.Strings.format; +import static
org.elasticsearch.xpack.core.ml.MlTasks.MODEL_IMPORT_TASK_ACTION; +import static org.elasticsearch.xpack.core.ml.MlTasks.MODEL_IMPORT_TASK_TYPE; +import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.core.Is.is; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; @@ -37,6 +47,7 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; public class TransportLoadTrainedModelPackageTests extends ESTestCase { private static final String MODEL_IMPORT_FAILURE_MSG_FORMAT = "Model importing failed due to %s [%s]"; @@ -44,17 +55,10 @@ public class TransportLoadTrainedModelPackageTests extends ESTestCase { public void testSendsFinishedUploadNotification() { var uploader = createUploader(null); var taskManager = mock(TaskManager.class); - var task = mock(Task.class); + var task = mock(ModelDownloadTask.class); var client = mock(Client.class); - TransportLoadTrainedModelPackage.importModel( - client, - taskManager, - createRequestWithWaiting(), - uploader, - ActionListener.noop(), - task - ); + TransportLoadTrainedModelPackage.importModel(client, () -> {}, createRequestWithWaiting(), uploader, task, ActionListener.noop()); var notificationArg = ArgumentCaptor.forClass(AuditMlNotificationAction.Request.class); // 2 notifications- the start and finish messages @@ -108,32 +112,63 @@ public void testSendsWarningNotificationForTaskCancelledException() throws Excep public void testCallsOnResponseWithAcknowledgedResponse() throws Exception { var client = mock(Client.class); var taskManager = mock(TaskManager.class); - var task = mock(Task.class); + var task = mock(ModelDownloadTask.class); ModelImporter uploader = createUploader(null); var responseRef = new AtomicReference<AcknowledgedResponse>(); var listener = ActionListener.wrap(responseRef::set, e -> fail("received an exception: " + e.getMessage())); - TransportLoadTrainedModelPackage.importModel(client, taskManager, createRequestWithWaiting(), uploader, listener, task); + TransportLoadTrainedModelPackage.importModel(client, () -> {}, createRequestWithWaiting(), uploader, task, listener); assertThat(responseRef.get(), is(AcknowledgedResponse.TRUE)); } public void testDoesNotCallListenerWhenNotWaitingForCompletion() { var uploader = mock(ModelImporter.class); var client = mock(Client.class); - var taskManager = mock(TaskManager.class); - var task = mock(Task.class); - + var task = mock(ModelDownloadTask.class); TransportLoadTrainedModelPackage.importModel( client, - taskManager, + () -> {}, createRequestWithoutWaiting(), uploader, - ActionListener.running(ESTestCase::fail), - task + task, + ActionListener.running(ESTestCase::fail) ); } + public void testWaitForExistingDownload() { + var taskManager = mock(TaskManager.class); + var modelId = "foo"; + var task = new ModelDownloadTask(1L, MODEL_IMPORT_TASK_TYPE, MODEL_IMPORT_TASK_ACTION, modelId, new TaskId("node", 1L), Map.of()); + when(taskManager.getCancellableTasks()).thenReturn(Map.of(1L, task)); + + var transportService = mock(TransportService.class); + when(transportService.getTaskManager()).thenReturn(taskManager); + + var action = new TransportLoadTrainedModelPackage( + transportService, + mock(ClusterService.class), + mock(ThreadPool.class), + mock(ActionFilters.class), + mock(IndexNameExpressionResolver.class), + mock(Client.class), + mock(CircuitBreakerService.class) + ); + + assertTrue(action.handleDownloadInProgress(modelId, true,
ActionListener.noop())); + verify(taskManager).registerRemovedTaskListener(any()); + assertThat(action.taskRemovedListenersByModelId.entrySet(), hasSize(1)); + assertThat(action.taskRemovedListenersByModelId.get(modelId), hasSize(1)); + + // With wait for completion == false no new removed listener will be added + assertTrue(action.handleDownloadInProgress(modelId, false, ActionListener.noop())); + verify(taskManager, times(1)).registerRemovedTaskListener(any()); + assertThat(action.taskRemovedListenersByModelId.entrySet(), hasSize(1)); + assertThat(action.taskRemovedListenersByModelId.get(modelId), hasSize(1)); + + assertFalse(action.handleDownloadInProgress("no-task-for-this-one", randomBoolean(), ActionListener.noop())); + } + private void assertUploadCallsOnFailure(Exception exception, String message, RestStatus status, Level level) throws Exception { var esStatusException = new ElasticsearchStatusException(message, status, exception); @@ -152,7 +187,7 @@ private void assertNotificationAndOnFailure( ) throws Exception { var client = mock(Client.class); var taskManager = mock(TaskManager.class); - var task = mock(Task.class); + var task = mock(ModelDownloadTask.class); ModelImporter uploader = createUploader(thrownException); var failureRef = new AtomicReference<Exception>(); var listener = ActionListener.wrap( (AcknowledgedResponse response) -> { fail("received an acknowledged response: " + response.toString()); }, failureRef::set ); - TransportLoadTrainedModelPackage.importModel(client, taskManager, createRequestWithWaiting(), uploader, listener, task); + TransportLoadTrainedModelPackage.importModel( + client, + () -> taskManager.unregister(task), + createRequestWithWaiting(), + uploader, + task, + listener + ); var notificationArg = ArgumentCaptor.forClass(AuditMlNotificationAction.Request.class); // 2 notifications- the starting message and the failure diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartTrainedModelDeploymentAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartTrainedModelDeploymentAction.java index 5fd70ce71cd24..f01372ca4f246 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartTrainedModelDeploymentAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportStartTrainedModelDeploymentAction.java @@ -190,11 +190,11 @@ protected void masterOperation( () -> "[" + request.getDeploymentId() + "] creating new assignment for model [" + request.getModelId() + "] failed", e ); - if (ExceptionsHelper.unwrapCause(e) instanceof ResourceAlreadyExistsException) { + if (ExceptionsHelper.unwrapCause(e) instanceof ResourceAlreadyExistsException resourceAlreadyExistsException) { e = new ElasticsearchStatusException( "Cannot start deployment [{}] because it has already been started", RestStatus.CONFLICT, - e, + resourceAlreadyExistsException, request.getDeploymentId() ); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java index e55736cf43607..5b1ed7c954fe9 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/CategorizeTextAggregator.java @@ -13,6 +13,7 @@ import
org.elasticsearch.common.Strings; import org.elasticsearch.common.collect.Iterators; import org.elasticsearch.common.util.BytesRefHash; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.common.util.ObjectArray; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.core.Releasables; @@ -110,31 +111,32 @@ protected void doClose() { } @Override - public InternalAggregation[] buildAggregations(long[] ordsToCollect) throws IOException { - Bucket[][] topBucketsPerOrd = new Bucket[ordsToCollect.length][]; - for (int ordIdx = 0; ordIdx < ordsToCollect.length; ordIdx++) { - final long ord = ordsToCollect[ordIdx]; - final TokenListCategorizer categorizer = (ord < categorizers.size()) ? categorizers.get(ord) : null; - if (categorizer == null) { - topBucketsPerOrd[ordIdx] = new Bucket[0]; - continue; + public InternalAggregation[] buildAggregations(LongArray ordsToCollect) throws IOException { + try (ObjectArray<Bucket[]> topBucketsPerOrd = bigArrays().newObjectArray(ordsToCollect.size())) { + for (long ordIdx = 0; ordIdx < ordsToCollect.size(); ordIdx++) { + final long ord = ordsToCollect.get(ordIdx); + final TokenListCategorizer categorizer = (ord < categorizers.size()) ? categorizers.get(ord) : null; + if (categorizer == null) { + topBucketsPerOrd.set(ordIdx, new Bucket[0]); + continue; + } + int size = (int) Math.min(bucketOrds.bucketsInOrd(ordIdx), bucketCountThresholds.getShardSize()); + topBucketsPerOrd.set(ordIdx, categorizer.toOrderedBuckets(size)); } - int size = (int) Math.min(bucketOrds.bucketsInOrd(ordIdx), bucketCountThresholds.getShardSize()); - topBucketsPerOrd[ordIdx] = categorizer.toOrderedBuckets(size); - } - buildSubAggsForAllBuckets(topBucketsPerOrd, Bucket::getBucketOrd, Bucket::setAggregations); - InternalAggregation[] results = new InternalAggregation[ordsToCollect.length]; - for (int ordIdx = 0; ordIdx < ordsToCollect.length; ordIdx++) { - results[ordIdx] = new InternalCategorizationAggregation( - name, - bucketCountThresholds.getRequiredSize(), - bucketCountThresholds.getMinDocCount(), - similarityThreshold, - metadata(), - Arrays.asList(topBucketsPerOrd[ordIdx]) - ); + buildSubAggsForAllBuckets(topBucketsPerOrd, Bucket::getBucketOrd, Bucket::setAggregations); + InternalAggregation[] results = new InternalAggregation[Math.toIntExact(ordsToCollect.size())]; + for (int ordIdx = 0; ordIdx < results.length; ordIdx++) { + results[ordIdx] = new InternalCategorizationAggregation( + name, + bucketCountThresholds.getRequiredSize(), + bucketCountThresholds.getMinDocCount(), + similarityThreshold, + metadata(), + Arrays.asList(topBucketsPerOrd.get(ordIdx)) + ); + } + return results; } - return results; } @Override diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/mr/DelegatingCircuitBreakerService.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/mr/DelegatingCircuitBreakerService.java index 350f45afb9e1f..1b28ebbb3eec6 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/mr/DelegatingCircuitBreakerService.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/mr/DelegatingCircuitBreakerService.java @@ -40,10 +40,12 @@ * At the time of writing circuit breakers are a global gauge.)
* * After the map phase and before reduce, the {@link ItemSetMapReduceAggregator} creates instances of - * {@link InternalItemSetMapReduceAggregation}, see {@link ItemSetMapReduceAggregator#buildAggregations(long[])}. + * {@link InternalItemSetMapReduceAggregation}, see + * {@link ItemSetMapReduceAggregator#buildAggregations(org.elasticsearch.common.util.LongArray)}. * * (Note 1: Instead of keeping the existing instance, it would have been possible to deep-copy the object like - * {@link CardinalityAggregator#buildAggregations(long[])}. I decided against this approach mainly because the deep-copy isn't + * {@link CardinalityAggregator#buildAggregations(org.elasticsearch.common.util.LongArray)}. + * I decided against this approach mainly because the deep-copy isn't * secured by circuit breakers, meaning the node could run out of memory during the deep-copy.) * (Note 2: Between {@link ItemSetMapReduceAggregator#doClose()} and serializing {@link InternalItemSetMapReduceAggregation} * memory accounting is broken, meaning the agg context gets closed and bytes get returned to the circuit breaker before memory is diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/mr/ItemSetMapReduceAggregator.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/mr/ItemSetMapReduceAggregator.java index 0f9555c77341f..1a5e5d7a0790e 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/mr/ItemSetMapReduceAggregator.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/mr/ItemSetMapReduceAggregator.java @@ -17,6 +17,7 @@ import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.LongArray; import org.elasticsearch.common.util.LongObjectPagedHashMap; import org.elasticsearch.core.Releasables; import org.elasticsearch.core.Tuple; @@ -117,9 +118,9 @@ public InternalAggregation buildEmptyAggregation() { } @Override - public final InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { - InternalAggregation[] results = new InternalAggregation[owningBucketOrds.length]; - for (int ordIdx = 0; ordIdx < owningBucketOrds.length; ordIdx++) { + public final InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws IOException { + InternalAggregation[] results = new InternalAggregation[Math.toIntExact(owningBucketOrds.size())]; + for (int ordIdx = 0; ordIdx < results.length; ordIdx++) { results[ordIdx] = buildAggregation(ordIdx); } diff --git a/x-pack/plugin/searchable-snapshots/qa/s3/src/javaRestTest/java/org/elasticsearch/xpack/searchablesnapshots/s3/S3SearchableSnapshotsCredentialsReloadIT.java b/x-pack/plugin/searchable-snapshots/qa/s3/src/javaRestTest/java/org/elasticsearch/xpack/searchablesnapshots/s3/S3SearchableSnapshotsCredentialsReloadIT.java new file mode 100644 index 0000000000000..3049fe830e728 --- /dev/null +++ b/x-pack/plugin/searchable-snapshots/qa/s3/src/javaRestTest/java/org/elasticsearch/xpack/searchablesnapshots/s3/S3SearchableSnapshotsCredentialsReloadIT.java @@ -0,0 +1,281 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.searchablesnapshots.s3; + +import fixture.s3.S3HttpFixture; +import io.netty.handler.codec.http.HttpMethod; + +import org.apache.http.client.methods.HttpPut; +import org.apache.http.entity.ByteArrayEntity; +import org.apache.http.entity.ContentType; +import org.elasticsearch.client.Request; +import org.elasticsearch.client.RequestOptions; +import org.elasticsearch.client.ResponseException; +import org.elasticsearch.client.WarningsHandler; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.test.cluster.MutableSettingsProvider; +import org.elasticsearch.test.cluster.local.distribution.DistributionType; +import org.elasticsearch.test.rest.ESRestTestCase; +import org.elasticsearch.test.rest.ObjectPath; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentType; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.rules.RuleChain; +import org.junit.rules.TestRule; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.function.UnaryOperator; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.Matchers.allOf; + +public class S3SearchableSnapshotsCredentialsReloadIT extends ESRestTestCase { + + private static final String BUCKET = "S3SearchableSnapshotsCredentialsReloadIT-bucket"; + private static final String BASE_PATH = "S3SearchableSnapshotsCredentialsReloadIT-base-path"; + + public static final S3HttpFixture s3Fixture = new S3HttpFixture(true, BUCKET, BASE_PATH, "ignored"); + + private static final MutableSettingsProvider keystoreSettings = new MutableSettingsProvider(); + + public static ElasticsearchCluster cluster = ElasticsearchCluster.local() + .distribution(DistributionType.DEFAULT) + .setting("xpack.license.self_generated.type", "trial") + .keystore(keystoreSettings) + .setting("xpack.searchable.snapshot.shared_cache.size", "4kB") + .setting("xpack.searchable.snapshot.shared_cache.region_size", "4kB") + .setting("xpack.searchable_snapshots.cache_fetch_async_thread_pool.keep_alive", "0ms") + .setting("xpack.security.enabled", "false") + .systemProperty("es.allow_insecure_settings", "true") + .build(); + + @ClassRule + public static TestRule ruleChain = RuleChain.outerRule(s3Fixture).around(cluster); + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } + + @Before + public void skipFips() { + assumeFalse("getting these tests to run in a FIPS JVM is kinda fiddly and we don't really need the extra coverage", inFipsJvm()); + } + + public void testReloadCredentialsFromKeystore() throws IOException { + final TestHarness testHarness = new TestHarness(); + testHarness.putRepository(); + + // Set up initial credentials + final String accessKey1 = randomIdentifier(); + s3Fixture.setAccessKey(accessKey1); + keystoreSettings.put("s3.client.default.access_key", accessKey1); + keystoreSettings.put("s3.client.default.secret_key", randomIdentifier()); + cluster.updateStoredSecureSettings(); + assertOK(client().performRequest(new Request("POST", "/_nodes/reload_secure_settings"))); + + testHarness.createFrozenSearchableSnapshotIndex(); + + // Verify searchable snapshot functionality + testHarness.ensureSearchSuccess(); + + // Rotate credentials in blob store + logger.info("--> rotate credentials"); + final String accessKey2 = 
randomValueOtherThan(accessKey1, ESTestCase::randomIdentifier); + s3Fixture.setAccessKey(accessKey2); + + // Ensure searchable snapshot now does not work due to invalid credentials + logger.info("--> expect failure"); + testHarness.ensureSearchFailure(); + + // Set up refreshed credentials + logger.info("--> update keystore contents"); + keystoreSettings.put("s3.client.default.access_key", accessKey2); + cluster.updateStoredSecureSettings(); + assertOK(client().performRequest(new Request("POST", "/_nodes/reload_secure_settings"))); + + // Check access using refreshed credentials + logger.info("--> expect success"); + testHarness.ensureSearchSuccess(); + } + + public void testReloadCredentialsFromAlternativeClient() throws IOException { + final TestHarness testHarness = new TestHarness(); + testHarness.putRepository(); + + // Set up credentials + final String accessKey1 = randomIdentifier(); + final String accessKey2 = randomValueOtherThan(accessKey1, ESTestCase::randomIdentifier); + final String alternativeClient = randomValueOtherThan("default", ESTestCase::randomIdentifier); + + s3Fixture.setAccessKey(accessKey1); + keystoreSettings.put("s3.client.default.access_key", accessKey1); + keystoreSettings.put("s3.client.default.secret_key", randomIdentifier()); + keystoreSettings.put("s3.client." + alternativeClient + ".access_key", accessKey2); + keystoreSettings.put("s3.client." + alternativeClient + ".secret_key", randomIdentifier()); + cluster.updateStoredSecureSettings(); + assertOK(client().performRequest(new Request("POST", "/_nodes/reload_secure_settings"))); + + testHarness.createFrozenSearchableSnapshotIndex(); + + // Verify searchable snapshot functionality + testHarness.ensureSearchSuccess(); + + // Rotate credentials in blob store + logger.info("--> rotate credentials"); + s3Fixture.setAccessKey(accessKey2); + + // Ensure searchable snapshot now does not work due to invalid credentials + logger.info("--> expect failure"); + testHarness.ensureSearchFailure(); + + // Adjust repository to use new client + logger.info("--> update repository metadata"); + testHarness.putRepository(b -> b.put("client", alternativeClient)); + + // Check access using refreshed credentials + logger.info("--> expect success"); + testHarness.ensureSearchSuccess(); + } + + public void testReloadCredentialsFromMetadata() throws IOException { + final TestHarness testHarness = new TestHarness(); + testHarness.warningsHandler = WarningsHandler.PERMISSIVE; + + // Set up credentials + final String accessKey1 = randomIdentifier(); + final String accessKey2 = randomValueOtherThan(accessKey1, ESTestCase::randomIdentifier); + + testHarness.putRepository(b -> b.put("access_key", accessKey1).put("secret_key", randomIdentifier())); + s3Fixture.setAccessKey(accessKey1); + + testHarness.createFrozenSearchableSnapshotIndex(); + + // Verify searchable snapshot functionality + testHarness.ensureSearchSuccess(); + + // Rotate credentials in blob store + logger.info("--> rotate credentials"); + s3Fixture.setAccessKey(accessKey2); + + // Ensure searchable snapshot now does not work due to invalid credentials + logger.info("--> expect failure"); + testHarness.ensureSearchFailure(); + + // Adjust repository to use new client + logger.info("--> update repository metadata"); + testHarness.putRepository(b -> b.put("access_key", accessKey2).put("secret_key", randomIdentifier())); + + // Check access using refreshed credentials + logger.info("--> expect success"); + testHarness.ensureSearchSuccess(); + } + + private class TestHarness { 
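+        // Shared scaffolding for these tests: registers the S3 repository, snapshots an index and
+        // mounts it as a frozen searchable snapshot, then asserts whether searches succeed or fail
+        // against the fixture's currently accepted access key.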
+ private final String mountedIndexName = randomIdentifier(); + private final String repositoryName = randomIdentifier(); + + @Nullable // to use the default + WarningsHandler warningsHandler; + + void putRepository() throws IOException { + putRepository(UnaryOperator.identity()); + } + + void putRepository(UnaryOperator<Settings.Builder> settingsOperator) throws IOException { + // Register repository + final Request request = newXContentRequest( + HttpMethod.PUT, + "/_snapshot/" + repositoryName, + (b, p) -> b.field("type", "s3") + .startObject("settings") + .value( + settingsOperator.apply( + Settings.builder().put("bucket", BUCKET).put("base_path", BASE_PATH).put("endpoint", s3Fixture.getAddress()) + ).build() + ) + .endObject() + ); + request.addParameter("verify", "false"); // because we don't have access to the blob store yet + request.setOptions(RequestOptions.DEFAULT.toBuilder().setWarningsHandler(warningsHandler)); + assertOK(client().performRequest(request)); + } + + void createFrozenSearchableSnapshotIndex() throws IOException { + // Create an index, large enough that its data is not all captured in the file headers + final String indexName = randomValueOtherThan(mountedIndexName, ESTestCase::randomIdentifier); + createIndex(indexName, indexSettings(1, 0).build()); + try (var bodyStream = new ByteArrayOutputStream()) { + for (int i = 0; i < 1024; i++) { + try (XContentBuilder bodyLineBuilder = new XContentBuilder(XContentType.JSON.xContent(), bodyStream)) { + bodyLineBuilder.startObject().startObject("index").endObject().endObject(); + } + bodyStream.write(0x0a); + try (XContentBuilder bodyLineBuilder = new XContentBuilder(XContentType.JSON.xContent(), bodyStream)) { + bodyLineBuilder.startObject().field("foo", "bar").endObject(); + } + bodyStream.write(0x0a); + } + bodyStream.flush(); + final Request request = new Request("PUT", indexName + "/_bulk"); + request.setEntity(new ByteArrayEntity(bodyStream.toByteArray(), ContentType.APPLICATION_JSON)); + client().performRequest(request); + } + + // Take a snapshot and delete the original index + final String snapshotName = randomIdentifier(); + final Request createSnapshotRequest = new Request(HttpPut.METHOD_NAME, "_snapshot/" + repositoryName + '/' + snapshotName); + createSnapshotRequest.addParameter("wait_for_completion", "true"); + createSnapshotRequest.setOptions(RequestOptions.DEFAULT.toBuilder().setWarningsHandler(warningsHandler)); + assertOK(client().performRequest(createSnapshotRequest)); + + deleteIndex(indexName); + + // Mount the snapshotted index as a searchable snapshot + final Request mountRequest = newXContentRequest( + HttpMethod.POST, + "/_snapshot/" + repositoryName + "/" + snapshotName + "/_mount", + (b, p) -> b.field("index", indexName).field("renamed_index", mountedIndexName) + ); + mountRequest.addParameter("wait_for_completion", "true"); + mountRequest.addParameter("storage", "shared_cache"); + assertOK(client().performRequest(mountRequest)); + ensureGreen(mountedIndexName); + } + + void ensureSearchSuccess() throws IOException { + final Request searchRequest = new Request("GET", mountedIndexName + "/_search"); + searchRequest.addParameter("size", "10000"); + assertEquals( + "bar", + ObjectPath.createFromResponse(assertOK(client().performRequest(searchRequest))).evaluate("hits.hits.0._source.foo") + ); + } + + void ensureSearchFailure() throws IOException { + assertOK(client().performRequest(new Request("POST", "/_searchable_snapshots/cache/clear"))); + final Request searchRequest = new Request("GET", mountedIndexName +
"/_search"); + searchRequest.addParameter("size", "10000"); + assertThat( + expectThrows(ResponseException.class, () -> client().performRequest(searchRequest)).getMessage(), + allOf( + containsString("Bad access key"), + containsString("Status Code: 403"), + containsString("Error Code: AccessDenied"), + containsString("failed to read data from cache") + ) + ); + } + } + +} diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshots.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshots.java index 4eea006b4c2f2..67768c9aadded 100644 --- a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshots.java +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshots.java @@ -547,9 +547,9 @@ public Map getRecoveryStateFactories() { return Map.of(SNAPSHOT_RECOVERY_STATE_FACTORY_KEY, SearchableSnapshotRecoveryState::new); } - public static final String CACHE_FETCH_ASYNC_THREAD_POOL_NAME = "searchable_snapshots_cache_fetch_async"; + public static final String CACHE_FETCH_ASYNC_THREAD_POOL_NAME = BlobStoreRepository.SEARCHABLE_SNAPSHOTS_CACHE_FETCH_ASYNC_THREAD_NAME; public static final String CACHE_FETCH_ASYNC_THREAD_POOL_SETTING = "xpack.searchable_snapshots.cache_fetch_async_thread_pool"; - public static final String CACHE_PREWARMING_THREAD_POOL_NAME = "searchable_snapshots_cache_prewarming"; + public static final String CACHE_PREWARMING_THREAD_POOL_NAME = BlobStoreRepository.SEARCHABLE_SNAPSHOTS_CACHE_PREWARMING_THREAD_NAME; public static final String CACHE_PREWARMING_THREAD_POOL_SETTING = "xpack.searchable_snapshots.cache_prewarming_thread_pool"; public static ScalingExecutorBuilder[] executorBuilders(Settings settings) { diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/BlobContainerSupplier.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/BlobContainerSupplier.java new file mode 100644 index 0000000000000..335c8e311ace6 --- /dev/null +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/BlobContainerSupplier.java @@ -0,0 +1,95 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.searchablesnapshots.store; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.common.blobstore.BlobContainer; +import org.elasticsearch.common.blobstore.OperationPurpose; +import org.elasticsearch.common.blobstore.support.FilterBlobContainer; +import org.elasticsearch.repositories.IndexId; +import org.elasticsearch.repositories.blobstore.BlobStoreRepository; + +import java.io.IOException; +import java.io.InputStream; +import java.util.function.Supplier; + +public class BlobContainerSupplier implements Supplier<BlobContainer> { + + private static final Logger logger = LogManager.getLogger(BlobContainerSupplier.class); + + private final Supplier<BlobStoreRepository> repositorySupplier; + private final IndexId indexId; + private final int shardId; + + private volatile LastKnownState lastKnownState = new LastKnownState(null, null); + + public BlobContainerSupplier(Supplier<BlobStoreRepository> repositorySupplier, IndexId indexId, int shardId) { + this.repositorySupplier = repositorySupplier; + this.indexId = indexId; + this.shardId = shardId; + } + + @Override + public BlobContainer get() { + final LastKnownState lastKnownState = this.lastKnownState; + final BlobStoreRepository currentRepository = repositorySupplier.get(); + + if (lastKnownState.blobStoreRepository() == currentRepository) { + return lastKnownState.blobContainer(); + } else { + return refreshAndGet(); + } + } + + private synchronized BlobContainer refreshAndGet() { + final BlobStoreRepository currentRepository = repositorySupplier.get(); + if (lastKnownState.blobStoreRepository() == currentRepository) { + return lastKnownState.blobContainer(); + } else { + logger.debug("creating new blob container [{}][{}][{}]", currentRepository.getMetadata().name(), indexId, shardId); + final BlobContainer newContainer = new RateLimitingBlobContainer( + currentRepository, + currentRepository.shardContainer(indexId, shardId) + ); + lastKnownState = new LastKnownState(currentRepository, newContainer); + return newContainer; + } + } + + private record LastKnownState(BlobStoreRepository blobStoreRepository, BlobContainer blobContainer) {} + + /** + * A {@link FilterBlobContainer} that uses {@link BlobStoreRepository#maybeRateLimitRestores(InputStream)} to limit the rate at which + * blobs are read from the repository.
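+     * The wrapping is applied recursively to any child containers via {@code wrapChild}.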
+ */ + private static class RateLimitingBlobContainer extends FilterBlobContainer { + + private final BlobStoreRepository blobStoreRepository; + + RateLimitingBlobContainer(BlobStoreRepository blobStoreRepository, BlobContainer blobContainer) { + super(blobContainer); + this.blobStoreRepository = blobStoreRepository; + } + + @Override + protected BlobContainer wrapChild(BlobContainer child) { + return new RateLimitingBlobContainer(blobStoreRepository, child); + } + + @Override + public InputStream readBlob(OperationPurpose purpose, String blobName) throws IOException { + return blobStoreRepository.maybeRateLimitRestores(super.readBlob(purpose, blobName)); + } + + @Override + public InputStream readBlob(OperationPurpose purpose, String blobName, long position, long length) throws IOException { + return blobStoreRepository.maybeRateLimitRestores(super.readBlob(purpose, blobName, position, length)); + } + } +} diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/RepositorySupplier.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/RepositorySupplier.java new file mode 100644 index 0000000000000..63522ce2309a1 --- /dev/null +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/RepositorySupplier.java @@ -0,0 +1,83 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.searchablesnapshots.store; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.repositories.RepositoriesService; +import org.elasticsearch.repositories.Repository; +import org.elasticsearch.repositories.RepositoryMissingException; +import org.elasticsearch.repositories.blobstore.BlobStoreRepository; +import org.elasticsearch.xpack.searchablesnapshots.SearchableSnapshots; + +import java.util.Map; +import java.util.Objects; +import java.util.function.Supplier; + +public class RepositorySupplier implements Supplier<BlobStoreRepository> { + + private static final Logger logger = LogManager.getLogger(RepositorySupplier.class); + + private final RepositoriesService repositoriesService; + + private final String repositoryName; + + @Nullable // if repository specified only by name + private final String repositoryUuid; + + private volatile String repositoryNameHint; + + public RepositorySupplier(RepositoriesService repositoriesService, String repositoryName, String repositoryUuid) { + this.repositoriesService = Objects.requireNonNull(repositoriesService); + this.repositoryName = Objects.requireNonNull(repositoryName); + this.repositoryUuid = repositoryUuid; + this.repositoryNameHint = repositoryName; + } + + @Override + public BlobStoreRepository get() { + return SearchableSnapshots.getSearchableRepository(getRepository()); + } + + private Repository getRepository() { + if (repositoryUuid == null) { + // repository containing pre-7.12 snapshots has no UUID so we assume it matches by name + final Repository repository = repositoriesService.repository(repositoryName); + assert repository.getMetadata().name().equals(repositoryName) : repository.getMetadata().name() + " vs " + repositoryName; + return repository; + } + + final Map<String, Repository> repositoriesByName =
repositoriesService.getRepositories(); + + final String currentRepositoryNameHint = repositoryNameHint; + final Repository repositoryByLastKnownName = repositoriesByName.get(currentRepositoryNameHint); + if (repositoryByLastKnownName != null) { + final var foundRepositoryUuid = repositoryByLastKnownName.getMetadata().uuid(); + if (Objects.equals(repositoryUuid, foundRepositoryUuid)) { + return repositoryByLastKnownName; + } + } + + for (final Repository repository : repositoriesByName.values()) { + if (repository.getMetadata().uuid().equals(repositoryUuid)) { + final var newRepositoryName = repository.getMetadata().name(); + logger.debug( + "getRepository: repository [{}] with uuid [{}] replacing repository [{}]", + newRepositoryName, + repositoryUuid, + currentRepositoryNameHint + ); + repositoryNameHint = repository.getMetadata().name(); + return repository; + } + } + + throw new RepositoryMissingException("uuid [" + repositoryUuid + "], original name [" + repositoryName + "]"); + } +} diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/SearchableSnapshotDirectory.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/SearchableSnapshotDirectory.java index b56cd28e9dc6c..bbdf371e1ed7b 100644 --- a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/SearchableSnapshotDirectory.java +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/SearchableSnapshotDirectory.java @@ -24,8 +24,6 @@ import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.routing.RecoverySource; import org.elasticsearch.common.blobstore.BlobContainer; -import org.elasticsearch.common.blobstore.OperationPurpose; -import org.elasticsearch.common.blobstore.support.FilterBlobContainer; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.lucene.store.ByteArrayIndexInput; import org.elasticsearch.common.settings.Settings; @@ -43,8 +41,6 @@ import org.elasticsearch.indices.recovery.RecoveryState; import org.elasticsearch.repositories.IndexId; import org.elasticsearch.repositories.RepositoriesService; -import org.elasticsearch.repositories.Repository; -import org.elasticsearch.repositories.RepositoryMissingException; import org.elasticsearch.repositories.blobstore.BlobStoreRepository; import org.elasticsearch.snapshots.SnapshotId; import org.elasticsearch.threadpool.ThreadPool; @@ -62,7 +58,6 @@ import java.io.FileNotFoundException; import java.io.IOException; -import java.io.InputStream; import java.io.UncheckedIOException; import java.nio.file.Files; import java.nio.file.Path; @@ -134,7 +129,6 @@ public class SearchableSnapshotDirectory extends BaseDirectory { // volatile fields are updated once under `this` lock, all together, iff loaded is not true. 
private volatile BlobStoreIndexShardSnapshot snapshot; - private volatile BlobContainer blobContainer; private volatile boolean loaded; private volatile SearchableSnapshotRecoveryState recoveryState; @@ -182,7 +176,6 @@ public SearchableSnapshotDirectory( private synchronized boolean invariant() { assert loaded != (snapshot == null); - assert loaded != (blobContainer == null); assert loaded != (recoveryState == null); return true; } @@ -212,7 +205,6 @@ public boolean loadSnapshot( synchronized (this) { alreadyLoaded = this.loaded; if (alreadyLoaded == false) { - this.blobContainer = blobContainerSupplier.get(); this.snapshot = snapshotSupplier.get(); this.loaded = true; cleanExistingRegularShardFiles(); @@ -226,14 +218,12 @@ public boolean loadSnapshot( return alreadyLoaded == false; } - @Nullable public BlobContainer blobContainer() { - final BlobContainer blobContainer = this.blobContainer; + final BlobContainer blobContainer = blobContainerSupplier.get(); assert blobContainer != null; return blobContainer; } - @Nullable public BlobStoreIndexShardSnapshot snapshot() { final BlobStoreIndexShardSnapshot snapshot = this.snapshot; assert snapshot != null; @@ -590,23 +580,15 @@ public static Directory create( ); } - Repository repository; - final String repositoryName; - if (SNAPSHOT_REPOSITORY_UUID_SETTING.exists(indexSettings.getSettings())) { - repository = repositoryByUuid( - repositories.getRepositories(), - SNAPSHOT_REPOSITORY_UUID_SETTING.get(indexSettings.getSettings()), - SNAPSHOT_REPOSITORY_NAME_SETTING.get(indexSettings.getSettings()) - ); - repositoryName = repository.getMetadata().name(); - } else { - // repository containing pre-7.12 snapshots has no UUID so we assume it matches by name - repositoryName = SNAPSHOT_REPOSITORY_NAME_SETTING.get(indexSettings.getSettings()); - repository = repositories.repository(repositoryName); - assert repository.getMetadata().name().equals(repositoryName) : repository.getMetadata().name() + " vs " + repositoryName; - } + final Supplier<BlobStoreRepository> repositorySupplier = new RepositorySupplier( + repositories, + SNAPSHOT_REPOSITORY_NAME_SETTING.get(indexSettings.getSettings()), + SNAPSHOT_REPOSITORY_UUID_SETTING.exists(indexSettings.getSettings()) + ?
SNAPSHOT_REPOSITORY_UUID_SETTING.get(indexSettings.getSettings()) + : null + ); - final BlobStoreRepository blobStoreRepository = SearchableSnapshots.getSearchableRepository(repository); + final BlobStoreRepository initialRepository = repositorySupplier.get(); final IndexId indexId = new IndexId( SNAPSHOT_INDEX_NAME_SETTING.get(indexSettings.getSettings()), @@ -617,14 +599,14 @@ public static Directory create( SNAPSHOT_SNAPSHOT_ID_SETTING.get(indexSettings.getSettings()) ); - final LazyInitializable<BlobContainer, RuntimeException> lazyBlobContainer = new LazyInitializable<>( - () -> new RateLimitingBlobContainer( - blobStoreRepository, - blobStoreRepository.shardContainer(indexId, shardPath.getShardId().id()) - ) + final Supplier<BlobContainer> blobContainerSupplier = new BlobContainerSupplier( + repositorySupplier, + indexId, + shardPath.getShardId().id() + ); + final LazyInitializable<BlobStoreIndexShardSnapshot, RuntimeException> lazySnapshot = new LazyInitializable<>( - () -> blobStoreRepository.loadShardSnapshot(lazyBlobContainer.getOrCompute(), snapshotId) + () -> repositorySupplier.get().loadShardSnapshot(blobContainerSupplier.get(), snapshotId) ); final Path cacheDir = CacheService.getShardCachePath(shardPath).resolve(snapshotId.getUUID()); @@ -632,10 +614,10 @@ public static Directory create( return new InMemoryNoOpCommitDirectory( new SearchableSnapshotDirectory( - lazyBlobContainer::getOrCompute, + blobContainerSupplier, lazySnapshot::getOrCompute, blobStoreCacheService, - repositoryName, + initialRepository.getMetadata().name(), snapshotId, indexId, shardPath.getShardId(), @@ -690,42 +672,4 @@ public void putCachedBlob(String name, ByteRange range, BytesReference content, public SharedBlobCacheService.CacheFile getFrozenCacheFile(String fileName, long length) { return sharedBlobCacheService.getCacheFile(createCacheKey(fileName), length); } - - private static Repository repositoryByUuid(Map<String, Repository> repositories, String repositoryUuid, String originalName) { - for (Repository repository : repositories.values()) { - if (repository.getMetadata().uuid().equals(repositoryUuid)) { - return repository; - } - } - throw new RepositoryMissingException("uuid [" + repositoryUuid + "], original name [" + originalName + "]"); - } - - /** - * A {@link FilterBlobContainer} that uses {@link BlobStoreRepository#maybeRateLimitRestores(InputStream)} to limit the rate at which - * blobs are read from the repository. - */ - private static class RateLimitingBlobContainer extends FilterBlobContainer { - - private final BlobStoreRepository blobStoreRepository; - - RateLimitingBlobContainer(BlobStoreRepository blobStoreRepository, BlobContainer blobContainer) { - super(blobContainer); - this.blobStoreRepository = blobStoreRepository; - } - - @Override - protected BlobContainer wrapChild(BlobContainer child) { - return new RateLimitingBlobContainer(blobStoreRepository, child); - } - - @Override - public InputStream readBlob(OperationPurpose purpose, String blobName) throws IOException { - return blobStoreRepository.maybeRateLimitRestores(super.readBlob(purpose, blobName)); - } - - @Override - public InputStream readBlob(OperationPurpose purpose, String blobName, long position, long length) throws IOException { - return blobStoreRepository.maybeRateLimitRestores(super.readBlob(purpose, blobName, position, length)); - } - } }
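The refresh logic in `BlobContainerSupplier` above is a small double-checked caching pattern: a `volatile` record of the last known (repository, container) pair is read lock-free, and the container is rebuilt under `synchronized` only when the repository instance has changed. A minimal standalone sketch of the same pattern follows; the `CachingDerivedSupplier` name and the generic `S`/`D` parameters are illustrative stand-ins, not part of this change.

[source,java]
----
import java.util.function.Function;
import java.util.function.Supplier;

/**
 * Caches a value derived from a source object and rebuilds it only when the
 * source instance changes (by reference equality).
 */
final class CachingDerivedSupplier<S, D> implements Supplier<D> {

    private record LastKnownState<S, D>(S source, D derived) {}

    private final Supplier<S> sourceSupplier;
    private final Function<S, D> derive;

    private volatile LastKnownState<S, D> lastKnownState = new LastKnownState<>(null, null);

    CachingDerivedSupplier(Supplier<S> sourceSupplier, Function<S, D> derive) {
        this.sourceSupplier = sourceSupplier;
        this.derive = derive;
    }

    @Override
    public D get() {
        final LastKnownState<S, D> state = lastKnownState;
        if (state.source() == sourceSupplier.get()) {
            return state.derived(); // fast path: source unchanged, reuse the cached value
        }
        return refreshAndGet();
    }

    private synchronized D refreshAndGet() {
        final S currentSource = sourceSupplier.get();
        final LastKnownState<S, D> state = lastKnownState;
        if (state.source() == currentSource) {
            return state.derived(); // another thread refreshed while we waited on the lock
        }
        final D derived = derive.apply(currentSource);
        lastKnownState = new LastKnownState<>(currentSource, derived);
        return derived;
    }
}
----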