From 9ebe95a8a806927147f054a211f26034254560f6 Mon Sep 17 00:00:00 2001 From: Matteo Piergiovanni <134913285+piergm@users.noreply.github.com> Date: Thu, 7 Nov 2024 09:33:57 +0100 Subject: [PATCH 1/4] Better sizing BytesRef for Strings in Queries (#115655) * Better sizing BytesRefs for Strings in Queries * Update docs/changelog/115655.yaml * iter * added test * iter * extracted method * iter --------- Co-authored-by: Elastic Machine --- docs/changelog/115655.yaml | 5 +++++ .../common/lucene/BytesRefs.java | 21 +++++++++++++++++- .../index/query/AbstractQueryBuilder.java | 12 +++++----- .../query/AbstractQueryBuilderTests.java | 22 +++++++++++++++++++ 4 files changed, 53 insertions(+), 7 deletions(-) create mode 100644 docs/changelog/115655.yaml diff --git a/docs/changelog/115655.yaml b/docs/changelog/115655.yaml new file mode 100644 index 0000000000000..7184405867657 --- /dev/null +++ b/docs/changelog/115655.yaml @@ -0,0 +1,5 @@ +pr: 115655 +summary: Better sizing `BytesRef` for Strings in Queries +area: Search +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/common/lucene/BytesRefs.java b/server/src/main/java/org/elasticsearch/common/lucene/BytesRefs.java index ff8af9b80edcc..ed88c3a5a9c91 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/BytesRefs.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/BytesRefs.java @@ -11,6 +11,7 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.UnicodeUtil; public class BytesRefs { @@ -56,6 +57,25 @@ public static BytesRef checkIndexableLength(BytesRef input) { return input; } + /** + * Converts a given string to a {@link BytesRef} object with an exactly sized byte array. + *

+ * This method alternative method to the standard {@link BytesRef} constructor's allocates the + * exact byte array size needed for the string. This is done by parsing the UTF-16 string two + * times the first to estimate the array length and the second to copy the string value inside + * the array. + *

+ * + * @param s the input string to convert + * @return a BytesRef object representing the input string + */ + public static BytesRef toExactSizedBytesRef(String s) { + int l = s.length(); + byte[] b = new byte[UnicodeUtil.calcUTF16toUTF8Length(s, 0, l)]; + UnicodeUtil.UTF16toUTF8(s, 0, l, b); + return new BytesRef(b, 0, b.length); + } + /** * Produces a UTF-string prefix of the input BytesRef. If the prefix cutoff would produce * ill-formed UTF, it falls back to the hexadecimal representation. @@ -70,5 +90,4 @@ private static String safeStringPrefix(BytesRef input, int prefixLength) { return prefix.toString(); } } - } diff --git a/server/src/main/java/org/elasticsearch/index/query/AbstractQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/AbstractQueryBuilder.java index 5df63687e1786..f00e6904feac7 100644 --- a/server/src/main/java/org/elasticsearch/index/query/AbstractQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/AbstractQueryBuilder.java @@ -216,12 +216,12 @@ public final int hashCode() { * @return the same input object or a {@link BytesRef} representation if input was of type string */ static Object maybeConvertToBytesRef(Object obj) { - if (obj instanceof String) { - return BytesRefs.checkIndexableLength(BytesRefs.toBytesRef(obj)); - } else if (obj instanceof CharBuffer) { - return BytesRefs.checkIndexableLength(new BytesRef((CharBuffer) obj)); - } else if (obj instanceof BigInteger) { - return BytesRefs.toBytesRef(obj); + if (obj instanceof String v) { + return BytesRefs.checkIndexableLength(BytesRefs.toExactSizedBytesRef(v)); + } else if (obj instanceof CharBuffer v) { + return BytesRefs.checkIndexableLength(new BytesRef(v)); + } else if (obj instanceof BigInteger v) { + return BytesRefs.toBytesRef(v); } return obj; } diff --git a/server/src/test/java/org/elasticsearch/index/query/AbstractQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/AbstractQueryBuilderTests.java index 07c8166741e63..a43c1a8ba3395 100644 --- a/server/src/test/java/org/elasticsearch/index/query/AbstractQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/AbstractQueryBuilderTests.java @@ -10,6 +10,7 @@ package org.elasticsearch.index.query; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.search.SearchModule; @@ -93,4 +94,25 @@ public void testMaybeConvertToBytesRefLongTerm() { assertThat(e.getMessage(), containsString("term starting with [aaaaa")); } + public void testMaybeConvertToBytesRefStringCorrectSize() { + int capacity = randomIntBetween(20, 40); + StringBuilder termBuilder = new StringBuilder(capacity); + int correctSize = 0; + for (int i = 0; i < capacity; i++) { + if (i < capacity / 3) { + termBuilder.append((char) randomIntBetween(0, 128)); + ++correctSize; // use only one byte for char < 128 + } else if (i < 2 * capacity / 3) { + termBuilder.append((char) randomIntBetween(128, 2048)); + correctSize += 2; // use two bytes for char < 2048 + } else { + termBuilder.append((char) randomIntBetween(2048, 4092)); + correctSize += 3; // use three bytes for char >= 2048 + } + } + BytesRef bytesRef = (BytesRef) AbstractQueryBuilder.maybeConvertToBytesRef(termBuilder.toString()); + assertEquals(correctSize, bytesRef.bytes.length); + assertEquals(correctSize, bytesRef.length); + } + } From d66b5ae5759de22fba0a31ce2e3d63d520b019ed Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Thu, 7 Nov 2024 09:35:36 +0100 Subject: [PATCH 2/4] Cleanup some redundancies around DfsQueryPhase (#116057) No need to add the result consumer to teh context another time, it's already added to it in the constructor of `SearchDfsQueryThenFetchAsyncAction`. Also, no need to feed `this` and `this.results` to `getNextPhase` explicitly, there's only a single call to this method so we can safely clean up the redundant arguments. --- .../action/search/AbstractSearchAsyncAction.java | 7 ++----- .../elasticsearch/action/search/DfsQueryPhase.java | 4 ---- .../search/SearchDfsQueryThenFetchAsyncAction.java | 6 +++--- .../search/SearchQueryThenFetchAsyncAction.java | 2 +- .../search/TransportOpenPointInTimeAction.java | 2 +- .../search/AbstractSearchAsyncActionTests.java | 2 +- .../action/search/SearchAsyncActionTests.java | 12 ++++++------ .../search/SearchQueryThenFetchAsyncActionTests.java | 2 +- 8 files changed, 15 insertions(+), 22 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java b/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java index ec441cd4e58c4..5a923b17f1330 100644 --- a/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java @@ -689,7 +689,7 @@ private void raisePhaseFailure(SearchPhaseExecutionException exception) { * @see #onShardResult(SearchPhaseResult, SearchShardIterator) */ final void onPhaseDone() { // as a tribute to @kimchy aka. finishHim() - executeNextPhase(this, () -> getNextPhase(results, this)); + executeNextPhase(this, this::getNextPhase); } @Override @@ -746,11 +746,8 @@ protected final ShardSearchRequest buildShardSearchRequest(SearchShardIterator s /** * Returns the next phase based on the results of the initial search phase - * @param results the results of the initial search phase. Each non null element in the result array represent a successfully - * executed shard request - * @param context the search context for the next phase */ - protected abstract SearchPhase getNextPhase(SearchPhaseResults results, SearchPhaseContext context); + protected abstract SearchPhase getNextPhase(); private static final class PendingExecutions { private final Semaphore semaphore; diff --git a/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java b/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java index 0b587e72141ff..36d73c0db166a 100644 --- a/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java +++ b/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java @@ -65,10 +65,6 @@ final class DfsQueryPhase extends SearchPhase { this.nextPhaseFactory = nextPhaseFactory; this.context = context; this.searchTransportService = context.getSearchTransport(); - - // register the release of the query consumer to free up the circuit breaker memory - // at the end of the search - context.addReleasable(queryResult); } @Override diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchDfsQueryThenFetchAsyncAction.java b/server/src/main/java/org/elasticsearch/action/search/SearchDfsQueryThenFetchAsyncAction.java index 26eb266cd457e..69ca1569a7c07 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchDfsQueryThenFetchAsyncAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchDfsQueryThenFetchAsyncAction.java @@ -98,7 +98,7 @@ protected void executePhaseOnShard( } @Override - protected SearchPhase getNextPhase(final SearchPhaseResults results, SearchPhaseContext context) { + protected SearchPhase getNextPhase() { final List dfsSearchResults = results.getAtomicArray().asList(); final AggregatedDfs aggregatedDfs = SearchPhaseController.aggregateDfs(dfsSearchResults); final List mergedKnnResults = SearchPhaseController.mergeKnnResults(getRequest(), dfsSearchResults); @@ -107,8 +107,8 @@ protected SearchPhase getNextPhase(final SearchPhaseResults res aggregatedDfs, mergedKnnResults, queryPhaseResultConsumer, - (queryResults) -> SearchQueryThenFetchAsyncAction.nextPhase(client, context, queryResults, aggregatedDfs), - context + (queryResults) -> SearchQueryThenFetchAsyncAction.nextPhase(client, this, queryResults, aggregatedDfs), + this ); } diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java b/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java index 33b2cdf74cd79..e92b5bbf4b5e5 100644 --- a/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncAction.java @@ -147,7 +147,7 @@ static SearchPhase nextPhase( } @Override - protected SearchPhase getNextPhase(final SearchPhaseResults results, SearchPhaseContext context) { + protected SearchPhase getNextPhase() { return nextPhase(client, this, results, null); } diff --git a/server/src/main/java/org/elasticsearch/action/search/TransportOpenPointInTimeAction.java b/server/src/main/java/org/elasticsearch/action/search/TransportOpenPointInTimeAction.java index c4a078d9d00ad..010f96f212116 100644 --- a/server/src/main/java/org/elasticsearch/action/search/TransportOpenPointInTimeAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/TransportOpenPointInTimeAction.java @@ -277,7 +277,7 @@ protected void executePhaseOnShard( } @Override - protected SearchPhase getNextPhase(SearchPhaseResults results, SearchPhaseContext context) { + protected SearchPhase getNextPhase() { return new SearchPhase(getName()) { private void onExecuteFailure(Exception e) { diff --git a/server/src/test/java/org/elasticsearch/action/search/AbstractSearchAsyncActionTests.java b/server/src/test/java/org/elasticsearch/action/search/AbstractSearchAsyncActionTests.java index d8e3151adb61d..f8ecdbd062054 100644 --- a/server/src/test/java/org/elasticsearch/action/search/AbstractSearchAsyncActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/AbstractSearchAsyncActionTests.java @@ -94,7 +94,7 @@ private AbstractSearchAsyncAction createAction( SearchResponse.Clusters.EMPTY ) { @Override - protected SearchPhase getNextPhase(final SearchPhaseResults results, SearchPhaseContext context) { + protected SearchPhase getNextPhase() { return null; } diff --git a/server/src/test/java/org/elasticsearch/action/search/SearchAsyncActionTests.java b/server/src/test/java/org/elasticsearch/action/search/SearchAsyncActionTests.java index a796827baa253..f655136cd4ba4 100644 --- a/server/src/test/java/org/elasticsearch/action/search/SearchAsyncActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/SearchAsyncActionTests.java @@ -139,7 +139,7 @@ protected void executePhaseOnShard( } @Override - protected SearchPhase getNextPhase(SearchPhaseResults results, SearchPhaseContext context) { + protected SearchPhase getNextPhase() { return new SearchPhase("test") { @Override public void run() { @@ -255,7 +255,7 @@ protected void executePhaseOnShard( } @Override - protected SearchPhase getNextPhase(SearchPhaseResults results, SearchPhaseContext context) { + protected SearchPhase getNextPhase() { return new SearchPhase("test") { @Override public void run() { @@ -359,7 +359,7 @@ protected void executePhaseOnShard( } @Override - protected SearchPhase getNextPhase(SearchPhaseResults results, SearchPhaseContext context) { + protected SearchPhase getNextPhase() { return new SearchPhase("test") { @Override public void run() { @@ -488,7 +488,7 @@ protected void executePhaseOnShard( } @Override - protected SearchPhase getNextPhase(SearchPhaseResults results, SearchPhaseContext context) { + protected SearchPhase getNextPhase() { return new SearchPhase("test") { @Override public void run() { @@ -600,7 +600,7 @@ protected void executePhaseOnShard( } @Override - protected SearchPhase getNextPhase(SearchPhaseResults results, SearchPhaseContext context) { + protected SearchPhase getNextPhase() { return new SearchPhase("test") { @Override public void run() { @@ -680,7 +680,7 @@ protected void executePhaseOnShard( } @Override - protected SearchPhase getNextPhase(SearchPhaseResults results, SearchPhaseContext context) { + protected SearchPhase getNextPhase() { return new SearchPhase("test") { @Override public void run() { diff --git a/server/src/test/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncActionTests.java b/server/src/test/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncActionTests.java index e4284937474c7..6357155793fdf 100644 --- a/server/src/test/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/SearchQueryThenFetchAsyncActionTests.java @@ -204,7 +204,7 @@ public void sendExecuteQuery( null ) { @Override - protected SearchPhase getNextPhase(SearchPhaseResults results, SearchPhaseContext context) { + protected SearchPhase getNextPhase() { return new SearchPhase("test") { @Override public void run() { From 4dc5afcccec1e0d1c0a8b049be21d427c576e92b Mon Sep 17 00:00:00 2001 From: Nikolaj Volgushev Date: Thu, 7 Nov 2024 10:01:55 +0100 Subject: [PATCH 3/4] Only include settings file for working ES versions (#116250) To avoid failed cluster starts due to https://github.com/elastic/elasticsearch/issues/91939 in upgrade tests, we should only include `settings.json` file if we are above the buggy version. I missed "forward-porting" this to main. A follow-up PR will port the suite over to the new parametrized upgrade test class and remove the settings file from the generic upgrade test flow altogether. --- x-pack/qa/rolling-upgrade/build.gradle | 9 ++++++++- .../upgrades/SecurityIndexRoleMappingCleanupIT.java | 7 +++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/x-pack/qa/rolling-upgrade/build.gradle b/x-pack/qa/rolling-upgrade/build.gradle index 38fbf99068a9b..271aadfe4b388 100644 --- a/x-pack/qa/rolling-upgrade/build.gradle +++ b/x-pack/qa/rolling-upgrade/build.gradle @@ -88,7 +88,14 @@ BuildParams.bwcVersions.withWireCompatible { bwcVersion, baseName -> keystore 'xpack.watcher.encryption_key', file("${project.projectDir}/src/test/resources/system_key") setting 'xpack.watcher.encrypt_sensitive_data', 'true' - extraConfigFile 'operator/settings.json', file("${project.projectDir}/src/test/resources/operator_defined_role_mappings.json") + // file-based settings processing had a bug around applying role mappings on an unrecovered index + // this was fixed in 8.7.0 (https://github.com/elastic/elasticsearch/pull/92173). To avoid flakiness + // in the test, we only set a role mappings file for higher versions. + // TODO move this out into a separate test suite, since operator settings are not relevant for most BWC tests + // and have some side-effects + if (bwcVersion.onOrAfter('8.7.0')) { + extraConfigFile 'operator/settings.json', file("${project.projectDir}/src/test/resources/operator_defined_role_mappings.json") + } // Old versions of the code contain an invalid assertion that trips // during tests. Versions 5.6.9 and 6.2.4 have been fixed by removing diff --git a/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/SecurityIndexRoleMappingCleanupIT.java b/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/SecurityIndexRoleMappingCleanupIT.java index 82d4050c044b1..915122c97d3f1 100644 --- a/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/SecurityIndexRoleMappingCleanupIT.java +++ b/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/SecurityIndexRoleMappingCleanupIT.java @@ -6,6 +6,7 @@ */ package org.elasticsearch.upgrades; +import org.elasticsearch.Version; import org.elasticsearch.client.Request; import org.elasticsearch.client.Response; import org.elasticsearch.client.RestClient; @@ -24,8 +25,14 @@ import static org.hamcrest.Matchers.containsInAnyOrder; public class SecurityIndexRoleMappingCleanupIT extends AbstractUpgradeTestCase { + private static final Version UPGRADE_FROM_VERSION = Version.fromString(System.getProperty("tests.upgrade_from_version")); public void testCleanupDuplicateMappings() throws Exception { + // see build.gradle where we set operator/settings.json for more details on this skip + assumeTrue( + "Cluster requires version higher than since operator/settings.json is only set then: " + Version.V_8_7_0, + UPGRADE_FROM_VERSION.onOrAfter(Version.V_8_7_0) + ); if (CLUSTER_TYPE == ClusterType.OLD) { // If we're in a state where the same operator-defined role mappings can exist both in cluster state and the native store // (V_8_15_0 transport added to security.role_mapping_cleanup feature added), create a state From a3eba570db66a1ba1697561d02811a510bfbe71d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Cea=20Fontenla?= Date: Thu, 7 Nov 2024 11:21:53 +0100 Subject: [PATCH 4/4] Aggs: Add real memory CB call when building internal aggregators in buckets (#116329) Related with https://github.com/elastic/elasticsearch/issues/88128 This PR pretends to reduce the potential OOMs received when building internal aggregations. --- .../bucket/BucketsAggregator.java | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java index 7c7233b0eaa1d..e6c26c4278807 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java @@ -81,12 +81,7 @@ public final void collectBucket(LeafBucketCollector subCollector, int doc, long grow(bucketOrd + 1); int docCount = docCountProvider.getDocCount(doc); if (docCounts.increment(bucketOrd, docCount) == docCount) { - // We call the circuit breaker the time to time in order to give it a chance to check available - // memory in the parent breaker and break the execution if we are running out. To achieve that we - // are passing 0 as the estimated bytes every 1024 calls - if ((++callCount & 0x3FF) == 0) { - breaker.addEstimateBytesAndMaybeBreak(0, "allocated_buckets"); - } + updateCircuitBreaker("allocated_buckets"); } subCollector.collect(doc, bucketOrd); } @@ -179,6 +174,7 @@ protected final IntFunction buildSubAggsForBuckets(long[] prepareSubAggs(bucketOrdsToCollect); InternalAggregation[][] aggregations = new InternalAggregation[subAggregators.length][]; for (int i = 0; i < subAggregators.length; i++) { + updateCircuitBreaker("building_sub_aggregation"); aggregations[i] = subAggregators[i].buildAggregations(bucketOrdsToCollect); } return subAggsForBucketFunction(aggregations); @@ -415,4 +411,15 @@ protected void preGetSubLeafCollectors(LeafReaderContext ctx) throws IOException // Set LeafReaderContext to the doc_count provider docCountProvider.setLeafReaderContext(ctx); } + + /** + * This method calls the circuit breaker from time to time in order to give it a chance to check available + * memory in the parent breaker (Which should be a real memory breaker) and break the execution if we are running out. + * To achieve that, we are passing 0 as the estimated bytes every 1024 calls + */ + private void updateCircuitBreaker(String label) { + if ((++callCount & 0x3FF) == 0) { + breaker.addEstimateBytesAndMaybeBreak(0, label); + } + } }