Skip to content

Commit

Permalink
Re-enable HeapAttackIT (#104107)
Browse files Browse the repository at this point in the history
This PR enables ESQL heap attack tests. I have run this suite over 500 
iterations with different hardware configurations for the last two days,
and all have been successful with the changes in #104159.

Additionally, this PR adds an action that can trigger OOM to generate a 
heap dump if a test takes more than 5 minutes. I've seen cases
(previously with our CI) where the test didn't result in OOM but was
taking too long. Having the ability to inspect the heap in such cases
would be beneficial.

Closes #103527
Closes #100678
  • Loading branch information
dnhatn authored Jan 10, 2024
1 parent 79e3a67 commit 709c0f5
Show file tree
Hide file tree
Showing 10 changed files with 187 additions and 32 deletions.
5 changes: 0 additions & 5 deletions test/external-modules/apm-integration/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,10 @@ tasks.named("test").configure {
enabled = false
}

tasks.named("yamlRestTest").configure {
enabled = false
}

tasks.named('javaRestTest').configure {
it.onlyIf("snapshot build") { BuildParams.isSnapshotBuild() }
}


dependencies {
clusterModules project(':modules:apm')
}
5 changes: 0 additions & 5 deletions test/external-modules/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,10 @@ import org.elasticsearch.gradle.internal.info.BuildParams

subprojects {
apply plugin: 'elasticsearch.base-internal-es-plugin'
apply plugin: 'elasticsearch.legacy-yaml-rest-test'

esplugin {
name it.name
licenseFile rootProject.file('licenses/SSPL-1.0+ELASTIC-LICENSE-2.0.txt')
noticeFile rootProject.file('NOTICE.txt')
}

tasks.named('yamlRestTest').configure {
it.onlyIf("snapshot build") { BuildParams.isSnapshotBuild() }
}
}
7 changes: 7 additions & 0 deletions test/external-modules/delayed-aggs/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
import org.elasticsearch.gradle.internal.info.BuildParams

apply plugin: 'elasticsearch.legacy-yaml-rest-test'

tasks.named('yamlRestTest').configure {
it.onlyIf("snapshot build") { BuildParams.isSnapshotBuild() }
}

esplugin {
description 'A test module that allows to delay aggregations on shards with a configurable time'
Expand Down
4 changes: 0 additions & 4 deletions test/external-modules/die-with-dignity/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@ tasks.named("test").configure {
enabled = false
}

tasks.named("yamlRestTest").configure {
enabled = false
}

tasks.named('javaRestTest').configure {
it.onlyIf("snapshot build") { BuildParams.isSnapshotBuild() }
}
7 changes: 7 additions & 0 deletions test/external-modules/error-query/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@
* Side Public License, v 1.
*/

import org.elasticsearch.gradle.internal.info.BuildParams
apply plugin: 'elasticsearch.legacy-yaml-rest-test'

tasks.named('yamlRestTest').configure {
it.onlyIf("snapshot build") { BuildParams.isSnapshotBuild() }
}

esplugin {
description 'A test module that exposes a way to simulate search shard failures and warnings'
classname 'org.elasticsearch.test.errorquery.ErrorQueryPlugin'
Expand Down
20 changes: 20 additions & 0 deletions test/external-modules/esql-heap-attack/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

// Build script for the esql-heap-attack test module: applies the Java REST test
// plugin, publishes the tests as an artifact, and registers the plugin class.
apply plugin: 'elasticsearch.internal-java-rest-test'
// Necessary to use tests in Serverless
apply plugin: 'elasticsearch.internal-test-artifact'

// Plugin descriptor: 'classname' names the plugin entry-point class of this module.
esplugin {
description 'A test module that can trigger out of memory'
classname 'org.elasticsearch.test.esql.heap_attack.HeapAttackPlugin'
}

// NOTE(review): presumably makes javaRestTest run against the default distribution,
// matching DistributionType.DEFAULT requested by the test cluster - confirm.
tasks.named('javaRestTest') {
usesDefaultDistribution()
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,34 @@
* 2.0.
*/

package org.elasticsearch.xpack.esql.qa.heap_attack;
package org.elasticsearch.xpack.esql.heap_attack;

import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.util.EntityUtils;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.elasticsearch.client.Request;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.Response;
import org.elasticsearch.client.ResponseException;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.WarningsHandler;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.test.ListMatcher;
import org.elasticsearch.test.cluster.ElasticsearchCluster;
import org.elasticsearch.test.cluster.local.distribution.DistributionType;
import org.elasticsearch.test.rest.ESRestTestCase;
import org.elasticsearch.threadpool.Scheduler;
import org.elasticsearch.threadpool.TestThreadPool;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xcontent.json.JsonXContent;
import org.elasticsearch.xpack.esql.qa.rest.EsqlSpecTestCase;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
Expand All @@ -51,12 +58,12 @@
* Tests that run ESQL queries that have, in the past, used so much memory they
* crash Elasticsearch.
*/
@LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/103527")
public class HeapAttackIT extends ESRestTestCase {

@ClassRule
public static ElasticsearchCluster cluster = ElasticsearchCluster.local()
.distribution(DistributionType.DEFAULT)
.module("test-esql-heap-attack")
.setting("xpack.security.enabled", "false")
.setting("xpack.license.self_generated.type", "trial")
.build();
Expand Down Expand Up @@ -265,7 +272,6 @@ public void testManyEval() throws IOException {
assertMap(map, matchesMap().entry("columns", columns).entry("values", hasSize(10_000)));
}

@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/100528")
public void testTooManyEval() throws IOException {
initManyLongs();
assertCircuitBreaks(() -> manyEval(1000));
Expand Down Expand Up @@ -299,7 +305,40 @@ private Response query(String query, String filterPath) throws IOException {
.setRequestConfig(RequestConfig.custom().setSocketTimeout(Math.toIntExact(TimeValue.timeValueMinutes(5).millis())).build())
.setWarningsHandler(WarningsHandler.PERMISSIVE)
);
return client().performRequest(request);
logger.info("--> test {} started querying", getTestName());
final ThreadPool testThreadPool = new TestThreadPool(getTestName());
final long startedTimeInNanos = System.nanoTime();
Scheduler.Cancellable schedule = null;
try {
schedule = testThreadPool.schedule(new AbstractRunnable() {
@Override
public void onFailure(Exception e) {
throw new AssertionError(e);
}

@Override
protected void doRun() throws Exception {
TimeValue elapsed = TimeValue.timeValueNanos(System.nanoTime() - startedTimeInNanos);
logger.info("--> test {} triggering OOM after {}", getTestName(), elapsed);
Request triggerOOM = new Request("POST", "/_trigger_out_of_memory");
client().performRequest(triggerOOM);
}
}, TimeValue.timeValueMinutes(5), testThreadPool.executor(ThreadPool.Names.GENERIC));
Response resp = client().performRequest(request);
logger.info("--> test {} completed querying", getTestName());
return resp;
} finally {
if (schedule != null) {
schedule.cancel();
}
terminate(testThreadPool);
}
}

/**
 * Builds the REST client with {@link ESRestTestCase#CLIENT_SOCKET_TIMEOUT} overridden to {@code "6m"}.
 * All other incoming settings are copied unchanged before delegating to the superclass.
 * NOTE(review): presumably chosen so the client-wide timeout outlasts the 5-minute
 * out-of-memory trigger scheduled while querying - confirm.
 *
 * @param settings the client settings supplied by the test framework
 * @param hosts    the hosts the client should connect to
 * @return the client built by the superclass from the adjusted settings
 * @throws IOException if the superclass fails to build the client
 */
@Override
protected RestClient buildClient(Settings settings, HttpHost[] hosts) throws IOException {
    final Settings.Builder adjusted = Settings.builder();
    adjusted.put(settings);
    adjusted.put(ESRestTestCase.CLIENT_SOCKET_TIMEOUT, "6m");
    return super.buildClient(adjusted.build(), hosts);
}

public void testFetchManyBigFields() throws IOException {
Expand Down Expand Up @@ -510,6 +549,16 @@ private static void assertWriteResponse(Response response) throws IOException {
/**
 * Runs both before and after every test (it carries {@code @Before} and {@code @After}).
 * Fetches {@code /_nodes/stats} through the admin client and, for every node in the
 * response, asserts that the {@code request} breaker reports an estimated size of zero.
 *
 * @throws Exception if the stats request fails or the assertion never holds
 */
@Before
@After
public void assertRequestBreakerEmpty() throws Exception {
// NOTE(review): in this diff view the next line appears to be the removed pre-change
// implementation (the EsqlSpecTestCase import is removed as well) - confirm it is
// absent from the final file.
EsqlSpecTestCase.assertRequestBreakerEmpty();
// The check is wrapped in assertBusy; presumably it is retried until it passes or times out.
assertBusy(() -> {
HttpEntity entity = adminClient().performRequest(new Request("GET", "/_nodes/stats")).getEntity();
Map<?, ?> stats = XContentHelper.convertToMap(XContentType.JSON.xContent(), entity.getContent(), false);
Map<?, ?> nodes = (Map<?, ?>) stats.get("nodes");
for (Object n : nodes.values()) {
Map<?, ?> node = (Map<?, ?>) n;
Map<?, ?> breakers = (Map<?, ?>) node.get("breakers");
Map<?, ?> request = (Map<?, ?>) breakers.get("request");
// Only the two size entries are pinned; extraOk() presumably tolerates other breaker fields.
assertMap(request, matchesMap().extraOk().entry("estimated_size_in_bytes", 0).entry("estimated_size", "0b"));
}
});
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.test.esql.heap_attack;

import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.IndexScopedSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.settings.SettingsFilter;
import org.elasticsearch.plugins.ActionPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.rest.RestController;
import org.elasticsearch.rest.RestHandler;

import java.util.List;
import java.util.function.Supplier;

/**
 * Test plugin whose only contribution is a single REST handler,
 * {@link RestTriggerOutOfMemoryAction}.
 */
public class HeapAttackPlugin extends Plugin implements ActionPlugin {

    /**
     * Supplies the REST handlers contributed by this plugin. None of the arguments
     * are consulted; a new handler instance is created on every call.
     *
     * @return a one-element list holding a {@link RestTriggerOutOfMemoryAction}
     */
    @Override
    public List<RestHandler> getRestHandlers(
        Settings settings,
        RestController restController,
        ClusterSettings clusterSettings,
        IndexScopedSettings indexScopedSettings,
        SettingsFilter settingsFilter,
        IndexNameExpressionResolver indexNameExpressionResolver,
        Supplier<DiscoveryNodes> nodesInCluster
    ) {
        final RestHandler triggerOutOfMemory = new RestTriggerOutOfMemoryAction();
        return List.of(triggerOutOfMemory);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.test.esql.heap_attack;

import org.elasticsearch.client.internal.node.NodeClient;
import org.elasticsearch.logging.LogManager;
import org.elasticsearch.logging.Logger;
import org.elasticsearch.rest.BaseRestHandler;
import org.elasticsearch.rest.RestRequest;

import java.util.ArrayList;
import java.util.List;

import static org.elasticsearch.rest.RestRequest.Method.POST;

/**
 * Test-only REST handler registered at {@code POST /_trigger_out_of_memory}.
 * Handling a request allocates and retains {@code int} arrays in an endless loop and
 * never writes a response; the loop can only end by throwing (presumably an
 * {@link OutOfMemoryError} once the heap is exhausted).
 */
public class RestTriggerOutOfMemoryAction extends BaseRestHandler {
    private static final Logger LOGGER = LogManager.getLogger(RestTriggerOutOfMemoryAction.class);

    /** Name under which this handler is registered. */
    @Override
    public String getName() {
        return "trigger_out_of_memory";
    }

    /** The single route served by this handler. */
    @Override
    public List<Route> routes() {
        final Route trigger = new Route(POST, "/_trigger_out_of_memory");
        return List.of(trigger);
    }

    /**
     * Logs at error level that an out-of-memory is being triggered and returns a
     * consumer that, when run, keeps allocating without ever touching the channel.
     * Neither {@code request} nor {@code client} is read.
     */
    @Override
    protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) {
        LOGGER.error("triggering out of memory");
        // Created eagerly, outside the consumer, and kept reachable through the capture.
        final List<int[]> retained = new ArrayList<>();
        return channel -> allocateForever(retained);
    }

    /**
     * Appends arrays of 1024 * 1024 ints to {@code sink} with no exit condition, so
     * every array stays reachable and this method never returns normally.
     */
    private static void allocateForever(List<int[]> sink) {
        for (;;) {
            sink.add(new int[1024 * 1024]);
        }
    }
}
11 changes: 0 additions & 11 deletions x-pack/plugin/esql/qa/server/heap-attack/build.gradle

This file was deleted.

0 comments on commit 709c0f5

Please sign in to comment.