From 83117dc8828ad3de2920b4a503f0eaf0746e34f8 Mon Sep 17 00:00:00 2001 From: kkewwei Date: Sun, 4 Feb 2024 12:04:01 +0800 Subject: [PATCH 1/5] onShardResult and onShardFailure are executed on one shard causes opensearch jvm crashed Signed-off-by: kkewwei --- .../action/search/AbstractSearchAsyncAction.java | 9 ++++++++- .../action/search/AbstractSearchAsyncActionTests.java | 6 +++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java index 9e1d065c96dd6..b5005088f432d 100644 --- a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java +++ b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java @@ -286,6 +286,7 @@ private void performPhaseOnShard(final int shardIndex, final SearchShardIterator Runnable r = () -> { final Thread thread = Thread.currentThread(); try { + final SearchPhase phase = this; executePhaseOnShard(shardIt, shard, new SearchActionListener(shard, shardIndex) { @Override public void innerOnResponse(Result result) { @@ -299,7 +300,13 @@ public void innerOnResponse(Result result) { @Override public void onFailure(Exception t) { try { - onShardFailure(shardIndex, shard, shardIt, t); + // It only happens when onPhaseDone() is called and executePhaseOnShard() fails hard with an exception. + if (totalOps.get() == expectedTotalOps) { + onPhaseFailure(phase, "The phase has failed", t); + } else { + onShardFailure(shardIndex, shard, shardIt, t); + } + } finally { executeNext(pendingExecutions, thread); } diff --git a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java index 601aa9dc1856e..4ac49634c9968 100644 --- a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java +++ b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java @@ -221,7 +221,11 @@ protected void executePhaseOnShard( if (failExecutePhaseOnShard) { listener.onFailure(new ShardNotFoundException(shardIt.shardId())); } else { - listener.onResponse(new QuerySearchResult()); + try { + listener.onResponse(new QuerySearchResult()); + } catch (Exception e) { + listener.onFailure(e); + } } } From 026638f19e58c0d9ba943c626f3b1d67e8922131 Mon Sep 17 00:00:00 2001 From: kkewwei Date: Fri, 23 Feb 2024 16:20:03 +0800 Subject: [PATCH 2/5] unit test Signed-off-by: kkewwei --- .../search/AbstractSearchAsyncAction.java | 1 - .../AbstractSearchAsyncActionTests.java | 25 ++++++++++++++++--- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java index b5005088f432d..3b99ea9cc1cd2 100644 --- a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java +++ b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java @@ -306,7 +306,6 @@ public void onFailure(Exception t) { } else { onShardFailure(shardIndex, shard, shardIt, t); } - } finally { executeNext(pendingExecutions, thread); } diff --git a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java index 4ac49634c9968..101ca52f065db 100644 --- a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java +++ b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java @@ -151,6 +151,7 @@ private AbstractSearchAsyncAction createAction( listener, controlled, false, + false, expected, new SearchShardIterator(null, null, Collections.emptyList(), null) ); @@ -162,6 +163,7 @@ private AbstractSearchAsyncAction createAction( ActionListener listener, final boolean controlled, final boolean failExecutePhaseOnShard, + final boolean catchExceptionInExecutePhaseOnShard, final AtomicLong expected, final SearchShardIterator... shards ) { @@ -221,10 +223,14 @@ protected void executePhaseOnShard( if (failExecutePhaseOnShard) { listener.onFailure(new ShardNotFoundException(shardIt.shardId())); } else { - try { + if (catchExceptionInExecutePhaseOnShard) { + try { + listener.onResponse(new QuerySearchResult()); + } catch (Exception e) { + listener.onFailure(e); + } + } else { listener.onResponse(new QuerySearchResult()); - } catch (Exception e) { - listener.onFailure(e); } } } @@ -513,6 +519,7 @@ public void onFailure(Exception e) { }, false, true, + false, new AtomicLong(), shards ); @@ -559,6 +566,7 @@ public void onFailure(Exception e) { }, false, false, + false, new AtomicLong(), shards ); @@ -574,7 +582,7 @@ public void onFailure(Exception e) { assertThat(searchResponse.getSuccessfulShards(), equalTo(shards.length)); } - public void testExecutePhaseOnShardFailure() throws InterruptedException { + private void innerTestExecutePhaseOnShardFailure(boolean catchExceptionInExecutePhaseOnShard) throws InterruptedException { final Index index = new Index("test", UUID.randomUUID().toString()); final SearchShardIterator[] shards = IntStream.range(0, 2 + randomInt(3)) @@ -610,6 +618,7 @@ public void onFailure(Exception e) { }, false, false, + catchExceptionInExecutePhaseOnShard, new AtomicLong(), shards ); @@ -625,6 +634,14 @@ public void onFailure(Exception e) { assertThat(searchResponse.getSuccessfulShards(), equalTo(shards.length)); } + public void testExecutePhaseOnShardFailure() throws InterruptedException { + innerTestExecutePhaseOnShardFailure(false); + } + + public void testExecutePhaseOnShardFailureAndThrowException() throws InterruptedException { + innerTestExecutePhaseOnShardFailure(true); + } + public void testOnPhaseListenersWithQueryAndThenFetchType() throws InterruptedException { ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); SearchRequestStats testListener = new SearchRequestStats(clusterSettings); From 2a90a0ceb9f1ff570e02012cbe7a33cf99e51d2a Mon Sep 17 00:00:00 2001 From: kkewwei Date: Thu, 29 Feb 2024 14:18:33 +0800 Subject: [PATCH 3/5] spotlessJavaCheck Signed-off-by: kkewwei --- .../action/search/AbstractSearchAsyncActionTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java index 101ca52f065db..62e55381cea22 100644 --- a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java +++ b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java @@ -638,7 +638,7 @@ public void testExecutePhaseOnShardFailure() throws InterruptedException { innerTestExecutePhaseOnShardFailure(false); } - public void testExecutePhaseOnShardFailureAndThrowException() throws InterruptedException { + public void testExecutePhaseOnShardFailureAndThrowException() throws InterruptedException { innerTestExecutePhaseOnShardFailure(true); } From a53c1300ba7fd5130b0708f4903b28cb048d6abf Mon Sep 17 00:00:00 2001 From: kkewwei Date: Thu, 29 Feb 2024 17:15:51 +0800 Subject: [PATCH 4/5] rename variable names Signed-off-by: kkewwei --- .../action/search/AbstractSearchAsyncActionTests.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java index 62e55381cea22..3af7af114e96d 100644 --- a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java +++ b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java @@ -163,7 +163,7 @@ private AbstractSearchAsyncAction createAction( ActionListener listener, final boolean controlled, final boolean failExecutePhaseOnShard, - final boolean catchExceptionInExecutePhaseOnShard, + final boolean catchExceptionWhenExecutePhaseOnShard, final AtomicLong expected, final SearchShardIterator... shards ) { @@ -223,7 +223,7 @@ protected void executePhaseOnShard( if (failExecutePhaseOnShard) { listener.onFailure(new ShardNotFoundException(shardIt.shardId())); } else { - if (catchExceptionInExecutePhaseOnShard) { + if (catchExceptionWhenExecutePhaseOnShard) { try { listener.onResponse(new QuerySearchResult()); } catch (Exception e) { @@ -582,7 +582,7 @@ public void onFailure(Exception e) { assertThat(searchResponse.getSuccessfulShards(), equalTo(shards.length)); } - private void innerTestExecutePhaseOnShardFailure(boolean catchExceptionInExecutePhaseOnShard) throws InterruptedException { + private void innerTestExecutePhaseOnShardFailure(boolean catchExceptionWhenExecutePhaseOnShard) throws InterruptedException { final Index index = new Index("test", UUID.randomUUID().toString()); final SearchShardIterator[] shards = IntStream.range(0, 2 + randomInt(3)) @@ -618,7 +618,7 @@ public void onFailure(Exception e) { }, false, false, - catchExceptionInExecutePhaseOnShard, + catchExceptionWhenExecutePhaseOnShard, new AtomicLong(), shards ); From e449b920e8fa682195091663b5c67011abee0bd3 Mon Sep 17 00:00:00 2001 From: kkewwei Date: Sun, 10 Mar 2024 14:53:01 +0800 Subject: [PATCH 5/5] add changelog Signed-off-by: kkewwei --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e8358908e853..fc885993551db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,6 +52,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix `terms` query on `float` field when `doc_values` are turned off by reverting back to `FloatPoint` from `FloatField` ([#12499](https://github.com/opensearch-project/OpenSearch/pull/12499)) - Fix get task API does not refresh resource stats ([#11531](https://github.com/opensearch-project/OpenSearch/pull/11531)) - Fix for deserilization bug in weighted round-robin metadata ([#11679](https://github.com/opensearch-project/OpenSearch/pull/11679)) +- onShardResult and onShardFailure are executed on one shard causes opensearch jvm crashed ([#12158](https://github.com/opensearch-project/OpenSearch/pull/12158)) ### Security