From d172f3f1235d2db6ce15822a5fbd4e33ec7394ee Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Fri, 22 Mar 2024 10:06:59 -0700 Subject: [PATCH] Fix flaky test SegmentReplicationWithNodeToNodeIndexShardTests#testReplicaClosesWhileReplicating_AfterGetCheckpoint (#12695) (#12741) This fixes a race condition in the test where the primary shard can still have an open file ref while shutting down. This happens because we are fetching file refs inside the resolveCheckpointInfoResponseListener method right after calling beforeIndexShardClosed. The beforeIndexShardClosed call resolves replication listeners immediately, which leaves a possibility of the primary attempting to shut down before those refs are closed. We could resolve this using latches, but this test really doesn't need to simulate a primary response at all, so it has been removed entirely. (cherry picked from commit 5e2034c684f51f73199d81294e1c420a1673672e) Signed-off-by: Marc Handalian Signed-off-by: github-actions[bot] Co-authored-by: github-actions[bot] --- .../shard/SegmentReplicationWithNodeToNodeIndexShardTests.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationWithNodeToNodeIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationWithNodeToNodeIndexShardTests.java index f0950fe5392de..e541e988f3920 100644 --- a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationWithNodeToNodeIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationWithNodeToNodeIndexShardTests.java @@ -110,7 +110,6 @@ public void testReplicaClosesWhileReplicating_AfterGetCheckpoint() throws Except IndexShard primary = shards.getPrimary(); final IndexShard replica = shards.getReplicas().get(0); - final int numDocs = shards.indexDocs(randomInt(10)); primary.refresh("Test"); final SegmentReplicationSourceFactory sourceFactory = 
mock(SegmentReplicationSourceFactory.class); @@ -124,7 +123,6 @@ public void getCheckpointMetadata( ) { // trigger a cancellation by closing the replica. targetService.beforeIndexShardClosed(replica.shardId, replica, Settings.EMPTY); - resolveCheckpointInfoResponseListener(listener, primary); } @Override @@ -141,7 +139,6 @@ public void getSegmentFiles( }; when(sourceFactory.get(any())).thenReturn(source); startReplicationAndAssertCancellation(replica, primary, targetService); - shards.removeReplica(replica); closeShards(replica); }