diff --git a/server/src/main/java/org/opensearch/indices/replication/OngoingSegmentReplications.java b/server/src/main/java/org/opensearch/indices/replication/OngoingSegmentReplications.java index e0e356f1531e1..4712ae6c18759 100644 --- a/server/src/main/java/org/opensearch/indices/replication/OngoingSegmentReplications.java +++ b/server/src/main/java/org/opensearch/indices/replication/OngoingSegmentReplications.java @@ -139,13 +139,25 @@ void startSegmentCopy(GetSegmentFilesRequest request, ActionListener { - if (segrepHandler != null) { - logger.warn("Override handler for allocation id {}", request.getTargetAllocationId()); - cancelHandlers(handler -> handler.getAllocationId().equals(request.getTargetAllocationId()), "cancel due to retry"); - } - return createTargetHandler(request.getTargetNode(), copyState, request.getTargetAllocationId(), fileChunkWriter); - }); + final SegmentReplicationSourceHandler newHandler = createTargetHandler( + request.getTargetNode(), + copyState, + request.getTargetAllocationId(), + fileChunkWriter + ); + final SegmentReplicationSourceHandler existingHandler = allocationIdToHandlers.putIfAbsent( + request.getTargetAllocationId(), + newHandler + ); + // If we are already replicating to this allocation Id, cancel the old and replace with a new execution. + // This will clear the old handler & referenced copy state holding an incref'd indexCommit. + if (existingHandler != null) { + logger.warn("Override handler for allocation id {}", request.getTargetAllocationId()); + cancelHandlers(handler -> handler.getAllocationId().equals(request.getTargetAllocationId()), "cancel due to retry"); + assert allocationIdToHandlers.containsKey(request.getTargetAllocationId()) == false; + allocationIdToHandlers.put(request.getTargetAllocationId(), newHandler); + } + assert allocationIdToHandlers.containsKey(request.getTargetAllocationId()); return copyState; } diff --git a/server/src/test/java/org/opensearch/indices/replication/OngoingSegmentReplicationsTests.java b/server/src/test/java/org/opensearch/indices/replication/OngoingSegmentReplicationsTests.java index 3b289114f5ca1..84a53ae22a6bc 100644 --- a/server/src/test/java/org/opensearch/indices/replication/OngoingSegmentReplicationsTests.java +++ b/server/src/test/java/org/opensearch/indices/replication/OngoingSegmentReplicationsTests.java @@ -403,4 +403,38 @@ public void testCancelForMissingIds() throws IOException { assertEquals(0, replications.cachedCopyStateSize()); closeShards(replica_2); } + + public void testPrepareForReplicationAlreadyReplicating() throws IOException { + OngoingSegmentReplications replications = new OngoingSegmentReplications(mockIndicesService, recoverySettings); + final String replicaAllocationId = replica.routingEntry().allocationId().getId(); + final CheckpointInfoRequest request = new CheckpointInfoRequest(1L, replicaAllocationId, primaryDiscoveryNode, testCheckpoint); + + final CopyState copyState = replications.prepareForReplication(request, mock(FileChunkWriter.class)); + + final SegmentReplicationSourceHandler handler = replications.getHandlers().get(replicaAllocationId); + assertEquals(handler.getCopyState(), copyState); + assertEquals(1, copyState.refCount()); + + ReplicationCheckpoint secondCheckpoint = new ReplicationCheckpoint( + testCheckpoint.getShardId(), + testCheckpoint.getPrimaryTerm(), + testCheckpoint.getSegmentsGen(), + testCheckpoint.getSegmentInfosVersion() + 1, + testCheckpoint.getCodec() + ); + + final CheckpointInfoRequest secondRequest = new CheckpointInfoRequest( + 1L, + replicaAllocationId, + primaryDiscoveryNode, + secondCheckpoint + ); + + final CopyState secondCopyState = replications.prepareForReplication(secondRequest, mock(FileChunkWriter.class)); + final SegmentReplicationSourceHandler secondHandler = replications.getHandlers().get(replicaAllocationId); + assertEquals(secondHandler.getCopyState(), secondCopyState); + assertEquals("New copy state is incref'd", 1, secondCopyState.refCount()); + assertEquals("Old copy state is cleaned up", 0, copyState.refCount()); + + } }