From b61277099442c76cad90854cb4205022300b0f84 Mon Sep 17 00:00:00 2001
From: Raghuvansh Raj
Date: Fri, 10 Nov 2023 07:00:31 +0530
Subject: [PATCH] Bugfix for update staying stuck when sent as part of bulk with retry_on_conflict specified

resetForExecutionForRetry() reset the per-item execution state but never
advanced retryCounter. An update item sent as part of a bulk request with
retry_on_conflict set could therefore be retried indefinitely when every
attempt ended in a version conflict. Increment the counter on each reset
and add a test in which the primary always reports a version conflict: the
request must complete (within a timeout) with a
VersionConflictEngineException failure on the item.

Signed-off-by: Raghuvansh Raj
---
Review notes (text between "---" and the first "diff --git" is not part of
the commit):
- NOTE(review): if resetForExecutionForRetry() is also called for retries
  that are not version conflicts, those retries would consume the
  retry_on_conflict budget as well -- confirm against the callers.
- The final assertEquals in the new test uses (expected, actual) order so
  that a failure message reports the two values the right way round.

 .../bulk/BulkPrimaryExecutionContext.java     |  2 +
 .../bulk/TransportShardBulkActionTests.java   | 68 +++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/server/src/main/java/org/opensearch/action/bulk/BulkPrimaryExecutionContext.java b/server/src/main/java/org/opensearch/action/bulk/BulkPrimaryExecutionContext.java
index 896456089ee3e..4e770f5851bc6 100644
--- a/server/src/main/java/org/opensearch/action/bulk/BulkPrimaryExecutionContext.java
+++ b/server/src/main/java/org/opensearch/action/bulk/BulkPrimaryExecutionContext.java
@@ -232,6 +232,8 @@ public void resetForExecutionForRetry() {
         currentItemState = ItemProcessingState.INITIAL;
         requestToExecute = null;
         executionResult = null;
+        // count this attempt so that an update which keeps conflicting eventually stops being retried
+        retryCounter++;
         assertInvariants(ItemProcessingState.INITIAL);
     }
 
diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java
index b325cfa197933..99c311c11e33f 100644
--- a/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java
+++ b/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java
@@ -101,6 +101,7 @@ import java.util.concurrent.BrokenBarrierException;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.CyclicBarrier;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import static org.hamcrest.CoreMatchers.equalTo;
@@ -947,6 +948,73 @@ public void testRetries() throws Exception {
         latch.await();
     }
 
+    /**
+     * A bulk update item with retry_on_conflict set, whose every attempt ends in a version
+     * conflict, must eventually complete with a {@link VersionConflictEngineException}
+     * failure rather than being retried forever.
+     */
+    public void testUpdateWithRetryOnConflict() throws IOException, InterruptedException {
+        IndexSettings indexSettings = new IndexSettings(indexMetadata(), Settings.EMPTY);
+        UpdateRequest writeRequest = new UpdateRequest("index", "id").doc(Requests.INDEX_CONTENT_TYPE, "field", "value");
+        writeRequest.retryOnConflict(3);
+        BulkItemRequest primaryRequest = new BulkItemRequest(0, writeRequest);
+
+        IndexRequest updateResponse = new IndexRequest("index").id("id").source(Requests.INDEX_CONTENT_TYPE, "field", "value");
+
+        Exception err = new VersionConflictEngineException(shardId, "id", "I'm conflicted <(;_;)>");
+        Engine.IndexResult conflictedResult = new Engine.IndexResult(err, 0);
+
+        IndexShard shard = mock(IndexShard.class);
+        when(shard.applyIndexOperationOnPrimary(anyLong(), any(), any(), anyLong(), anyLong(), anyLong(), anyBoolean())).thenAnswer(
+            ir -> conflictedResult
+        );
+        when(shard.indexSettings()).thenReturn(indexSettings);
+        when(shard.shardId()).thenReturn(shardId);
+        when(shard.mapperService()).thenReturn(mock(MapperService.class));
+
+        UpdateHelper updateHelper = mock(UpdateHelper.class);
+        when(updateHelper.prepare(any(), eq(shard), any())).thenReturn(
+            new UpdateHelper.Result(
+                updateResponse,
+                randomBoolean() ? DocWriteResponse.Result.CREATED : DocWriteResponse.Result.UPDATED,
+                Collections.singletonMap("field", "value"),
+                Requests.INDEX_CONTENT_TYPE
+            )
+        );
+
+        BulkItemRequest[] items = new BulkItemRequest[] { primaryRequest };
+        BulkShardRequest bulkShardRequest = new BulkShardRequest(shardId, RefreshPolicy.NONE, items);
+
+        final CountDownLatch latch = new CountDownLatch(1);
+        Runnable runnable = () -> TransportShardBulkAction.performOnPrimary(
+            bulkShardRequest,
+            shard,
+            updateHelper,
+            threadPool::absoluteTimeInMillis,
+            new NoopMappingUpdatePerformer(),
+            listener -> listener.onResponse(null),
+            new LatchedActionListener<>(
+                ActionTestUtils.assertNoFailureListener(
+                    result -> assertEquals(
+                        VersionConflictEngineException.class,
+                        result.replicaRequest().items()[0].getPrimaryResponse().getFailure().getCause().getClass()
+                    )
+                ),
+                latch
+            ),
+            threadPool,
+            Names.WRITE
+        );
+
+        // execute the runnable on a separate thread so that the infinite loop can be detected
+        Thread thread = new Thread(runnable);
+        thread.start();
+
+        // timeout the request in 10 seconds if there is an infinite loop
+        assertTrue(latch.await(10, TimeUnit.SECONDS));
+        assertEquals(VersionConflictEngineException.class, items[0].getPrimaryResponse().getFailure().getCause().getClass());
+    }
+
     public void testForceExecutionOnRejectionAfterMappingUpdate() throws Exception {
         TestThreadPool rejectingThreadPool = new TestThreadPool(
             "TransportShardBulkActionTests#testForceExecutionOnRejectionAfterMappingUpdate",