From 27e39ce70d17ae3d841e2e49888e631439df2d04 Mon Sep 17 00:00:00 2001 From: Ashish Singh Date: Fri, 20 Oct 2023 01:53:29 +0530 Subject: [PATCH 1/3] [Remote Store] Fix relocation failure due to transport receive timeout Signed-off-by: Ashish Singh --- .../opensearch/remotestore/RemoteStoreIT.java | 23 +++++++++++++++++++ .../org/opensearch/index/IndexSettings.java | 3 +++ .../opensearch/index/shard/IndexShard.java | 6 ++++- .../translog/InternalTranslogManager.java | 11 +++++---- .../index/translog/RemoteFsTranslog.java | 4 ++++ .../recovery/PeerRecoverySourceService.java | 3 ++- .../recovery/RemoteRecoveryTargetHandler.java | 13 +++++++++-- 7 files changed, 54 insertions(+), 9 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java index 1fb5c2052aded..b3b4f8e10fd31 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java @@ -509,4 +509,27 @@ public void testRestoreSnapshotToIndexWithSameNameDifferentUUID() throws Excepti assertHitCount(client(dataNodes.get(1)).prepareSearch(INDEX_NAME).setSize(0).get(), 50); }); } + + public void testNoSearchIdleForAnyReplicaCount() throws ExecutionException, InterruptedException { + internalCluster().startClusterManagerOnlyNode(); + String primaryShardNode = internalCluster().startDataOnlyNodes(1).get(0); + + createIndex(INDEX_NAME, remoteStoreIndexSettings(0)); + ensureGreen(INDEX_NAME); + IndexShard indexShard = getIndexShard(primaryShardNode); + assertFalse(indexShard.isSearchIdleSupported()); + + String replicaShardNode = internalCluster().startDataOnlyNodes(1).get(0); + assertAcked( + client().admin() + .indices() + .prepareUpdateSettings(INDEX_NAME) + .setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)) + ); + ensureGreen(INDEX_NAME); + assertFalse(indexShard.isSearchIdleSupported()); + + indexShard = getIndexShard(replicaShardNode); + assertFalse(indexShard.isSearchIdleSupported()); + } } diff --git a/server/src/main/java/org/opensearch/index/IndexSettings.java b/server/src/main/java/org/opensearch/index/IndexSettings.java index e90e9259f6a5c..99d2b5a74c406 100644 --- a/server/src/main/java/org/opensearch/index/IndexSettings.java +++ b/server/src/main/java/org/opensearch/index/IndexSettings.java @@ -1024,6 +1024,9 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti } private void setSearchIdleAfter(TimeValue searchIdleAfter) { + if (this.isRemoteStoreEnabled) { + logger.warn("Search idle is not supported for remote backed indices"); + } if (this.replicationType == ReplicationType.SEGMENT && this.getNumberOfReplicas() > 0) { logger.warn("Search idle is not supported for indices with replicas using 'replication.type: SEGMENT'"); } diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 5ebfd3863a6cf..1b7d1b2716979 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -4425,7 +4425,6 @@ public final boolean isSearchIdle() { } /** - * * Returns true if this shard supports search idle. *

* Indices using Segment Replication will ignore search idle unless there are no replicas. @@ -4434,6 +4433,11 @@ public final boolean isSearchIdle() { * a new set of segments. */ public final boolean isSearchIdleSupported() { + // If the index is remote store backed, then search idle is not supported. This is to ensure that async refresh + // task continues to upload to remote store periodically. + if (isRemoteTranslogEnabled()) { + return false; + } return indexSettings.isSegRepEnabled() == false || indexSettings.getNumberOfReplicas() == 0; } diff --git a/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java b/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java index 85c52b907d326..0e4239bf785b6 100644 --- a/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java +++ b/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java @@ -430,10 +430,11 @@ public String getTranslogUUID() { * @return if the translog should be flushed */ public boolean shouldPeriodicallyFlush(long localCheckpointOfLastCommit, long flushThreshold) { - final long translogGenerationOfLastCommit = translog.getMinGenerationForSeqNo( - localCheckpointOfLastCommit + 1 - ).translogFileGeneration; - if (translog.sizeInBytesByMinGen(translogGenerationOfLastCommit) < flushThreshold) { + long minRefSeqNo = translog instanceof RemoteFsTranslog + ? ((RemoteFsTranslog) translog).getMinSeqNoToKeep() + : localCheckpointOfLastCommit + 1; + final long minReferencedTranslogGeneration = translog.getMinGenerationForSeqNo(minRefSeqNo).translogFileGeneration; + if (translog.sizeInBytesByMinGen(minReferencedTranslogGeneration) < flushThreshold) { return false; } /* @@ -454,7 +455,7 @@ public boolean shouldPeriodicallyFlush(long localCheckpointOfLastCommit, long fl final long translogGenerationOfNewCommit = translog.getMinGenerationForSeqNo( localCheckpointTrackerSupplier.get().getProcessedCheckpoint() + 1 ).translogFileGeneration; - return translogGenerationOfLastCommit < translogGenerationOfNewCommit + return minReferencedTranslogGeneration < translogGenerationOfNewCommit || localCheckpointTrackerSupplier.get().getProcessedCheckpoint() == localCheckpointTrackerSupplier.get().getMaxSeqNo(); } diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java index 29c825fd383c5..790eed93bd1ec 100644 --- a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java +++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java @@ -544,4 +544,8 @@ public void onUploadFailed(TransferSnapshot transferSnapshot, Exception ex) thro } } } + + long getMinSeqNoToKeep() { + return minSeqNoToKeep; + } } diff --git a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoverySourceService.java b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoverySourceService.java index 6c7632a8a408d..cb2bedf00de99 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoverySourceService.java +++ b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoverySourceService.java @@ -376,7 +376,8 @@ private Tuple createRecovery transportService, request.targetNode(), recoverySettings, - throttleTime -> shard.recoveryStats().addThrottleTime(throttleTime) + throttleTime -> shard.recoveryStats().addThrottleTime(throttleTime), + shard.isRemoteTranslogEnabled() ); handler = RecoverySourceHandlerFactory.create(shard, recoveryTarget, request, recoverySettings); return Tuple.tuple(handler, recoveryTarget); diff --git a/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java b/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java index 66f5b13449f05..d6d03cf8ecd3b 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java @@ -75,6 +75,7 @@ public class RemoteRecoveryTargetHandler implements RecoveryTargetHandler { private final AtomicLong requestSeqNoGenerator = new AtomicLong(0); private final RetryableTransportClient retryableTransportClient; private final RemoteSegmentFileChunkWriter fileChunkWriter; + private final boolean remoteStoreEnabled; public RemoteRecoveryTargetHandler( long recoveryId, @@ -82,7 +83,8 @@ public RemoteRecoveryTargetHandler( TransportService transportService, DiscoveryNode targetNode, RecoverySettings recoverySettings, - Consumer onSourceThrottle + Consumer onSourceThrottle, + boolean remoteStoreEnabled ) { this.transportService = transportService; // It is safe to pass the retry timeout value here because RemoteRecoveryTargetHandler @@ -111,6 +113,7 @@ public RemoteRecoveryTargetHandler( requestSeqNoGenerator, onSourceThrottle ); + this.remoteStoreEnabled = remoteStoreEnabled; } public DiscoveryNode targetNode() { @@ -129,7 +132,13 @@ public void prepareForTranslogOperations(int totalTranslogOps, ActionListener reader = in -> TransportResponse.Empty.INSTANCE; final ActionListener responseListener = ActionListener.map(listener, r -> null); - retryableTransportClient.executeRetryableAction(action, request, responseListener, reader); + if (remoteStoreEnabled) { + // If remote store is enabled, during the prepare_translog phase, translog is also downloaded on the + // target host along with incremental segments download. This + retryableTransportClient.executeRetryableAction(action, request, translogOpsRequestOptions, responseListener, reader); + } else { + retryableTransportClient.executeRetryableAction(action, request, responseListener, reader); + } } @Override From 2de1f2eaf6c07240e7d5a9d3e9f1cff5a4844348 Mon Sep 17 00:00:00 2001 From: Ashish Singh Date: Fri, 20 Oct 2023 13:08:34 +0530 Subject: [PATCH 2/3] Fix existing extended shardIdle for remote backed shards Signed-off-by: Ashish Singh --- .../index/shard/RemoteIndexShardTests.java | 9 +++++++++ .../shard/SegmentReplicationIndexShardTests.java | 12 ++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java index fe389e3b3fcb4..703a7d457d5b6 100644 --- a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java @@ -471,6 +471,15 @@ public void onReplicationFailure( } } + @Override + protected void validateShardIdleWithNoReplicas(IndexShard primary) { + // ensure search idle conditions are met. + assertFalse(primary.isSearchIdleSupported()); + assertTrue(primary.isSearchIdle()); + assertTrue(primary.scheduledRefresh()); + assertFalse(primary.hasRefreshPending()); + } + private void assertSingleSegmentFile(IndexShard shard, String fileName) throws IOException { final Set segmentsFileNames = Arrays.stream(shard.store().directory().listAll()) .filter(file -> file.startsWith(IndexFileNames.SEGMENTS)) diff --git a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java index eab38bfe5c64d..7caff3e5f5479 100644 --- a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java @@ -436,13 +436,17 @@ public void testShardIdleWithNoReplicas() throws Exception { shards.startAll(); final IndexShard primary = shards.getPrimary(); shards.indexDocs(randomIntBetween(1, 10)); - // ensure search idle conditions are met. - assertTrue(primary.isSearchIdle()); - assertFalse(primary.scheduledRefresh()); - assertTrue(primary.hasRefreshPending()); + validateShardIdleWithNoReplicas(primary); } } + protected void validateShardIdleWithNoReplicas(IndexShard primary) { + // ensure search idle conditions are met. + assertTrue(primary.isSearchIdle()); + assertFalse(primary.scheduledRefresh()); + assertTrue(primary.hasRefreshPending()); + } + /** * here we are starting a new primary shard in PrimaryMode and testing if the shard publishes checkpoint after refresh. */ From 9589791f05cd9949ff668792a07abf8d7e9b58b9 Mon Sep 17 00:00:00 2001 From: Ashish Singh Date: Fri, 20 Oct 2023 15:58:45 +0530 Subject: [PATCH 3/3] Incorporate PR review comments Signed-off-by: Ashish Singh --- .../opensearch/index/translog/InternalTranslogManager.java | 7 +++---- .../org/opensearch/index/translog/RemoteFsTranslog.java | 3 ++- .../main/java/org/opensearch/index/translog/Translog.java | 4 ++++ .../indices/recovery/RemoteRecoveryTargetHandler.java | 2 +- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java b/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java index 0e4239bf785b6..4d0fc13d433c6 100644 --- a/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java +++ b/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java @@ -430,10 +430,9 @@ public String getTranslogUUID() { * @return if the translog should be flushed */ public boolean shouldPeriodicallyFlush(long localCheckpointOfLastCommit, long flushThreshold) { - long minRefSeqNo = translog instanceof RemoteFsTranslog - ? ((RemoteFsTranslog) translog).getMinSeqNoToKeep() - : localCheckpointOfLastCommit + 1; - final long minReferencedTranslogGeneration = translog.getMinGenerationForSeqNo(minRefSeqNo).translogFileGeneration; + // This is the minimum seqNo that is referred in translog and considered for calculating translog size + long minTranslogRefSeqNo = translog.getMinUnreferencedSeqNoInSegments(localCheckpointOfLastCommit + 1); + final long minReferencedTranslogGeneration = translog.getMinGenerationForSeqNo(minTranslogRefSeqNo).translogFileGeneration; if (translog.sizeInBytesByMinGen(minReferencedTranslogGeneration) < flushThreshold) { return false; } diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java index 790eed93bd1ec..2dd9b1a545d4a 100644 --- a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java +++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java @@ -545,7 +545,8 @@ public void onUploadFailed(TransferSnapshot transferSnapshot, Exception ex) thro } } - long getMinSeqNoToKeep() { + @Override + public long getMinUnreferencedSeqNoInSegments(long minUnrefCheckpointInLastCommit) { return minSeqNoToKeep; } } diff --git a/server/src/main/java/org/opensearch/index/translog/Translog.java b/server/src/main/java/org/opensearch/index/translog/Translog.java index cf7f18840a03e..b44aa6e059224 100644 --- a/server/src/main/java/org/opensearch/index/translog/Translog.java +++ b/server/src/main/java/org/opensearch/index/translog/Translog.java @@ -2034,4 +2034,8 @@ public static String createEmptyTranslog( writer.close(); return uuid; } + + public long getMinUnreferencedSeqNoInSegments(long minUnrefCheckpointInLastCommit) { + return minUnrefCheckpointInLastCommit; + } } diff --git a/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java b/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java index d6d03cf8ecd3b..37227596fdfe7 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java @@ -134,7 +134,7 @@ public void prepareForTranslogOperations(int totalTranslogOps, ActionListener responseListener = ActionListener.map(listener, r -> null); if (remoteStoreEnabled) { // If remote store is enabled, during the prepare_translog phase, translog is also downloaded on the - // target host along with incremental segments download. This + // target host along with incremental segments download. retryableTransportClient.executeRetryableAction(action, request, translogOpsRequestOptions, responseListener, reader); } else { retryableTransportClient.executeRetryableAction(action, request, responseListener, reader);