diff --git a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java
index 695c01367171a..52ce6fb332cd5 100644
--- a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java
+++ b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java
@@ -199,10 +199,24 @@ private boolean syncSegments() {
 
                 try (GatedCloseable<SegmentInfos> segmentInfosGatedCloseable = indexShard.getSegmentInfosSnapshot()) {
                     SegmentInfos segmentInfos = segmentInfosGatedCloseable.get();
+                    // If the segment infos snapshot and the last commit on disk are at different generations, log both
+                    // generations and, when assertions are enabled, verify that they reference the same set of files.
+                    SegmentInfos lastCommittedSegmentInfos = indexShard.store().readLastCommittedSegmentsInfo();
+                    if (segmentInfos.getGeneration() != lastCommittedSegmentInfos.getGeneration()) {
+                        logger.info(
+                            "--> Different generations. segmentInfosSnapshot = {}, lastCommittedSegmentInfos = {}",
+                            segmentInfos.getGeneration(),
+                            lastCommittedSegmentInfos.getGeneration()
+                        );
+                        assert segmentInfos.files(false).equals(lastCommittedSegmentInfos.files(false)) : "SegmentInfos files: "
+                            + segmentInfos.files(false)
+                            + " do not match lastCommittedSegmentInfos files: "
+                            + lastCommittedSegmentInfos.files(false);
+                    }
                     assert segmentInfos.getGeneration() == checkpoint.getSegmentsGen() : "SegmentInfos generation: "
                         + segmentInfos.getGeneration()
                         + " does not match metadata generation: "
                         + checkpoint.getSegmentsGen();
                     // Capture replication checkpoint before uploading the segments as upload can take some time and checkpoint can
                     // move.
                     long lastRefreshedCheckpoint = ((InternalEngine) indexShard.getEngine()).lastRefreshedCheckpoint();
diff --git a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java
index b23d2d7d0a3f8..528841f3ad202 100644
--- a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java
+++ b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java
@@ -641,6 +641,7 @@ public void uploadMetadata(
                 }
                 storeDirectory.sync(Collections.singleton(metadataFilename));
                 remoteMetadataDirectory.copyFrom(storeDirectory, metadataFilename, metadataFilename, IOContext.DEFAULT);
+                logger.debug("Uploaded metadata file: {}", metadataFilename);
             } finally {
                 tryAndDeleteLocalFile(metadataFilename, storeDirectory);
             }
diff --git a/server/src/main/java/org/opensearch/snapshots/SnapshotShardsService.java b/server/src/main/java/org/opensearch/snapshots/SnapshotShardsService.java
index af2f925f89726..09e91c09a13db 100644
--- a/server/src/main/java/org/opensearch/snapshots/SnapshotShardsService.java
+++ b/server/src/main/java/org/opensearch/snapshots/SnapshotShardsService.java
@@ -36,6 +36,7 @@
 import org.apache.logging.log4j.Logger;
 import org.apache.logging.log4j.message.ParameterizedMessage;
 import org.apache.lucene.index.IndexCommit;
+import org.apache.lucene.index.SegmentInfos;
 import org.opensearch.Version;
 import org.opensearch.cluster.ClusterChangedEvent;
 import org.opensearch.cluster.ClusterStateListener;
@@ -73,6 +74,7 @@
 import org.opensearch.transport.TransportService;
 
 import java.io.IOException;
+import java.nio.file.NoSuchFileException;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
@@ -406,7 +408,35 @@ private void snapshot(
                         long primaryTerm = indexShard.getOperationPrimaryTerm();
                         final IndexCommit snapshotIndexCommit = wrappedSnapshot.get();
                         long commitGeneration = snapshotIndexCommit.getGeneration();
-                        indexShard.acquireLockOnCommitData(snapshot.getSnapshotId().getUUID(), primaryTerm, commitGeneration);
+                        try {
+                            indexShard.acquireLockOnCommitData(snapshot.getSnapshotId().getUUID(), primaryTerm, commitGeneration);
+                        } catch (NoSuchFileException e) {
+                            // The lock could not be acquired for commitGeneration. If the segment infos snapshot is at a
+                            // different generation but refers to the same files as the last commit on disk, retry with the
+                            // snapshot's generation; in every other case rethrow so we never proceed without a lock.
+                            try (GatedCloseable<SegmentInfos> segmentInfosGatedCloseable = indexShard.getSegmentInfosSnapshot()) {
+                                SegmentInfos segmentInfos = segmentInfosGatedCloseable.get();
+                                if (segmentInfos.getGeneration() == commitGeneration) {
+                                    // Same generation already failed: there is nothing to fall back to.
+                                    throw e;
+                                }
+                                SegmentInfos directoryInfos = indexShard.store().readLastCommittedSegmentsInfo();
+                                if (segmentInfos.files(false).equals(directoryInfos.files(false)) == false) {
+                                    throw e;
+                                }
+                                logger.info(
+                                    "Different generations for lastIndexCommit = {} and segmentInfosSnapshot = {}. "
+                                        + "As referred files are same, using generation from segmentInfosSnapshot",
+                                    commitGeneration,
+                                    segmentInfos.getGeneration()
+                                );
+                                indexShard.acquireLockOnCommitData(
+                                    snapshot.getSnapshotId().getUUID(),
+                                    primaryTerm,
+                                    segmentInfos.getGeneration()
+                                );
+                            }
+                        }
                         try {
                             repository.snapshotRemoteStoreIndexShard(
                                 indexShard.store(),