From 9da564b727147a47b8a048df8cdc05db146f6344 Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Fri, 8 Dec 2023 21:16:47 +0530 Subject: [PATCH] [Remote Store] Avoid repeated delete calls for the stale segment files (#11532) (#11533) * Avoid repeated delete calls for the stale segment files * Address PR comments --------- (cherry picked from commit 10d34beb85ebee0a6bf36afece16e4ed991de641) Signed-off-by: Sachin Kale Signed-off-by: github-actions[bot] Co-authored-by: github-actions[bot] Co-authored-by: Sachin Kale --- .../store/RemoteSegmentStoreDirectory.java | 42 ++++++++------- .../RemoteSegmentStoreDirectoryTests.java | 54 +++++++++++++++++++ 2 files changed, 76 insertions(+), 20 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java index 7b57fabdf1486..9c1e902606cab 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java +++ b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java @@ -45,7 +45,6 @@ import java.io.IOException; import java.io.InputStream; import java.nio.file.NoSuchFileException; -import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -781,6 +780,7 @@ public void deleteStaleSegments(int lastNMetadataFilesToKeep) throws IOException segmentMetadataMap.values().stream().map(metadata -> metadata.uploadedFilename).collect(Collectors.toSet()) ); } + Set deletedSegmentFiles = new HashSet<>(); for (String metadataFile : metadataFilesToBeDeleted) { Map staleSegmentFilesMetadataMap = readMetadataFile(metadataFile).getMetadata(); Set staleSegmentRemoteFilenames = staleSegmentFilesMetadataMap.values() @@ -788,31 +788,33 @@ public void deleteStaleSegments(int lastNMetadataFilesToKeep) throws IOException .map(metadata -> metadata.uploadedFilename) .collect(Collectors.toSet()); AtomicBoolean deletionSuccessful = new AtomicBoolean(true); - List nonActiveDeletedSegmentFiles = new ArrayList<>(); - staleSegmentRemoteFilenames.stream().filter(file -> !activeSegmentRemoteFilenames.contains(file)).forEach(file -> { - try { - remoteDataDirectory.deleteFile(file); - nonActiveDeletedSegmentFiles.add(file); - if (!activeSegmentFilesMetadataMap.containsKey(getLocalSegmentFilename(file))) { - segmentsUploadedToRemoteStore.remove(getLocalSegmentFilename(file)); + staleSegmentRemoteFilenames.stream() + .filter(file -> activeSegmentRemoteFilenames.contains(file) == false) + .filter(file -> deletedSegmentFiles.contains(file) == false) + .forEach(file -> { + try { + remoteDataDirectory.deleteFile(file); + deletedSegmentFiles.add(file); + if (!activeSegmentFilesMetadataMap.containsKey(getLocalSegmentFilename(file))) { + segmentsUploadedToRemoteStore.remove(getLocalSegmentFilename(file)); + } + } catch (NoSuchFileException e) { + logger.info("Segment file {} corresponding to metadata file {} does not exist in remote", file, metadataFile); + } catch (IOException e) { + deletionSuccessful.set(false); + logger.warn( + "Exception while deleting segment file {} corresponding to metadata file {}. Deletion will be re-tried", + file, + metadataFile + ); } - } catch (NoSuchFileException e) { - logger.info("Segment file {} corresponding to metadata file {} does not exist in remote", file, metadataFile); - } catch (IOException e) { - deletionSuccessful.set(false); - logger.info( - "Exception while deleting segment file {} corresponding to metadata file {}. Deletion will be re-tried", - file, - metadataFile - ); - } - }); - logger.debug("nonActiveDeletedSegmentFiles={}", nonActiveDeletedSegmentFiles); + }); if (deletionSuccessful.get()) { logger.debug("Deleting stale metadata file {} from remote segment store", metadataFile); remoteMetadataDirectory.deleteFile(metadataFile); } } + logger.debug("deletedSegmentFiles={}", deletedSegmentFiles); } public void deleteStaleSegmentsAsync(int lastNMetadataFilesToKeep) { diff --git a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java index 36cfd84ff960a..2c6c4afed69fd 100644 --- a/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java +++ b/server/src/test/java/org/opensearch/index/store/RemoteSegmentStoreDirectoryTests.java @@ -51,6 +51,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -122,6 +123,14 @@ public class RemoteSegmentStoreDirectoryTests extends IndexShardTestCase { 1, "node-1" ); + private final String metadataFilename4 = RemoteSegmentStoreDirectory.MetadataFilenameUtils.getMetadataFilename( + 10, + 36, + 34, + 1, + 1, + "node-1" + ); @Before public void setup() throws IOException { @@ -979,6 +988,51 @@ public void testDeleteStaleCommitsActualDelete() throws Exception { verify(remoteMetadataDirectory).deleteFile(metadataFilename3); } + public void testDeleteStaleCommitsDeleteDedup() throws Exception { + Map> metadataFilenameContentMapping = new HashMap<>(populateMetadata()); + metadataFilenameContentMapping.put(metadataFilename4, metadataFilenameContentMapping.get(metadataFilename3)); + + when( + remoteMetadataDirectory.listFilesByPrefixInLexicographicOrder( + RemoteSegmentStoreDirectory.MetadataFilenameUtils.METADATA_PREFIX, + Integer.MAX_VALUE + ) + ).thenReturn(new ArrayList<>(List.of(metadataFilename, metadataFilename2, metadataFilename3, metadataFilename4))); + + when(remoteMetadataDirectory.getBlobStream(metadataFilename4)).thenAnswer( + I -> createMetadataFileBytes( + metadataFilenameContentMapping.get(metadataFilename4), + indexShard.getLatestReplicationCheckpoint(), + segmentInfos + ) + ); + + remoteSegmentStoreDirectory.init(); + + // popluateMetadata() adds stub to return 4 metadata files + // We are passing lastNMetadataFilesToKeep=2 here so that oldest 2 metadata files will be deleted + remoteSegmentStoreDirectory.deleteStaleSegmentsAsync(2); + + Set staleSegmentFiles = new HashSet<>(); + for (String metadata : metadataFilenameContentMapping.get(metadataFilename3).values()) { + staleSegmentFiles.add(metadata.split(RemoteSegmentStoreDirectory.UploadedSegmentMetadata.SEPARATOR)[1]); + } + for (String metadata : metadataFilenameContentMapping.get(metadataFilename4).values()) { + staleSegmentFiles.add(metadata.split(RemoteSegmentStoreDirectory.UploadedSegmentMetadata.SEPARATOR)[1]); + } + staleSegmentFiles.forEach(file -> { + try { + // Even with the same files in 2 stale metadata files, delete should be called only once. + verify(remoteDataDirectory, times(1)).deleteFile(file); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + assertBusy(() -> assertThat(remoteSegmentStoreDirectory.canDeleteStaleCommits.get(), is(true))); + verify(remoteMetadataDirectory).deleteFile(metadataFilename3); + verify(remoteMetadataDirectory).deleteFile(metadataFilename4); + } + public void testDeleteStaleCommitsActualDeleteIOException() throws Exception { Map> metadataFilenameContentMapping = populateMetadata(); remoteSegmentStoreDirectory.init();