From 398937a7c32880dc71e421cfc5d3551b626749fd Mon Sep 17 00:00:00 2001 From: Sooraj Sinha <81695996+soosinha@users.noreply.github.com> Date: Thu, 26 Oct 2023 11:55:13 +0530 Subject: [PATCH] Fix valid cluster UUID logic for uncommitted cluster UUIDs (#10916) Signed-off-by: Sooraj Sinha Signed-off-by: Shivansh Arora --- .../remote/RemoteClusterStateService.java | 36 +++++++------- .../RemoteClusterStateServiceTests.java | 49 +++++++++++++++---- 2 files changed, 59 insertions(+), 26 deletions(-) diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index fa4eef79c46dc..b3309b1fd8a63 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -881,25 +881,31 @@ private Map getLatestManifestForAllClusterUUIDs * @return List of cluster UUIDs. The first element is the most recent cluster UUID in the chain */ private List createClusterChain(final Map manifestsByClusterUUID, final String clusterName) { - final Map clusterUUIDGraph = manifestsByClusterUUID.values() + final List validClusterManifests = manifestsByClusterUUID.values() .stream() + .filter(this::isValidClusterUUID) + .collect(Collectors.toList()); + final Map clusterUUIDGraph = validClusterManifests.stream() .collect(Collectors.toMap(ClusterMetadataManifest::getClusterUUID, ClusterMetadataManifest::getPreviousClusterUUID)); - final List validClusterUUIDs = manifestsByClusterUUID.values() - .stream() - .filter(m -> !isInvalidClusterUUID(m) && !clusterUUIDGraph.containsValue(m.getClusterUUID())) + final List topLevelClusterUUIDs = validClusterManifests.stream() .map(ClusterMetadataManifest::getClusterUUID) + .filter(clusterUUID -> !clusterUUIDGraph.containsValue(clusterUUID)) .collect(Collectors.toList()); - if (validClusterUUIDs.isEmpty()) { - logger.info("There is no valid previous cluster UUID"); + + if (topLevelClusterUUIDs.isEmpty()) { + // This can occur only when there are no valid cluster UUIDs + assert validClusterManifests.isEmpty() : "There are no top level cluster UUIDs even when there are valid cluster UUIDs"; + logger.info("There is no valid previous cluster UUID. All cluster UUIDs evaluated are: {}", manifestsByClusterUUID.keySet()); return Collections.emptyList(); } - if (validClusterUUIDs.size() > 1) { + if (topLevelClusterUUIDs.size() > 1) { + logger.info("Top level cluster UUIDs: {}", topLevelClusterUUIDs); // If the valid cluster UUIDs are more that 1, it means there was some race condition where // more then 2 cluster manager nodes tried to become active cluster manager and published // 2 cluster UUIDs which followed the same previous UUID. final Map manifestsByClusterUUIDTrimmed = trimClusterUUIDs( manifestsByClusterUUID, - validClusterUUIDs, + topLevelClusterUUIDs, clusterName ); if (manifestsByClusterUUID.size() == manifestsByClusterUUIDTrimmed.size()) { @@ -908,14 +914,14 @@ private List createClusterChain(final Map validChain = new ArrayList<>(); - String currentUUID = validClusterUUIDs.get(0); + String currentUUID = topLevelClusterUUIDs.get(0); while (currentUUID != null && !ClusterState.UNKNOWN_UUID.equals(currentUUID)) { validChain.add(currentUUID); // Getting the previous cluster UUID of a cluster UUID from the clusterUUID Graph @@ -942,11 +948,7 @@ private Map trimClusterUUIDs( // Here we compare the manifest of current UUID to that of previous UUID // In case currentUUID's latest manifest is same as previous UUIDs latest manifest, // that means it was restored from previousUUID and no IndexMetadata update was performed on it. - if (ClusterState.UNKNOWN_UUID.equals(currentManifest.getPreviousClusterUUID())) { - if (currentManifest.getIndices().isEmpty()) { - trimmedUUIDs.remove(clusterUUID); - } - } else { + if (!ClusterState.UNKNOWN_UUID.equals(currentManifest.getPreviousClusterUUID())) { ClusterMetadataManifest previousManifest = trimmedUUIDs.get(currentManifest.getPreviousClusterUUID()); if (isMetadataEqual(currentManifest, previousManifest, clusterName) && isGlobalMetadataEqual(currentManifest, previousManifest, clusterName)) { @@ -985,8 +987,8 @@ private boolean isGlobalMetadataEqual(ClusterMetadataManifest first, ClusterMeta return Metadata.isGlobalResourcesMetadataEquals(firstGlobalMetadata, secondGlobalMetadata); } - private boolean isInvalidClusterUUID(ClusterMetadataManifest manifest) { - return !manifest.isClusterUUIDCommitted(); + private boolean isValidClusterUUID(ClusterMetadataManifest manifest) { + return manifest.isClusterUUIDCommitted(); } /** diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index ca88653f529f6..586618bd1ecff 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -911,7 +911,7 @@ public void testGetValidPreviousClusterUUIDWithMultipleChains() throws IOExcepti "cluster-uuid3", "cluster-uuid1" ); - mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, randomBoolean()); + mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, randomBoolean(), Collections.emptyMap()); remoteClusterStateService.start(); String previousClusterUUID = remoteClusterStateService.getLastKnownUUIDFromRemote("test-cluster"); @@ -933,6 +933,23 @@ public void testGetValidPreviousClusterUUIDWithInvalidMultipleChains() throws IO assertThrows(IllegalStateException.class, () -> remoteClusterStateService.getLastKnownUUIDFromRemote("test-cluster")); } + public void testGetValidPreviousClusterUUIDWhenLastUUIDUncommitted() throws IOException { + Map clusterUUIDsPointers = Map.of( + "cluster-uuid1", + ClusterState.UNKNOWN_UUID, + "cluster-uuid2", + "cluster-uuid1", + "cluster-uuid3", + "cluster-uuid2" + ); + Map clusterUUIDCommitted = Map.of("cluster-uuid1", true, "cluster-uuid2", true, "cluster-uuid3", false); + mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, clusterUUIDCommitted); + + remoteClusterStateService.start(); + String previousClusterUUID = remoteClusterStateService.getLastKnownUUIDFromRemote("test-cluster"); + assertThat(previousClusterUUID, equalTo("cluster-uuid2")); + } + public void testDeleteStaleClusterUUIDs() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); ClusterMetadataManifest clusterMetadataManifest = ClusterMetadataManifest.builder() @@ -1128,11 +1145,21 @@ public void testGlobalMetadataUploadWaitTimeSetting() { } private void mockObjectsForGettingPreviousClusterUUID(Map clusterUUIDsPointers) throws IOException { - mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false); + mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false, Collections.emptyMap()); } - private void mockObjectsForGettingPreviousClusterUUID(Map clusterUUIDsPointers, boolean differGlobalMetadata) - throws IOException { + private void mockObjectsForGettingPreviousClusterUUID( + Map clusterUUIDsPointers, + Map clusterUUIDCommitted + ) throws IOException { + mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false, clusterUUIDCommitted); + } + + private void mockObjectsForGettingPreviousClusterUUID( + Map clusterUUIDsPointers, + boolean differGlobalMetadata, + Map clusterUUIDCommitted + ) throws IOException { final BlobPath blobPath = mock(BlobPath.class); when((blobStoreRepository.basePath())).thenReturn(blobPath); when(blobPath.add(anyString())).thenReturn(blobPath); @@ -1155,7 +1182,8 @@ private void mockObjectsForGettingPreviousClusterUUID(Map cluste clusterUUIDsPointers.get("cluster-uuid1"), randomAlphaOfLength(10), uploadedIndexMetadataList1, - "test-metadata1" + "test-metadata1", + clusterUUIDCommitted.getOrDefault("cluster-uuid1", true) ); Settings indexSettings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build(); IndexMetadata indexMetadata1 = IndexMetadata.builder("index1") @@ -1184,7 +1212,8 @@ private void mockObjectsForGettingPreviousClusterUUID(Map cluste clusterUUIDsPointers.get("cluster-uuid2"), randomAlphaOfLength(10), uploadedIndexMetadataList2, - "test-metadata2" + "test-metadata2", + clusterUUIDCommitted.getOrDefault("cluster-uuid2", true) ); IndexMetadata indexMetadata3 = IndexMetadata.builder("index1") .settings(indexSettings) @@ -1229,7 +1258,8 @@ private void mockObjectsForGettingPreviousClusterUUID(Map cluste clusterUUIDsPointers.get("cluster-uuid3"), randomAlphaOfLength(10), uploadedIndexMetadataList3, - "test-metadata3" + "test-metadata3", + clusterUUIDCommitted.getOrDefault("cluster-uuid3", true) ); mockBlobContainerForGlobalMetadata(blobContainer3, clusterManifest3, metadata3); mockBlobContainer(blobContainer3, clusterManifest3, indexMetadataMap3, ClusterMetadataManifest.CODEC_V1); @@ -1257,7 +1287,8 @@ private ClusterMetadataManifest generateClusterMetadataManifest( String previousClusterUUID, String stateUUID, List uploadedIndexMetadata, - String globalMetadataFileName + String globalMetadataFileName, + Boolean isUUIDCommitted ) { return ClusterMetadataManifest.builder() .indices(uploadedIndexMetadata) @@ -1269,7 +1300,7 @@ private ClusterMetadataManifest generateClusterMetadataManifest( .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) .previousClusterUUID(previousClusterUUID) .committed(true) - .clusterUUIDCommitted(true) + .clusterUUIDCommitted(isUUIDCommitted) .globalMetadataFileName(globalMetadataFileName) .codecVersion(ClusterMetadataManifest.CODEC_V1) .build();