From 695fbde56fa15fa86bbdecf1b72c204902a79b3b Mon Sep 17 00:00:00 2001 From: Lakshya Taragi <157457166+ltaragi@users.noreply.github.com> Date: Tue, 16 Apr 2024 17:45:33 +0530 Subject: [PATCH] Add validation while updating CompatibilityMode setting (#13080) Signed-off-by: Lakshya Taragi --- .../RemoteStoreMigrationSettingsUpdateIT.java | 34 +++ .../TransportClusterUpdateSettingsAction.java | 53 +++++ .../opensearch/snapshots/RestoreService.java | 6 +- ...ransportClusterManagerNodeActionTests.java | 218 ++++++++++++++++++ 4 files changed, 308 insertions(+), 3 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationSettingsUpdateIT.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationSettingsUpdateIT.java index 5ae2a976f4066..c3720e6fbbd09 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationSettingsUpdateIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationSettingsUpdateIT.java @@ -12,11 +12,13 @@ import org.opensearch.client.Client; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.settings.Settings; +import org.opensearch.common.settings.SettingsException; import org.opensearch.core.rest.RestStatus; import org.opensearch.index.IndexSettings; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.snapshots.SnapshotInfo; import org.opensearch.snapshots.SnapshotState; +import org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; import java.nio.file.Path; @@ -28,6 +30,7 @@ import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; import static org.opensearch.index.IndexSettings.INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode.MIXED; +import static 
org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode.STRICT; import static org.opensearch.node.remotestore.RemoteStoreNodeService.Direction.REMOTE_STORE; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; @@ -140,6 +143,37 @@ public void testNewRestoredIndexIsRemoteStoreBackedForRemoteStoreDirectionAndMix assertRemoteStoreBackedIndex(restoredIndexName2); } + // compatibility mode setting test + + public void testSwitchToStrictMode() throws Exception { + logger.info(" --> initialize cluster"); + initializeCluster(false); + + logger.info(" --> create a mixed mode cluster"); + setClusterMode(MIXED.mode); + addRemote = true; + String remoteNodeName = internalCluster().startNode(); + addRemote = false; + String nonRemoteNodeName = internalCluster().startNode(); + internalCluster().validateClusterFormed(); + assertNodeInCluster(remoteNodeName); + assertNodeInCluster(nonRemoteNodeName); + + logger.info(" --> attempt switching to strict mode"); + SettingsException exception = assertThrows(SettingsException.class, () -> setClusterMode(STRICT.mode)); + assertEquals( + "can not switch to STRICT compatibility mode when the cluster contains both remote and non-remote nodes", + exception.getMessage() + ); + + logger.info(" --> stop remote node so that cluster had only non-remote nodes"); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(remoteNodeName)); + ensureStableCluster(2); + + logger.info(" --> attempt switching to strict mode"); + setClusterMode(STRICT.mode); + } + // restore indices from a snapshot private void restoreSnapshot(String snapshotRepoName, String snapshotName, String restoredIndexName) { RestoreSnapshotResponse restoreSnapshotResponse = client.admin() diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java index 
2f3cc77b05550..e6c149216da09 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java @@ -45,6 +45,7 @@ import org.opensearch.cluster.metadata.IndexNameExpressionResolver; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.allocation.AllocationService; import org.opensearch.cluster.service.ClusterManagerTaskKeys; import org.opensearch.cluster.service.ClusterManagerTaskThrottler; @@ -53,12 +54,18 @@ import org.opensearch.common.Priority; import org.opensearch.common.inject.Inject; import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.settings.SettingsException; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.node.remotestore.RemoteStoreNodeService; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportService; import java.io.IOException; +import java.util.Locale; +import java.util.Set; +import java.util.stream.Collectors; /** * Transport action for updating cluster settings @@ -251,6 +258,7 @@ public void onFailure(String source, Exception e) { @Override public ClusterState execute(final ClusterState currentState) { + validateCompatibilityModeSettingRequest(request, state); final ClusterState clusterState = updater.updateSettings( currentState, clusterSettings.upgradeSettings(request.transientSettings()), @@ -264,4 +272,49 @@ public ClusterState execute(final ClusterState currentState) { ); } + /** + * Runs various checks associated with changing cluster compatibility mode + * @param request cluster settings update request, for settings to be updated and new values 
+ * @param clusterState current state of cluster, for information on nodes + */ + public void validateCompatibilityModeSettingRequest(ClusterUpdateSettingsRequest request, ClusterState clusterState) { + Settings settings = Settings.builder().put(request.persistentSettings()).put(request.transientSettings()).build(); + if (RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING.exists(settings)) { + String value = settings.get(RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey()).toLowerCase(Locale.ROOT); + validateAllNodesOfSameVersion(clusterState.nodes()); + if (value.equals(RemoteStoreNodeService.CompatibilityMode.STRICT.mode)) { + validateAllNodesOfSameType(clusterState.nodes()); + } + } + } + + /** + * Verifies that while trying to change the compatibility mode, all nodes must have the same version. + * If not, it throws SettingsException error + * @param discoveryNodes current discovery nodes in the cluster + */ + private void validateAllNodesOfSameVersion(DiscoveryNodes discoveryNodes) { + if (discoveryNodes.getMaxNodeVersion().equals(discoveryNodes.getMinNodeVersion()) == false) { + throw new SettingsException("can not change the compatibility mode when all the nodes in cluster are not of the same version"); + } + } + + /** + * Verifies that while trying to switch to STRICT compatibility mode, all nodes must be of the + * same type (all remote or all non-remote). 
If not, it throws SettingsException error + * @param discoveryNodes current discovery nodes in the cluster + */ + private void validateAllNodesOfSameType(DiscoveryNodes discoveryNodes) { + Set nodeTypes = discoveryNodes.getNodes() + .values() + .stream() + .map(DiscoveryNode::isRemoteStoreNode) + .collect(Collectors.toSet()); + if (nodeTypes.size() != 1) { + throw new SettingsException( + "can not switch to STRICT compatibility mode when the cluster contains both remote and non-remote nodes" + ); + } + } + } diff --git a/server/src/main/java/org/opensearch/snapshots/RestoreService.java b/server/src/main/java/org/opensearch/snapshots/RestoreService.java index b79a6a88250f8..e6a6b747c2baf 100644 --- a/server/src/main/java/org/opensearch/snapshots/RestoreService.java +++ b/server/src/main/java/org/opensearch/snapshots/RestoreService.java @@ -680,9 +680,9 @@ private Settings getOverrideSettingsInternal() { // We will use whatever replication strategy provided by user or from snapshot metadata unless // cluster is remote store enabled or user have restricted a specific replication type in the // cluster. 
If cluster is undergoing remote store migration, replication strategy is strictly SEGMENT type - if (RemoteStoreNodeAttribute.isRemoteStoreAttributePresent(clusterService.getSettings()) == true - || clusterSettings.get(IndicesService.CLUSTER_INDEX_RESTRICT_REPLICATION_TYPE_SETTING) == true - || RemoteStoreNodeService.isMigratingToRemoteStore(clusterSettings) == true) { + if (RemoteStoreNodeAttribute.isRemoteStoreAttributePresent(clusterService.getSettings()) + || clusterSettings.get(IndicesService.CLUSTER_INDEX_RESTRICT_REPLICATION_TYPE_SETTING) + || RemoteStoreNodeService.isMigratingToRemoteStore(clusterSettings)) { MetadataCreateIndexService.updateReplicationStrategy( settingsBuilder, request.indexSettings(), diff --git a/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java b/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java index 538416e1137f5..b3c58164fccbb 100644 --- a/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java +++ b/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java @@ -16,11 +16,15 @@ import org.opensearch.OpenSearchException; import org.opensearch.Version; import org.opensearch.action.ActionRequestValidationException; +import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; +import org.opensearch.action.admin.cluster.settings.TransportClusterUpdateSettingsAction; import org.opensearch.action.support.ActionFilters; import org.opensearch.action.support.PlainActionFuture; import org.opensearch.action.support.ThreadedActionListener; import org.opensearch.action.support.replication.ClusterStateCreationUtils; +import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.EmptyClusterInfoService; import 
org.opensearch.cluster.NotClusterManagerException; import org.opensearch.cluster.block.ClusterBlock; import org.opensearch.cluster.block.ClusterBlockException; @@ -28,14 +32,22 @@ import org.opensearch.cluster.block.ClusterBlocks; import org.opensearch.cluster.coordination.FailedToCommitClusterStateException; import org.opensearch.cluster.metadata.IndexNameExpressionResolver; +import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodeRole; import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.allocation.AllocationService; +import org.opensearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; +import org.opensearch.cluster.routing.allocation.decider.AllocationDeciders; +import org.opensearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; import org.opensearch.cluster.service.ClusterManagerThrottlingException; import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.UUIDs; import org.opensearch.common.action.ActionFuture; import org.opensearch.common.settings.Settings; +import org.opensearch.common.settings.SettingsException; import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.core.action.ActionListener; import org.opensearch.core.action.ActionResponse; @@ -44,9 +56,12 @@ import org.opensearch.core.rest.RestStatus; import org.opensearch.discovery.ClusterManagerNotDiscoveredException; import org.opensearch.node.NodeClosedException; +import org.opensearch.node.remotestore.RemoteStoreNodeService; +import org.opensearch.snapshots.EmptySnapshotsInfoService; import org.opensearch.tasks.Task; import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.test.gateway.TestGatewayAllocator; import 
org.opensearch.test.transport.CapturingTransport; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; @@ -59,7 +74,9 @@ import java.io.IOException; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; +import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.concurrent.BrokenBarrierException; @@ -68,8 +85,15 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import static org.opensearch.common.util.FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; import static org.opensearch.test.ClusterServiceUtils.createClusterService; import static org.opensearch.test.ClusterServiceUtils.setState; +import static org.opensearch.test.VersionUtils.randomCompatibleVersion; +import static org.opensearch.test.VersionUtils.randomOpenSearchVersion; +import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; @@ -692,4 +716,198 @@ protected void masterOperation(Task task, Request request, ClusterState state, A assertFalse(retried.get()); assertFalse(exception.get()); } + + public void testDontAllowSwitchingToStrictCompatibilityModeForMixedCluster() { + Settings nodeSettings = Settings.builder().put(REMOTE_STORE_MIGRATION_EXPERIMENTAL, "true").build(); + FeatureFlags.initializeFeatureFlags(nodeSettings); + + // request to change cluster compatibility mode to STRICT + Settings currentCompatibilityModeSettings = Settings.builder() + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), 
RemoteStoreNodeService.CompatibilityMode.MIXED) + .build(); + Settings intendedCompatibilityModeSettings = Settings.builder() + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), RemoteStoreNodeService.CompatibilityMode.STRICT) + .build(); + ClusterUpdateSettingsRequest request = new ClusterUpdateSettingsRequest(); + request.persistentSettings(intendedCompatibilityModeSettings); + + // mixed cluster (containing both remote and non-remote nodes) + DiscoveryNode nonRemoteNode1 = new DiscoveryNode(UUIDs.base64UUID(), buildNewFakeTransportAddress(), Version.CURRENT); + DiscoveryNode remoteNode1 = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + getRemoteStoreNodeAttributes(), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + DiscoveryNodes discoveryNodes = DiscoveryNodes.builder() + .add(nonRemoteNode1) + .localNodeId(nonRemoteNode1.getId()) + .add(remoteNode1) + .localNodeId(remoteNode1.getId()) + .build(); + + Metadata metadata = Metadata.builder().persistentSettings(currentCompatibilityModeSettings).build(); + + ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT).metadata(metadata).nodes(discoveryNodes).build(); + AllocationService allocationService = new AllocationService( + new AllocationDeciders(Collections.singleton(new MaxRetryAllocationDecider())), + new TestGatewayAllocator(), + new BalancedShardsAllocator(Settings.EMPTY), + EmptyClusterInfoService.INSTANCE, + EmptySnapshotsInfoService.INSTANCE + ); + TransportClusterUpdateSettingsAction transportClusterUpdateSettingsAction = new TransportClusterUpdateSettingsAction( + transportService, + clusterService, + threadPool, + allocationService, + new ActionFilters(Collections.emptySet()), + new IndexNameExpressionResolver(new ThreadContext(Settings.EMPTY)), + clusterService.getClusterSettings() + ); + + final SettingsException exception = expectThrows( + SettingsException.class, + () -> 
transportClusterUpdateSettingsAction.validateCompatibilityModeSettingRequest(request, clusterState) + ); + assertEquals( + "can not switch to STRICT compatibility mode when the cluster contains both remote and non-remote nodes", + exception.getMessage() + ); + + DiscoveryNode nonRemoteNode2 = new DiscoveryNode(UUIDs.base64UUID(), buildNewFakeTransportAddress(), Version.CURRENT); + DiscoveryNode remoteNode2 = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + getRemoteStoreNodeAttributes(), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + + // cluster with only non-remote nodes + discoveryNodes = DiscoveryNodes.builder() + .add(nonRemoteNode1) + .localNodeId(nonRemoteNode1.getId()) + .add(nonRemoteNode2) + .localNodeId(nonRemoteNode2.getId()) + .build(); + ClusterState sameTypeClusterState = ClusterState.builder(clusterState).nodes(discoveryNodes).build(); + transportClusterUpdateSettingsAction.validateCompatibilityModeSettingRequest(request, sameTypeClusterState); + + // cluster with only remote nodes + discoveryNodes = DiscoveryNodes.builder() + .add(remoteNode1) + .localNodeId(remoteNode1.getId()) + .add(remoteNode2) + .localNodeId(remoteNode2.getId()) + .build(); + sameTypeClusterState = ClusterState.builder(sameTypeClusterState).nodes(discoveryNodes).build(); + transportClusterUpdateSettingsAction.validateCompatibilityModeSettingRequest(request, sameTypeClusterState); + } + + public void testDontAllowSwitchingCompatibilityModeForClusterWithMultipleVersions() { + Settings nodeSettings = Settings.builder().put(REMOTE_STORE_MIGRATION_EXPERIMENTAL, "true").build(); + FeatureFlags.initializeFeatureFlags(nodeSettings); + + // request to change cluster compatibility mode + boolean toStrictMode = randomBoolean(); + Settings currentCompatibilityModeSettings = Settings.builder() + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), RemoteStoreNodeService.CompatibilityMode.MIXED) + .build(); + Settings 
intendedCompatibilityModeSettings = Settings.builder() + .put( + REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), + toStrictMode ? RemoteStoreNodeService.CompatibilityMode.STRICT : RemoteStoreNodeService.CompatibilityMode.MIXED + ) + .build(); + ClusterUpdateSettingsRequest request = new ClusterUpdateSettingsRequest(); + request.persistentSettings(intendedCompatibilityModeSettings); + + // two different but compatible OpenSearch versions for the discovery nodes + final Version version1 = randomOpenSearchVersion(random()); + final Version version2 = randomCompatibleVersion(random(), version1); + + assert version1.equals(version2) == false : "current nodes in the cluster must be of different versions"; + DiscoveryNode discoveryNode1 = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + toStrictMode ? getRemoteStoreNodeAttributes() : Collections.emptyMap(), + DiscoveryNodeRole.BUILT_IN_ROLES, + version1 + ); + DiscoveryNode discoveryNode2 = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + toStrictMode ? 
getRemoteStoreNodeAttributes() : Collections.emptyMap(), + DiscoveryNodeRole.BUILT_IN_ROLES, + version2 // not same as discoveryNode1 + ); + + DiscoveryNodes discoveryNodes = DiscoveryNodes.builder() + .add(discoveryNode1) + .localNodeId(discoveryNode1.getId()) + .add(discoveryNode2) + .localNodeId(discoveryNode2.getId()) + .build(); + + Metadata metadata = Metadata.builder().persistentSettings(currentCompatibilityModeSettings).build(); + + ClusterState differentVersionClusterState = ClusterState.builder(ClusterName.DEFAULT) + .metadata(metadata) + .nodes(discoveryNodes) + .build(); + AllocationService allocationService = new AllocationService( + new AllocationDeciders(Collections.singleton(new MaxRetryAllocationDecider())), + new TestGatewayAllocator(), + new BalancedShardsAllocator(Settings.EMPTY), + EmptyClusterInfoService.INSTANCE, + EmptySnapshotsInfoService.INSTANCE + ); + TransportClusterUpdateSettingsAction transportClusterUpdateSettingsAction = new TransportClusterUpdateSettingsAction( + transportService, + clusterService, + threadPool, + allocationService, + new ActionFilters(Collections.emptySet()), + new IndexNameExpressionResolver(new ThreadContext(Settings.EMPTY)), + clusterService.getClusterSettings() + ); + + // changing compatibility mode when all nodes are not of the same version + final SettingsException exception = expectThrows( + SettingsException.class, + () -> transportClusterUpdateSettingsAction.validateCompatibilityModeSettingRequest(request, differentVersionClusterState) + ); + assertThat( + exception.getMessage(), + containsString("can not change the compatibility mode when all the nodes in cluster are not of the same version") + ); + + // changing compatibility mode when all nodes are of the same version + discoveryNode2 = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + toStrictMode ? 
getRemoteStoreNodeAttributes() : Collections.emptyMap(), + DiscoveryNodeRole.BUILT_IN_ROLES, + version1 // same as discoveryNode1 + ); + discoveryNodes = DiscoveryNodes.builder() + .add(discoveryNode1) + .localNodeId(discoveryNode1.getId()) + .add(discoveryNode2) + .localNodeId(discoveryNode2.getId()) + .build(); + + ClusterState sameVersionClusterState = ClusterState.builder(differentVersionClusterState).nodes(discoveryNodes).build(); + transportClusterUpdateSettingsAction.validateCompatibilityModeSettingRequest(request, sameVersionClusterState); + } + + private Map getRemoteStoreNodeAttributes() { + Map remoteStoreNodeAttributes = new HashMap<>(); + remoteStoreNodeAttributes.put(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-segment-repo-1"); + remoteStoreNodeAttributes.put(REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-translog-repo-1"); + return remoteStoreNodeAttributes; + } }