From f1228e968d2e78b70aa9107e10d81647495f6fca Mon Sep 17 00:00:00 2001 From: rajiv-kv <157019998+rajiv-kv@users.noreply.github.com> Date: Tue, 30 Apr 2024 15:17:29 +0530 Subject: [PATCH 1/7] setting the response before latch countdown so that thread waiting does not encounter null (#13118) Signed-off-by: Rajiv Kumar Vaidyanathan --- .../admissioncontrol/AdmissionForClusterManagerIT.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionForClusterManagerIT.java b/server/src/internalClusterTest/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionForClusterManagerIT.java index 4d1964326820e..b9da5ffb86af0 100644 --- a/server/src/internalClusterTest/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionForClusterManagerIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionForClusterManagerIT.java @@ -170,8 +170,8 @@ public void testAdmissionControlResponseStatus() throws Exception { @Override public void sendResponse(RestResponse response) { - waitForResponse.countDown(); aliasResponse.set(response); + waitForResponse.countDown(); } }; From 1f406dbe5935227b9aa2877ed4b8932cde1e8821 Mon Sep 17 00:00:00 2001 From: Shourya Dutta Biswas <114977491+shourya035@users.noreply.github.com> Date: Tue, 30 Apr 2024 15:20:30 +0530 Subject: [PATCH 2/7] [Remote Store] Update index settings on shard movement during remote store migration (#13316) Signed-off-by: Shourya Dutta Biswas <114977491+shourya035@users.noreply.github.com> --- .../MigrationBaseTestCase.java | 22 + .../RemoteDualReplicationIT.java | 84 +-- .../RemoteMigrationIndexMetadataUpdateIT.java | 516 ++++++++++++++++++ .../TransportClusterUpdateSettingsAction.java | 21 + .../cluster/routing/IndexRoutingTable.java | 14 + .../routing/IndexShardRoutingTable.java | 16 + .../allocation/IndexMetadataUpdater.java | 45 +- .../routing/allocation/RoutingAllocation.java | 6 +- .../org/opensearch/index/IndexService.java | 12 + .../RemoteMigrationIndexMetadataUpdater.java | 181 ++++++ .../index/remote/RemoteStoreUtils.java | 57 ++ .../remotestore/RemoteStoreNodeAttribute.java | 30 + .../remotestore/RemoteStoreNodeService.java | 9 +- ...ransportClusterManagerNodeActionTests.java | 88 ++- .../routing/IndexShardRoutingTableTests.java | 45 ++ .../allocation/FailedShardsRoutingTests.java | 9 +- ...oteMigrationIndexMetadataUpdaterTests.java | 339 ++++++++++++ .../index/remote/RemoteStoreUtilsTests.java | 18 + .../index/shard/IndexShardTestUtils.java | 6 +- .../test/OpenSearchIntegTestCase.java | 6 + 20 files changed, 1480 insertions(+), 44 deletions(-) create mode 100644 server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationIndexMetadataUpdateIT.java create mode 100644 server/src/main/java/org/opensearch/index/remote/RemoteMigrationIndexMetadataUpdater.java create mode 100644 server/src/test/java/org/opensearch/index/remote/RemoteMigrationIndexMetadataUpdaterTests.java diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/MigrationBaseTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/MigrationBaseTestCase.java index 6f468f25ee5f1..611dfc2756b29 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/MigrationBaseTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/MigrationBaseTestCase.java @@ -34,6 +34,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; +import static org.opensearch.cluster.routing.allocation.decider.EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; import static org.opensearch.repositories.fs.ReloadableFsRepository.REPOSITORIES_FAILRATE_SETTING; @@ -199,4 +200,25 @@ public void setRefreshFrequency(int refreshFrequency) { this.refreshFrequency = refreshFrequency; } } + + public void excludeNodeSet(String attr, String value) { + assertAcked( + internalCluster().client() + .admin() + .cluster() + .prepareUpdateSettings() + .setTransientSettings(Settings.builder().put("cluster.routing.allocation.exclude._" + attr, value)) + .get() + ); + } + + public void stopShardRebalancing() { + assertAcked( + client().admin() + .cluster() + .prepareUpdateSettings() + .setPersistentSettings(Settings.builder().put(CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), "none").build()) + .get() + ); + } } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteDualReplicationIT.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteDualReplicationIT.java index 24a332212be6a..5094a7cf29c6a 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteDualReplicationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteDualReplicationIT.java @@ -30,6 +30,7 @@ import org.opensearch.test.transport.MockTransportService; import java.util.Collection; +import java.util.List; import java.util.Map; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -132,8 +133,8 @@ public void testRemotePrimaryDocRepReplica() throws Exception { /* Scenario: - - Starts 1 docrep backed data node - - Creates an index with 0 replica + - Starts 2 docrep backed data node + - Creates an index with 1 replica - Starts 1 remote backed data node - Index some docs - Move primary copy from docrep to remote through _cluster/reroute @@ -145,14 +146,14 @@ public void testRemotePrimaryDocRepReplica() throws Exception { public void testRemotePrimaryDocRepAndRemoteReplica() throws Exception { internalCluster().startClusterManagerOnlyNode(); - logger.info("---> Starting 1 docrep data nodes"); - String docrepNodeName = internalCluster().startDataOnlyNode(); + logger.info("---> Starting 2 docrep data nodes"); + internalCluster().startDataOnlyNodes(2); internalCluster().validateClusterFormed(); assertEquals(internalCluster().client().admin().cluster().prepareGetRepositories().get().repositories().size(), 0); - logger.info("---> Creating index with 0 replica"); + logger.info("---> Creating index with 1 replica"); Settings zeroReplicas = Settings.builder() - .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) .put(IndexService.RETENTION_LEASE_SYNC_INTERVAL_SETTING.getKey(), "1s") .put(IndexService.GLOBAL_CHECKPOINT_SYNC_INTERVAL_SETTING.getKey(), "1s") .build(); @@ -245,14 +246,26 @@ RLs on remote enabled copies are brought up to (GlobalCkp + 1) upon a flush requ pollAndCheckRetentionLeases(REMOTE_PRI_DOCREP_REMOTE_REP); } + /* + Scenario: + - Starts 2 docrep backed data node + - Creates an index with 1 replica + - Starts 1 remote backed data node + - Index some docs + - Move primary copy from docrep to remote through _cluster/reroute + - Starts another remote backed data node + - Expands index to 2 replicas. One replica copy lies in remote backed node and other in docrep backed node + - Index some more docs + - Assert retention lease consistency + */ public void testMissingRetentionLeaseCreatedOnFailedOverRemoteReplica() throws Exception { internalCluster().startClusterManagerOnlyNode(); - logger.info("---> Starting docrep data node"); - internalCluster().startDataOnlyNode(); + logger.info("---> Starting 2 docrep data nodes"); + internalCluster().startDataOnlyNodes(2); Settings zeroReplicasAndOverridenSyncIntervals = Settings.builder() - .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) .put(IndexService.GLOBAL_CHECKPOINT_SYNC_INTERVAL_SETTING.getKey(), "100ms") .put(IndexService.RETENTION_LEASE_SYNC_INTERVAL_SETTING.getKey(), "100ms") .build(); @@ -323,11 +336,10 @@ private void pollAndCheckRetentionLeases(String indexName) throws Exception { /* Scenario: - - Starts 1 docrep backed data node - - Creates an index with 0 replica + - Starts 2 docrep backed data node + - Creates an index with 1 replica - Starts 1 remote backed data node - Move primary copy from docrep to remote through _cluster/reroute - - Expands index to 1 replica - Stops remote enabled node - Ensure doc count is same after failover - Index some more docs to ensure working of failed-over primary @@ -335,13 +347,13 @@ private void pollAndCheckRetentionLeases(String indexName) throws Exception { public void testFailoverRemotePrimaryToDocrepReplica() throws Exception { internalCluster().startClusterManagerOnlyNode(); - logger.info("---> Starting 1 docrep data nodes"); - String docrepNodeName = internalCluster().startDataOnlyNode(); + logger.info("---> Starting 2 docrep data nodes"); + internalCluster().startDataOnlyNodes(2); internalCluster().validateClusterFormed(); assertEquals(internalCluster().client().admin().cluster().prepareGetRepositories().get().repositories().size(), 0); logger.info("---> Creating index with 0 replica"); - Settings excludeRemoteNode = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build(); + Settings excludeRemoteNode = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build(); createIndex(FAILOVER_REMOTE_TO_DOCREP, excludeRemoteNode); ensureGreen(FAILOVER_REMOTE_TO_DOCREP); initDocRepToRemoteMigration(); @@ -376,8 +388,8 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception { ); ensureGreen(FAILOVER_REMOTE_TO_DOCREP); - logger.info("---> Expanding index to 1 replica copy"); - Settings twoReplicas = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build(); + logger.info("---> Expanding index to 2 replica copies"); + Settings twoReplicas = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2).build(); assertAcked( internalCluster().client() .admin() @@ -412,7 +424,7 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception { logger.info("---> Stop remote store enabled node"); internalCluster().stopRandomNode(InternalTestCluster.nameFilter(remoteNodeName)); - ensureStableCluster(2); + ensureStableCluster(3); ensureYellow(FAILOVER_REMOTE_TO_DOCREP); shardStatsMap = internalCluster().client().admin().indices().prepareStats(FAILOVER_REMOTE_TO_DOCREP).setDocs(true).get().asMap(); @@ -433,7 +445,7 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception { refreshAndWaitForReplication(FAILOVER_REMOTE_TO_DOCREP); shardStatsMap = internalCluster().client().admin().indices().prepareStats(FAILOVER_REMOTE_TO_DOCREP).setDocs(true).get().asMap(); - assertEquals(1, shardStatsMap.size()); + assertEquals(2, shardStatsMap.size()); shardStatsMap.forEach( (shardRouting, shardStats) -> { assertEquals(firstBatch + secondBatch, shardStats.getStats().getDocs().getCount()); } ); @@ -441,8 +453,8 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception { /* Scenario: - - Starts 1 docrep backed data node - - Creates an index with 0 replica + - Starts 2 docrep backed data nodes + - Creates an index with 1 replica - Starts 1 remote backed data node - Moves primary copy from docrep to remote through _cluster/reroute - Starts 1 more remote backed data node @@ -455,13 +467,13 @@ public void testFailoverRemotePrimaryToDocrepReplica() throws Exception { public void testFailoverRemotePrimaryToRemoteReplica() throws Exception { internalCluster().startClusterManagerOnlyNode(); - logger.info("---> Starting 1 docrep data node"); - String docrepNodeName = internalCluster().startDataOnlyNode(); + logger.info("---> Starting 2 docrep data nodes"); + List docrepNodeNames = internalCluster().startDataOnlyNodes(2); internalCluster().validateClusterFormed(); assertEquals(internalCluster().client().admin().cluster().prepareGetRepositories().get().repositories().size(), 0); - logger.info("---> Creating index with 0 replica"); - createIndex(FAILOVER_REMOTE_TO_REMOTE, Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build()); + logger.info("---> Creating index with 1 replica"); + createIndex(FAILOVER_REMOTE_TO_REMOTE, Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).build()); ensureGreen(FAILOVER_REMOTE_TO_REMOTE); initDocRepToRemoteMigration(); @@ -484,15 +496,17 @@ public void testFailoverRemotePrimaryToRemoteReplica() throws Exception { AsyncIndexingService asyncIndexingService = new AsyncIndexingService(FAILOVER_REMOTE_TO_REMOTE); asyncIndexingService.startIndexing(); - logger.info("---> Moving primary copy from docrep node {} to remote enabled node {}", docrepNodeName, remoteNodeName1); + String primaryNodeName = primaryNodeName(FAILOVER_REMOTE_TO_REMOTE); + logger.info("---> Moving primary copy from docrep node {} to remote enabled node {}", primaryNodeName, remoteNodeName1); assertAcked( internalCluster().client() .admin() .cluster() .prepareReroute() - .add(new MoveAllocationCommand(FAILOVER_REMOTE_TO_REMOTE, 0, docrepNodeName, remoteNodeName1)) + .add(new MoveAllocationCommand(FAILOVER_REMOTE_TO_REMOTE, 0, primaryNodeName, remoteNodeName1)) .get() ); + waitForRelocation(); ensureGreen(FAILOVER_REMOTE_TO_REMOTE); assertEquals(primaryNodeName(FAILOVER_REMOTE_TO_REMOTE), remoteNodeName1); @@ -507,7 +521,13 @@ public void testFailoverRemotePrimaryToRemoteReplica() throws Exception { .indices() .prepareUpdateSettings() .setIndices(FAILOVER_REMOTE_TO_REMOTE) - .setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2).build()) + .setSettings( + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2) + // prevent replica copy from being allocated to the extra docrep node + .put("index.routing.allocation.exclude._name", primaryNodeName) + .build() + ) .get() ); ensureGreen(FAILOVER_REMOTE_TO_REMOTE); @@ -536,8 +556,8 @@ public void testFailoverRemotePrimaryToRemoteReplica() throws Exception { logger.info("---> Stop remote store enabled node hosting the primary"); internalCluster().stopRandomNode(InternalTestCluster.nameFilter(remoteNodeName1)); - ensureStableCluster(3); - ensureYellow(FAILOVER_REMOTE_TO_REMOTE); + ensureStableCluster(4); + ensureYellowAndNoInitializingShards(FAILOVER_REMOTE_TO_REMOTE); DiscoveryNodes finalNodes = internalCluster().client().admin().cluster().prepareState().get().getState().getNodes(); waitUntil(() -> { @@ -580,7 +600,6 @@ public void testFailoverRemotePrimaryToRemoteReplica() throws Exception { - Creates an index with 0 replica - Starts 1 remote backed data node - Move primary copy from docrep to remote through _cluster/reroute - - Expands index to 1 replica - Stops remote enabled node - Ensure doc count is same after failover - Index some more docs to ensure working of failed-over primary @@ -664,7 +683,8 @@ private void assertReplicaAndPrimaryConsistency(String indexName, int firstBatch RemoteSegmentStats remoteSegmentStats = shardStats.getSegments().getRemoteSegmentStats(); assertTrue(remoteSegmentStats.getUploadBytesSucceeded() > 0); assertTrue(remoteSegmentStats.getTotalUploadTime() > 0); - } else { + } + if (shardRouting.unassigned() == false && shardRouting.primary() == false) { boolean remoteNode = nodes.get(shardRouting.currentNodeId()).isRemoteStoreNode(); assertEquals( "Mismatched doc count. Is this on remote node ? " + remoteNode, diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationIndexMetadataUpdateIT.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationIndexMetadataUpdateIT.java new file mode 100644 index 0000000000000..45679598dc551 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationIndexMetadataUpdateIT.java @@ -0,0 +1,516 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.remotemigration; + +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.health.ClusterHealthStatus; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand; +import org.opensearch.common.settings.Settings; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.test.InternalTestCluster; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.util.List; +import java.util.function.Function; +import java.util.stream.Collectors; + +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class RemoteMigrationIndexMetadataUpdateIT extends MigrationBaseTestCase { + /** + * Scenario: + * Performs a blue/green type migration from docrep to remote enabled cluster. + * Asserts that remote based index settings are applied after all shards move over + */ + public void testIndexSettingsUpdateAfterIndexMovedToRemoteThroughAllocationExclude() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + + logger.info("---> Starting 2 docrep nodes"); + addRemote = false; + internalCluster().startDataOnlyNodes(2, Settings.builder().put("node.attr._type", "docrep").build()); + internalCluster().validateClusterFormed(); + + logger.info("---> Creates an index with 1 primary and 1 replica"); + String indexName = "migration-index-allocation-exclude"; + Settings oneReplica = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .build(); + logger.info("---> Asserts index still has docrep index settings"); + createIndexAndAssertDocrepProperties(indexName, oneReplica); + + logger.info("---> Start indexing in parallel thread"); + AsyncIndexingService asyncIndexingService = new AsyncIndexingService(indexName); + asyncIndexingService.startIndexing(); + initDocRepToRemoteMigration(); + + logger.info("---> Adding 2 remote enabled nodes to the cluster"); + addRemote = true; + internalCluster().startDataOnlyNodes(2, Settings.builder().put("node.attr._type", "remote").build()); + internalCluster().validateClusterFormed(); + + logger.info("---> Excluding docrep nodes from allocation"); + excludeNodeSet("type", "docrep"); + waitForRelocation(); + waitNoPendingTasksOnAll(); + + logger.info("---> Stop indexing and assert remote enabled index settings have been applied"); + asyncIndexingService.stopIndexing(); + assertRemoteProperties(indexName); + } + + /** + * Scenario: + * Performs a manual _cluster/reroute to move shards from docrep to remote enabled nodes. + * Asserts that remote based index settings are only applied for indices whose shards + * have completely moved over to remote enabled nodes + */ + public void testIndexSettingsUpdateAfterIndexMovedToRemoteThroughManualReroute() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + + logger.info("---> Starting 2 docrep nodes"); + List docrepNodeNames = internalCluster().startDataOnlyNodes(2); + internalCluster().validateClusterFormed(); + + logger.info("---> Creating 2 indices with 1 primary and 1 replica"); + String indexName1 = "migration-index-manual-reroute-1"; + String indexName2 = "migration-index-manual-reroute-2"; + Settings oneReplica = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .build(); + createIndexAndAssertDocrepProperties(indexName1, oneReplica); + createIndexAndAssertDocrepProperties(indexName2, oneReplica); + + logger.info("---> Starting parallel indexing on both indices"); + AsyncIndexingService indexOne = new AsyncIndexingService(indexName1); + indexOne.startIndexing(); + + AsyncIndexingService indexTwo = new AsyncIndexingService(indexName2); + indexTwo.startIndexing(); + + logger.info( + "---> Stopping shard rebalancing to ensure shards do not automatically move over to newer nodes after they are launched" + ); + stopShardRebalancing(); + + logger.info("---> Starting 2 remote store enabled nodes"); + initDocRepToRemoteMigration(); + addRemote = true; + List remoteNodeNames = internalCluster().startDataOnlyNodes(2); + internalCluster().validateClusterFormed(); + + String primaryNode = primaryNodeName(indexName1); + String replicaNode = docrepNodeNames.stream() + .filter(nodeName -> nodeName.equals(primaryNodeName(indexName1)) == false) + .collect(Collectors.toList()) + .get(0); + + logger.info("---> Moving over both shard copies for the first index to remote enabled nodes"); + assertAcked( + client().admin() + .cluster() + .prepareReroute() + .add(new MoveAllocationCommand(indexName1, 0, primaryNode, remoteNodeNames.get(0))) + .execute() + .actionGet() + ); + waitForRelocation(); + + assertAcked( + client().admin() + .cluster() + .prepareReroute() + .add(new MoveAllocationCommand(indexName1, 0, replicaNode, remoteNodeNames.get(1))) + .execute() + .actionGet() + ); + waitForRelocation(); + + logger.info("---> Moving only primary for the second index to remote enabled nodes"); + assertAcked( + client().admin() + .cluster() + .prepareReroute() + .add(new MoveAllocationCommand(indexName2, 0, primaryNodeName(indexName2), remoteNodeNames.get(0))) + .execute() + .actionGet() + ); + waitForRelocation(); + waitNoPendingTasksOnAll(); + + logger.info("---> Stopping indexing"); + indexOne.stopIndexing(); + indexTwo.stopIndexing(); + + logger.info("---> Assert remote settings are applied for index one but not for index two"); + assertRemoteProperties(indexName1); + assertDocrepProperties(indexName2); + } + + /** + * Scenario: + * Creates a mixed mode cluster. One index gets created before remote nodes are introduced, + * while the other one is created after remote nodes are added. + *

+ * For the first index, asserts docrep settings at first, excludes docrep nodes from + * allocation and asserts that remote index settings are applied after all shards + * have been relocated. + *

+ * For the second index, asserts that it already has remote enabled settings. + * Indexes some more docs and asserts that the index metadata version does not increment + */ + public void testIndexSettingsUpdatedOnlyForMigratingIndex() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + + logger.info("---> Starting 2 docrep nodes"); + addRemote = false; + internalCluster().startDataOnlyNodes(2, Settings.builder().put("node.attr._type", "docrep").build()); + internalCluster().validateClusterFormed(); + + logger.info("---> Creating the first index with 1 primary and 1 replica"); + String indexName = "migration-index"; + Settings oneReplica = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .build(); + createIndexAndAssertDocrepProperties(indexName, oneReplica); + + logger.info("---> Starting indexing in parallel"); + AsyncIndexingService indexingService = new AsyncIndexingService(indexName); + indexingService.startIndexing(); + + logger.info("---> Storing current index metadata version"); + long initalMetadataVersion = internalCluster().client() + .admin() + .cluster() + .prepareState() + .get() + .getState() + .metadata() + .index(indexName) + .getVersion(); + + logger.info("---> Adding 2 remote enabled nodes to the cluster"); + initDocRepToRemoteMigration(); + addRemote = true; + internalCluster().startDataOnlyNodes(2, Settings.builder().put("node.attr._type", "remote").build()); + internalCluster().validateClusterFormed(); + + logger.info("---> Excluding docrep nodes from allocation"); + excludeNodeSet("type", "docrep"); + + waitForRelocation(); + waitNoPendingTasksOnAll(); + indexingService.stopIndexing(); + + logger.info("---> Assert remote settings are applied"); + assertRemoteProperties(indexName); + assertTrue( + initalMetadataVersion < internalCluster().client() + .admin() + .cluster() + .prepareState() + .get() + .getState() + .metadata() + .index(indexName) + .getVersion() + ); + + logger.info("---> Creating a new index on remote enabled nodes"); + String secondIndex = "remote-index"; + createIndex( + secondIndex, + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1).put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).build() + ); + indexBulk(secondIndex, 100); + initalMetadataVersion = internalCluster().client() + .admin() + .cluster() + .prepareState() + .get() + .getState() + .metadata() + .index(secondIndex) + .getVersion(); + refresh(secondIndex); + ensureGreen(secondIndex); + + waitNoPendingTasksOnAll(); + + assertRemoteProperties(secondIndex); + + logger.info("---> Assert metadata version is not changed"); + assertEquals( + initalMetadataVersion, + internalCluster().client().admin().cluster().prepareState().get().getState().metadata().index(secondIndex).getVersion() + ); + } + + /** + * Scenario: + * Creates an index with 1 primary, 2 replicas on 2 docrep nodes. Since the replica + * configuration is incorrect, the index stays YELLOW. + * Starts 2 more remote nodes and initiates shard relocation through allocation exclusion. + * After shard relocation completes, shuts down the docrep nodes and asserts remote + * index settings are applied even when the index is in YELLOW state + */ + public void testIndexSettingsUpdatedEvenForMisconfiguredReplicas() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + + logger.info("---> Starting 2 docrep nodes"); + addRemote = false; + List docrepNodes = internalCluster().startDataOnlyNodes(2, Settings.builder().put("node.attr._type", "docrep").build()); + internalCluster().validateClusterFormed(); + + logger.info("---> Creating index with 1 primary and 2 replicas"); + String indexName = "migration-index-allocation-exclude"; + Settings oneReplica = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .build(); + createIndexAssertHealthAndDocrepProperties(indexName, oneReplica, this::ensureYellowAndNoInitializingShards); + + logger.info("---> Starting indexing in parallel"); + AsyncIndexingService asyncIndexingService = new AsyncIndexingService(indexName); + asyncIndexingService.startIndexing(); + + logger.info("---> Starts 2 remote enabled nodes"); + initDocRepToRemoteMigration(); + addRemote = true; + internalCluster().startDataOnlyNodes(2, Settings.builder().put("node.attr._type", "remote").build()); + internalCluster().validateClusterFormed(); + + logger.info("---> Excluding docrep nodes from allocation"); + excludeNodeSet("type", "docrep"); + waitForRelocation(); + waitNoPendingTasksOnAll(); + asyncIndexingService.stopIndexing(); + + logger.info("---> Assert cluster has turned green since more nodes are added to the cluster"); + ensureGreen(indexName); + + logger.info("---> Assert index still has dcorep settings since replica copies are still on docrep nodes"); + assertDocrepProperties(indexName); + + logger.info("---> Stopping docrep nodes"); + for (String node : docrepNodes) { + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(node)); + } + waitNoPendingTasksOnAll(); + ensureYellowAndNoInitializingShards(indexName); + + logger.info("---> Assert remote settings are applied"); + assertRemoteProperties(indexName); + } + + /** + * Scenario: + * Creates an index with 1 primary, 2 replicas on 2 docrep nodes. + * Starts 2 more remote nodes and initiates shard relocation through allocation exclusion. + * After shard relocation completes, restarts the docrep node holding extra replica shard copy + * and asserts remote index settings are applied as soon as the docrep replica copy is unassigned + */ + public void testIndexSettingsUpdatedWhenDocrepNodeIsRestarted() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + + logger.info("---> Starting 2 docrep nodes"); + addRemote = false; + List docrepNodes = internalCluster().startDataOnlyNodes(2, Settings.builder().put("node.attr._type", "docrep").build()); + internalCluster().validateClusterFormed(); + + logger.info("---> Creating index with 1 primary and 2 replicas"); + String indexName = "migration-index-allocation-exclude"; + Settings oneReplica = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .build(); + createIndexAssertHealthAndDocrepProperties(indexName, oneReplica, this::ensureYellowAndNoInitializingShards); + + logger.info("---> Starting indexing in parallel"); + AsyncIndexingService asyncIndexingService = new AsyncIndexingService(indexName); + asyncIndexingService.startIndexing(); + + logger.info("---> Starts 2 remote enabled nodes"); + initDocRepToRemoteMigration(); + addRemote = true; + internalCluster().startDataOnlyNodes(2, Settings.builder().put("node.attr._type", "remote").build()); + internalCluster().validateClusterFormed(); + + logger.info("---> Excluding docrep nodes from allocation"); + excludeNodeSet("type", "docrep"); + waitForRelocation(); + waitNoPendingTasksOnAll(); + asyncIndexingService.stopIndexing(); + + logger.info("---> Assert cluster has turned green since more nodes are added to the cluster"); + ensureGreen(indexName); + + logger.info("---> Assert index still has dcorep settings since replica copies are still on docrep nodes"); + assertDocrepProperties(indexName); + + ClusterState clusterState = internalCluster().client().admin().cluster().prepareState().get().getState(); + DiscoveryNodes nodes = clusterState.nodes(); + + String docrepReplicaNodeName = ""; + for (ShardRouting shardRouting : clusterState.routingTable().index(indexName).shard(0).getShards()) { + if (nodes.get(shardRouting.currentNodeId()).isRemoteStoreNode() == false) { + docrepReplicaNodeName = nodes.get(shardRouting.currentNodeId()).getName(); + break; + } + } + excludeNodeSet("type", null); + + logger.info("---> Stopping docrep node holding the replica copy"); + internalCluster().restartNode(docrepReplicaNodeName); + ensureStableCluster(5); + waitNoPendingTasksOnAll(); + + logger.info("---> Assert remote index settings have been applied"); + assertRemoteProperties(indexName); + logger.info("---> Assert cluster is yellow since remote index settings have been applied"); + ensureYellowAndNoInitializingShards(indexName); + } + + /** + * Scenario: + * Creates a docrep cluster with 3 nodes and an index with 1 primary and 2 replicas. + * Adds 3 more remote nodes to the cluster and moves over the primary copy from docrep + * to remote through _cluster/reroute. Asserts that the remote store path based metadata + * have been applied to the index. + * Moves over the first replica copy and asserts that the remote store based settings has not been applied + * Excludes docrep nodes from allocation to force migration of the 3rd replica copy and asserts remote + * store settings has been applied as all shards have moved over + */ + public void testRemotePathMetadataAddedWithFirstPrimaryMovingToRemote() throws Exception { + String indexName = "index-1"; + internalCluster().startClusterManagerOnlyNode(); + + logger.info("---> Starting 3 docrep nodes"); + internalCluster().startDataOnlyNodes(3, Settings.builder().put("node.attr._type", "docrep").build()); + internalCluster().validateClusterFormed(); + + logger.info("---> Creating index with 1 primary and 2 replicas"); + Settings oneReplica = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 2).build(); + createIndexAndAssertDocrepProperties(indexName, oneReplica); + + logger.info("---> Adding 3 remote enabled nodes"); + initDocRepToRemoteMigration(); + addRemote = true; + List remoteEnabledNodes = internalCluster().startDataOnlyNodes( + 3, + Settings.builder().put("node.attr._type", "remote").build() + ); + + logger.info("---> Moving primary copy to remote enabled node"); + String primaryNodeName = primaryNodeName(indexName); + assertAcked( + client().admin() + .cluster() + .prepareReroute() + .add(new MoveAllocationCommand(indexName, 0, primaryNodeName, remoteEnabledNodes.get(0))) + .execute() + .actionGet() + ); + waitForRelocation(); + waitNoPendingTasksOnAll(); + + logger.info("---> Assert custom remote path based metadata is applied"); + assertCustomIndexMetadata(indexName); + + logger.info("---> Moving over one replica copy to remote enabled node"); + String replicaNodeName = replicaNodeName(indexName); + assertAcked( + client().admin() + .cluster() + .prepareReroute() + .add(new MoveAllocationCommand(indexName, 0, replicaNodeName, remoteEnabledNodes.get(1))) + .execute() + .actionGet() + ); + waitForRelocation(); + waitNoPendingTasksOnAll(); + + logger.info("---> Assert index still has docrep settings"); + assertDocrepProperties(indexName); + + logger.info("---> Excluding docrep nodes from allocation"); + excludeNodeSet("type", "docrep"); + waitForRelocation(); + waitNoPendingTasksOnAll(); + + logger.info("---> Assert index has remote store settings"); + assertRemoteProperties(indexName); + } + + private void createIndexAndAssertDocrepProperties(String index, Settings settings) { + createIndexAssertHealthAndDocrepProperties(index, settings, this::ensureGreen); + } + + private void createIndexAssertHealthAndDocrepProperties( + String index, + Settings settings, + Function ensureState + ) { + createIndex(index, settings); + refresh(index); + ensureState.apply(index); + assertDocrepProperties(index); + } + + /** + * Assert current index settings have: + * - index.remote_store.enabled == false + * - index.remote_store.segment.repository == null + * - index.remote_store.translog.repository == null + * - index.replication.type == DOCUMENT + */ + private void assertDocrepProperties(String index) { + logger.info("---> Asserting docrep index settings"); + IndexMetadata iMd = internalCluster().client().admin().cluster().prepareState().get().getState().metadata().index(index); + Settings settings = iMd.getSettings(); + assertFalse(IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING.get(settings)); + assertFalse(IndexMetadata.INDEX_REMOTE_TRANSLOG_REPOSITORY_SETTING.exists(settings)); + assertFalse(IndexMetadata.INDEX_REMOTE_SEGMENT_STORE_REPOSITORY_SETTING.exists(settings)); + assertEquals(ReplicationType.DOCUMENT, IndexMetadata.INDEX_REPLICATION_TYPE_SETTING.get(settings)); + } + + /** + * Assert current index settings have: + * - index.remote_store.enabled == true + * - index.remote_store.segment.repository != null + * - index.remote_store.translog.repository != null + * - index.replication.type == SEGMENT + * Asserts index metadata customs has the remote_store key + */ + private void assertRemoteProperties(String index) { + logger.info("---> Asserting remote index settings"); + IndexMetadata iMd = internalCluster().client().admin().cluster().prepareState().get().getState().metadata().index(index); + Settings settings = iMd.getSettings(); + assertTrue(IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING.get(settings)); + assertTrue(IndexMetadata.INDEX_REMOTE_TRANSLOG_REPOSITORY_SETTING.exists(settings)); + assertTrue(IndexMetadata.INDEX_REMOTE_SEGMENT_STORE_REPOSITORY_SETTING.exists(settings)); + assertEquals(ReplicationType.SEGMENT, IndexMetadata.INDEX_REPLICATION_TYPE_SETTING.get(settings)); + assertNotNull(iMd.getCustomData(IndexMetadata.REMOTE_STORE_CUSTOM_KEY)); + } + + /** + * Asserts index metadata customs has the remote_store key + */ + private void assertCustomIndexMetadata(String index) { + logger.info("---> Asserting custom index metadata"); + IndexMetadata iMd = internalCluster().client().admin().cluster().prepareState().get().getState().metadata().index(index); + assertNotNull(iMd.getCustomData(IndexMetadata.REMOTE_STORE_CUSTOM_KEY)); + } +} diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java index e6c149216da09..6292d32fee26d 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java @@ -42,6 +42,7 @@ import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.block.ClusterBlockException; import org.opensearch.cluster.block.ClusterBlockLevel; +import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.IndexNameExpressionResolver; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; @@ -58,15 +59,19 @@ import org.opensearch.common.settings.SettingsException; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.index.remote.RemoteMigrationIndexMetadataUpdater; import org.opensearch.node.remotestore.RemoteStoreNodeService; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportService; import java.io.IOException; +import java.util.Collection; import java.util.Locale; import java.util.Set; import java.util.stream.Collectors; +import static org.opensearch.index.remote.RemoteMigrationIndexMetadataUpdater.indexHasAllRemoteStoreRelatedMetadata; + /** * Transport action for updating cluster settings * @@ -284,6 +289,7 @@ public void validateCompatibilityModeSettingRequest(ClusterUpdateSettingsRequest validateAllNodesOfSameVersion(clusterState.nodes()); if (value.equals(RemoteStoreNodeService.CompatibilityMode.STRICT.mode)) { validateAllNodesOfSameType(clusterState.nodes()); + validateIndexSettings(clusterState); } } } @@ -317,4 +323,19 @@ private void validateAllNodesOfSameType(DiscoveryNodes discoveryNodes) { } } + /** + * Verifies that while trying to switch to STRICT compatibility mode, + * all indices in the cluster have {@link RemoteMigrationIndexMetadataUpdater#indexHasAllRemoteStoreRelatedMetadata(IndexMetadata)} as true. + * If not, throws {@link SettingsException} + * @param clusterState current cluster state + */ + private void validateIndexSettings(ClusterState clusterState) { + Collection allIndicesMetadata = clusterState.metadata().indices().values(); + if (allIndicesMetadata.isEmpty() == false + && allIndicesMetadata.stream().anyMatch(indexMetadata -> indexHasAllRemoteStoreRelatedMetadata(indexMetadata) == false)) { + throw new SettingsException( + "can not switch to STRICT compatibility mode since all indices in the cluster does not have remote store based index settings" + ); + } + } } diff --git a/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java b/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java index faadc3f7583fb..7c179f6d4d8fd 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java +++ b/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java @@ -302,6 +302,20 @@ public List shardsWithState(ShardRoutingState state) { return shards; } + /** + * Returns a {@link List} of shards that match the provided {@link Predicate} + * + * @param predicate {@link Predicate} to apply + * @return a {@link List} of shards that match one of the given {@link Predicate} + */ + public List shardsMatchingPredicate(Predicate predicate) { + List shards = new ArrayList<>(); + for (IndexShardRoutingTable shardRoutingTable : this) { + shards.addAll(shardRoutingTable.shardsMatchingPredicate(predicate)); + } + return shards; + } + public int shardsMatchingPredicateCount(Predicate predicate) { int count = 0; for (IndexShardRoutingTable shardRoutingTable : this) { diff --git a/server/src/main/java/org/opensearch/cluster/routing/IndexShardRoutingTable.java b/server/src/main/java/org/opensearch/cluster/routing/IndexShardRoutingTable.java index 2c250f6a5d86e..fd8cbea42c12f 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/IndexShardRoutingTable.java +++ b/server/src/main/java/org/opensearch/cluster/routing/IndexShardRoutingTable.java @@ -904,6 +904,22 @@ public List shardsWithState(ShardRoutingState state) { return shards; } + /** + * Returns a {@link List} of shards that match the provided {@link Predicate} + * + * @param predicate {@link Predicate} to apply + * @return a {@link List} of shards that match one of the given {@link Predicate} + */ + public List shardsMatchingPredicate(Predicate predicate) { + List shards = new ArrayList<>(); + for (ShardRouting shardEntry : this) { + if (predicate.test(shardEntry)) { + shards.add(shardEntry); + } + } + return shards; + } + public int shardsMatchingPredicateCount(Predicate predicate) { int count = 0; for (ShardRouting shardEntry : this) { diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/IndexMetadataUpdater.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/IndexMetadataUpdater.java index 7fc78b05880f3..ddcccd597e894 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/IndexMetadataUpdater.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/IndexMetadataUpdater.java @@ -32,10 +32,12 @@ package org.opensearch.cluster.routing.allocation; +import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.IndexShardRoutingTable; import org.opensearch.cluster.routing.RecoverySource; import org.opensearch.cluster.routing.RoutingChangesObserver; @@ -45,6 +47,7 @@ import org.opensearch.common.util.set.Sets; import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; +import org.opensearch.index.remote.RemoteMigrationIndexMetadataUpdater; import java.util.Collections; import java.util.Comparator; @@ -67,14 +70,15 @@ * @opensearch.internal */ public class IndexMetadataUpdater extends RoutingChangesObserver.AbstractRoutingChangesObserver { + private final Logger logger = LogManager.getLogger(IndexMetadataUpdater.class); private final Map shardChanges = new HashMap<>(); + private boolean ongoingRemoteStoreMigration = false; @Override public void shardInitialized(ShardRouting unassignedShard, ShardRouting initializedShard) { assert initializedShard.isRelocationTarget() == false : "shardInitialized is not called on relocation target: " + initializedShard; if (initializedShard.primary()) { increasePrimaryTerm(initializedShard.shardId()); - Updates updates = changes(initializedShard.shardId()); assert updates.initializedPrimary == null : "Primary cannot be initialized more than once in same allocation round: " + "(previous: " @@ -113,6 +117,12 @@ public void shardFailed(ShardRouting failedShard, UnassignedInfo unassignedInfo) } increasePrimaryTerm(failedShard.shardId()); } + + // Track change through shardChanges Map regardless of above-mentioned conditions + // To be used to update index metadata while computing new cluster state + if (ongoingRemoteStoreMigration) { + changes(failedShard.shardId()); + } } @Override @@ -120,20 +130,34 @@ public void relocationCompleted(ShardRouting removedRelocationSource) { removeAllocationId(removedRelocationSource); } + /** + * Adds the target {@link ShardRouting} to the tracking updates set. + * Used to track started relocations while applying changes to the new {@link ClusterState} + */ + @Override + public void relocationStarted(ShardRouting startedShard, ShardRouting targetRelocatingShard) { + // Store change in shardChanges Map regardless of above-mentioned conditions + // To be used to update index metadata while computing new cluster state + if (ongoingRemoteStoreMigration) { + changes(targetRelocatingShard.shardId()); + } + } + /** * Updates the current {@link Metadata} based on the changes of this RoutingChangesObserver. Specifically * we update {@link IndexMetadata#getInSyncAllocationIds()} and {@link IndexMetadata#primaryTerm(int)} based on * the changes made during this allocation. + *
+ * Manipulates index settings or index metadata during an ongoing remote store migration * * @param oldMetadata {@link Metadata} object from before the routing nodes was changed. * @param newRoutingTable {@link RoutingTable} object after routing changes were applied. * @return adapted {@link Metadata}, potentially the original one if no change was needed. */ - public Metadata applyChanges(Metadata oldMetadata, RoutingTable newRoutingTable) { + public Metadata applyChanges(Metadata oldMetadata, RoutingTable newRoutingTable, DiscoveryNodes discoveryNodes) { Map>> changesGroupedByIndex = shardChanges.entrySet() .stream() .collect(Collectors.groupingBy(e -> e.getKey().getIndex())); - Metadata.Builder metadataBuilder = null; for (Map.Entry>> indexChanges : changesGroupedByIndex.entrySet()) { Index index = indexChanges.getKey(); @@ -144,6 +168,17 @@ public Metadata applyChanges(Metadata oldMetadata, RoutingTable newRoutingTable) Updates updates = shardEntry.getValue(); indexMetadataBuilder = updateInSyncAllocations(newRoutingTable, oldIndexMetadata, indexMetadataBuilder, shardId, updates); indexMetadataBuilder = updatePrimaryTerm(oldIndexMetadata, indexMetadataBuilder, shardId, updates); + if (ongoingRemoteStoreMigration) { + RemoteMigrationIndexMetadataUpdater migrationImdUpdater = new RemoteMigrationIndexMetadataUpdater( + discoveryNodes, + newRoutingTable, + oldIndexMetadata, + oldMetadata.settings(), + logger + ); + migrationImdUpdater.maybeUpdateRemoteStorePathStrategy(indexMetadataBuilder, index.getName()); + migrationImdUpdater.maybeAddRemoteIndexSettings(indexMetadataBuilder, index.getName()); + } } if (indexMetadataBuilder != null) { @@ -369,6 +404,10 @@ private void increasePrimaryTerm(ShardId shardId) { changes(shardId).increaseTerm = true; } + public void setOngoingRemoteStoreMigration(boolean ongoingRemoteStoreMigration) { + this.ongoingRemoteStoreMigration = ongoingRemoteStoreMigration; + } + private static class Updates { private boolean increaseTerm; // whether primary term should be increased private Set addedAllocationIds = new HashSet<>(); // allocation ids that should be added to the in-sync set diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/RoutingAllocation.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/RoutingAllocation.java index bf2db57128517..fd789774f6f4f 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/RoutingAllocation.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/RoutingAllocation.java @@ -55,6 +55,7 @@ import static java.util.Collections.emptySet; import static java.util.Collections.unmodifiableSet; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.isMigratingToRemoteStore; /** * The {@link RoutingAllocation} keep the state of the current allocation @@ -125,6 +126,9 @@ public RoutingAllocation( this.clusterInfo = clusterInfo; this.shardSizeInfo = shardSizeInfo; this.currentNanoTime = currentNanoTime; + if (isMigratingToRemoteStore(metadata)) { + indexMetadataUpdater.setOngoingRemoteStoreMigration(true); + } } /** returns the nano time captured at the beginning of the allocation. used to make sure all time based decisions are aligned */ @@ -267,7 +271,7 @@ public RoutingChangesObserver changes() { * Returns updated {@link Metadata} based on the changes that were made to the routing nodes */ public Metadata updateMetadataWithRoutingChanges(RoutingTable newRoutingTable) { - return indexMetadataUpdater.applyChanges(metadata, newRoutingTable); + return indexMetadataUpdater.applyChanges(metadata, newRoutingTable, nodes()); } /** diff --git a/server/src/main/java/org/opensearch/index/IndexService.java b/server/src/main/java/org/opensearch/index/IndexService.java index 14c6cecb1f847..e501d7eff3f81 100644 --- a/server/src/main/java/org/opensearch/index/IndexService.java +++ b/server/src/main/java/org/opensearch/index/IndexService.java @@ -132,6 +132,7 @@ import static java.util.Collections.emptyMap; import static java.util.Collections.unmodifiableMap; import static org.opensearch.common.collect.MapBuilder.newMapBuilder; +import static org.opensearch.index.remote.RemoteMigrationIndexMetadataUpdater.indexHasRemoteStoreSettings; /** * The main OpenSearch index service @@ -516,6 +517,17 @@ public synchronized IndexShard createShard( ); } remoteStore = new Store(shardId, this.indexSettings, remoteDirectory, lock, Store.OnClose.EMPTY, path); + } else { + // Disallow shards with remote store based settings to be created on non-remote store enabled nodes + // Even though we have `RemoteStoreMigrationAllocationDecider` in place to prevent something like this from happening at the + // allocation level, + // keeping this defensive check in place + // TODO: Remove this once remote to docrep migration is supported + if (indexHasRemoteStoreSettings(indexSettings)) { + throw new IllegalStateException( + "[{" + routing.shardId() + "}] Cannot initialize shards with remote store index settings on non-remote store nodes" + ); + } } Directory directory = directoryFactory.newDirectory(this.indexSettings, path); diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteMigrationIndexMetadataUpdater.java b/server/src/main/java/org/opensearch/index/remote/RemoteMigrationIndexMetadataUpdater.java new file mode 100644 index 0000000000000..761fa20ea64e5 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/remote/RemoteMigrationIndexMetadataUpdater.java @@ -0,0 +1,181 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.remote; + +import org.apache.logging.log4j.Logger; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.IndexRoutingTable; +import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.ShardRoutingState; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.remote.RemoteStoreEnums.PathType; +import org.opensearch.indices.replication.common.ReplicationType; + +import java.util.List; +import java.util.Map; +import java.util.Objects; + +import static org.opensearch.cluster.metadata.IndexMetadata.REMOTE_STORE_CUSTOM_KEY; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_SEGMENT_STORE_REPOSITORY; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_STORE_ENABLED; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; +import static org.opensearch.index.remote.RemoteStoreUtils.determineRemoteStorePathStrategyDuringMigration; +import static org.opensearch.index.remote.RemoteStoreUtils.getRemoteStoreRepoName; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; + +/** + * Utils for checking and mutating cluster state during remote migration + * + * @opensearch.internal + */ +public class RemoteMigrationIndexMetadataUpdater { + private final DiscoveryNodes discoveryNodes; + private final RoutingTable routingTable; + private final Settings clusterSettings; + private final IndexMetadata indexMetadata; + private final Logger logger; + + public RemoteMigrationIndexMetadataUpdater( + DiscoveryNodes discoveryNodes, + RoutingTable routingTable, + IndexMetadata indexMetadata, + Settings clusterSettings, + Logger logger + + ) { + this.discoveryNodes = discoveryNodes; + this.routingTable = routingTable; + this.clusterSettings = clusterSettings; + this.indexMetadata = indexMetadata; + this.logger = logger; + } + + /** + * During docrep to remote store migration, applies the following remote store based index settings + * once all shards of an index have moved over to remote store enabled nodes + *
+ * Also appends the requisite Remote Store Path based custom metadata to the existing index metadata + */ + public void maybeAddRemoteIndexSettings(IndexMetadata.Builder indexMetadataBuilder, String index) { + Settings currentIndexSettings = indexMetadata.getSettings(); + if (needsRemoteIndexSettingsUpdate(routingTable.indicesRouting().get(index), discoveryNodes, currentIndexSettings)) { + logger.info( + "Index {} does not have remote store based index settings but all primary shards and STARTED replica shards have moved to remote enabled nodes. Applying remote store settings to the index", + index + ); + Map remoteRepoNames = getRemoteStoreRepoName(discoveryNodes); + String segmentRepoName = remoteRepoNames.get(REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY); + String tlogRepoName = remoteRepoNames.get(REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY); + assert Objects.nonNull(segmentRepoName) && Objects.nonNull(tlogRepoName) : "Remote repo names cannot be null"; + Settings.Builder indexSettingsBuilder = Settings.builder().put(currentIndexSettings); + updateRemoteStoreSettings(indexSettingsBuilder, segmentRepoName, tlogRepoName); + indexMetadataBuilder.settings(indexSettingsBuilder); + indexMetadataBuilder.settingsVersion(1 + indexMetadata.getVersion()); + } else { + logger.debug("Index {} does not satisfy criteria for applying remote store settings", index); + } + } + + /** + * Returns true iff all the below conditions are true: + * - All primary shards are in {@link ShardRoutingState#STARTED} state and are in remote store enabled nodes + * - No replica shard in {@link ShardRoutingState#RELOCATING} state + * - All {@link ShardRoutingState#STARTED} replica shards are in remote store enabled nodes + * + * @param indexRoutingTable current {@link IndexRoutingTable} from cluster state + * @param discoveryNodes set of discovery nodes from cluster state + * @param currentIndexSettings current {@link IndexMetadata} from cluster state + * @return true or false depending on the met conditions + */ + private boolean needsRemoteIndexSettingsUpdate( + IndexRoutingTable indexRoutingTable, + DiscoveryNodes discoveryNodes, + Settings currentIndexSettings + ) { + assert currentIndexSettings != null : "IndexMetadata for a shard cannot be null"; + if (indexHasRemoteStoreSettings(currentIndexSettings) == false) { + boolean allPrimariesStartedAndOnRemote = indexRoutingTable.shardsMatchingPredicate(ShardRouting::primary) + .stream() + .allMatch(shardRouting -> shardRouting.started() && discoveryNodes.get(shardRouting.currentNodeId()).isRemoteStoreNode()); + List replicaShards = indexRoutingTable.shardsMatchingPredicate(shardRouting -> shardRouting.primary() == false); + boolean noRelocatingReplicas = replicaShards.stream().noneMatch(ShardRouting::relocating); + boolean allStartedReplicasOnRemote = replicaShards.stream() + .filter(ShardRouting::started) + .allMatch(shardRouting -> discoveryNodes.get(shardRouting.currentNodeId()).isRemoteStoreNode()); + return allPrimariesStartedAndOnRemote && noRelocatingReplicas && allStartedReplicasOnRemote; + } + return false; + } + + /** + * Updates the remote store path strategy metadata for the index when it is migrating to remote. + * This is run during state change of each shard copy when the cluster is in `MIXED` mode and the direction of migration is `REMOTE_STORE` + * Should not interfere with docrep functionality even if the index is in docrep nodes since this metadata + * is not used anywhere in the docrep flow + * Checks are in place to make this execution no-op if the index metadata is already present. + * + * @param indexMetadataBuilder Mutated {@link IndexMetadata.Builder} having the previous state updates + * @param index index name + */ + public void maybeUpdateRemoteStorePathStrategy(IndexMetadata.Builder indexMetadataBuilder, String index) { + if (indexHasRemotePathMetadata(indexMetadata) == false) { + logger.info("Adding remote store path strategy for index [{}] during migration", index); + indexMetadataBuilder.putCustom( + REMOTE_STORE_CUSTOM_KEY, + determineRemoteStorePathStrategyDuringMigration(clusterSettings, discoveryNodes) + ); + } else { + logger.debug("Index {} already has remote store path strategy", index); + } + } + + public static boolean indexHasAllRemoteStoreRelatedMetadata(IndexMetadata indexMetadata) { + return indexHasRemoteStoreSettings(indexMetadata.getSettings()) && indexHasRemotePathMetadata(indexMetadata); + } + + /** + * Assert current index settings have: + * - index.remote_store.enabled == true + * - index.remote_store.segment.repository != null + * - index.remote_store.translog.repository != null + * - index.replication.type == SEGMENT + * + * @param indexSettings Current index settings + * @return true if all above conditions match. false otherwise + */ + public static boolean indexHasRemoteStoreSettings(Settings indexSettings) { + return IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING.exists(indexSettings) + && IndexMetadata.INDEX_REMOTE_TRANSLOG_REPOSITORY_SETTING.exists(indexSettings) + && IndexMetadata.INDEX_REMOTE_SEGMENT_STORE_REPOSITORY_SETTING.exists(indexSettings) + && IndexMetadata.INDEX_REPLICATION_TYPE_SETTING.get(indexSettings) == ReplicationType.SEGMENT; + } + + /** + * Asserts current index metadata customs has the {@link IndexMetadata#REMOTE_STORE_CUSTOM_KEY} key. + * If it does, checks if the path_type sub-key is present + * + * @param indexMetadata Current index metadata + * @return true if all above conditions match. false otherwise + */ + public static boolean indexHasRemotePathMetadata(IndexMetadata indexMetadata) { + Map customMetadata = indexMetadata.getCustomData(REMOTE_STORE_CUSTOM_KEY); + return Objects.nonNull(customMetadata) && Objects.nonNull(customMetadata.get(PathType.NAME)); + } + + public static void updateRemoteStoreSettings(Settings.Builder settingsBuilder, String segmentRepository, String translogRepository) { + settingsBuilder.put(SETTING_REMOTE_STORE_ENABLED, true) + .put(SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .put(SETTING_REMOTE_SEGMENT_STORE_REPOSITORY, segmentRepository) + .put(SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY, translogRepository); + } +} diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java b/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java index 7208dac162e1a..27b1b88034573 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteStoreUtils.java @@ -8,8 +8,16 @@ package org.opensearch.index.remote; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.Version; +import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.common.collect.Tuple; +import org.opensearch.common.settings.Settings; +import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; import java.nio.ByteBuffer; import java.util.Arrays; @@ -19,14 +27,19 @@ import java.util.Locale; import java.util.Map; import java.util.Objects; +import java.util.Optional; import java.util.function.Function; +import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING; +import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING; + /** * Utils for remote store * * @opensearch.internal */ public class RemoteStoreUtils { + private static final Logger logger = LogManager.getLogger(RemoteStoreUtils.class); public static final int LONG_MAX_LENGTH = String.valueOf(Long.MAX_VALUE).length(); /** @@ -167,4 +180,48 @@ public static RemoteStorePathStrategy determineRemoteStorePathStrategy(IndexMeta } return new RemoteStorePathStrategy(RemoteStoreEnums.PathType.FIXED); } + + /** + * Generates the remote store path type information to be added to custom data of index metadata during migration + * + * @param clusterSettings Current Cluster settings from {@link ClusterState} + * @param discoveryNodes Current {@link DiscoveryNodes} from the cluster state + * @return {@link Map} to be added as custom data in index metadata + */ + public static Map determineRemoteStorePathStrategyDuringMigration( + Settings clusterSettings, + DiscoveryNodes discoveryNodes + ) { + Version minNodeVersion = discoveryNodes.getMinNodeVersion(); + RemoteStoreEnums.PathType pathType = Version.CURRENT.compareTo(minNodeVersion) <= 0 + ? CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.get(clusterSettings) + : RemoteStoreEnums.PathType.FIXED; + RemoteStoreEnums.PathHashAlgorithm pathHashAlgorithm = pathType == RemoteStoreEnums.PathType.FIXED + ? null + : CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING.get(clusterSettings); + Map remoteCustomData = new HashMap<>(); + remoteCustomData.put(RemoteStoreEnums.PathType.NAME, pathType.name()); + if (Objects.nonNull(pathHashAlgorithm)) { + remoteCustomData.put(RemoteStoreEnums.PathHashAlgorithm.NAME, pathHashAlgorithm.name()); + } + return remoteCustomData; + } + + /** + * Fetches segment and translog repository names from remote store node attributes. + * Returns a blank {@link HashMap} if the cluster does not contain any remote nodes. + *
+ * Caller need to handle null checks if {@link DiscoveryNodes} object does not have any remote nodes + * + * @param discoveryNodes Current set of {@link DiscoveryNodes} in the cluster + * @return {@link Map} of data repository node attributes keys and their values + */ + public static Map getRemoteStoreRepoName(DiscoveryNodes discoveryNodes) { + Optional remoteNode = discoveryNodes.getNodes() + .values() + .stream() + .filter(DiscoveryNode::isRemoteStoreNode) + .findFirst(); + return remoteNode.map(RemoteStoreNodeAttribute::getDataRepoNames).orElseGet(HashMap::new); + } } diff --git a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java index a3bfe1195d8cc..b10ec0d99c3d5 100644 --- a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java +++ b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeAttribute.java @@ -18,6 +18,7 @@ import org.opensearch.repositories.blobstore.BlobStoreRepository; import java.util.ArrayList; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -47,6 +48,11 @@ public class RemoteStoreNodeAttribute { public static final String REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX = "remote_store.repository.%s.settings."; private final RepositoriesMetadata repositoriesMetadata; + public static List SUPPORTED_DATA_REPO_NAME_ATTRIBUTES = List.of( + REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, + REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY + ); + /** * Creates a new {@link RemoteStoreNodeAttribute} */ @@ -185,6 +191,30 @@ public RepositoriesMetadata getRepositoriesMetadata() { return this.repositoriesMetadata; } + /** + * Return {@link Map} of all the supported data repo names listed on {@link RemoteStoreNodeAttribute#SUPPORTED_DATA_REPO_NAME_ATTRIBUTES} + * + * @param node Node to fetch attributes from + * @return {@link Map} of all remote store data repo attribute keys and their values + */ + public static Map getDataRepoNames(DiscoveryNode node) { + assert remoteDataAttributesPresent(node.getAttributes()); + Map dataRepoNames = new HashMap<>(); + for (String supportedRepoAttribute : SUPPORTED_DATA_REPO_NAME_ATTRIBUTES) { + dataRepoNames.put(supportedRepoAttribute, node.getAttributes().get(supportedRepoAttribute)); + } + return dataRepoNames; + } + + private static boolean remoteDataAttributesPresent(Map nodeAttrs) { + for (String supportedRepoAttributes : SUPPORTED_DATA_REPO_NAME_ATTRIBUTES) { + if (nodeAttrs.get(supportedRepoAttributes) == null || nodeAttrs.get(supportedRepoAttributes).isEmpty()) { + return false; + } + } + return true; + } + @Override public int hashCode() { // The hashCode is generated by computing the hash of all the repositoryMetadata present in diff --git a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java index adfb751421db7..874c9408de6c5 100644 --- a/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java +++ b/server/src/main/java/org/opensearch/node/remotestore/RemoteStoreNodeService.java @@ -227,7 +227,14 @@ public RepositoriesMetadata updateRepositoriesMetadata(DiscoveryNode joiningNode } /** - * To check if the cluster is undergoing remote store migration + * Returns true iff current cluster settings have: + *
+ * - remote_store.compatibility_mode set to mixed + *
+ * - migration.direction set to remote_store + *
+ * false otherwise + * * @param clusterSettings cluster level settings * @return * true For REMOTE_STORE migration direction and MIXED compatibility mode, diff --git a/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java b/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java index b3eb2443fa940..35c5c5e605b4d 100644 --- a/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java +++ b/server/src/test/java/org/opensearch/action/support/clustermanager/TransportClusterManagerNodeActionTests.java @@ -86,6 +86,8 @@ import java.util.concurrent.atomic.AtomicBoolean; import static org.opensearch.common.util.FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL; +import static org.opensearch.index.remote.RemoteMigrationIndexMetadataUpdaterTests.createIndexMetadataWithDocrepSettings; +import static org.opensearch.index.remote.RemoteMigrationIndexMetadataUpdaterTests.createIndexMetadataWithRemoteStoreSettings; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; @@ -791,7 +793,9 @@ public void testDontAllowSwitchingToStrictCompatibilityModeForMixedCluster() { .add(nonRemoteNode2) .localNodeId(nonRemoteNode2.getId()) .build(); - ClusterState sameTypeClusterState = ClusterState.builder(clusterState).nodes(discoveryNodes).build(); + + metadata = createIndexMetadataWithRemoteStoreSettings("test-index"); + ClusterState sameTypeClusterState = ClusterState.builder(clusterState).nodes(discoveryNodes).metadata(metadata).build(); transportClusterUpdateSettingsAction.validateCompatibilityModeSettingRequest(request, sameTypeClusterState); // cluster with only non-remote nodes @@ -801,10 +805,84 @@ public void testDontAllowSwitchingToStrictCompatibilityModeForMixedCluster() { .add(remoteNode2) .localNodeId(remoteNode2.getId()) .build(); - sameTypeClusterState = ClusterState.builder(sameTypeClusterState).nodes(discoveryNodes).build(); + sameTypeClusterState = ClusterState.builder(sameTypeClusterState).nodes(discoveryNodes).metadata(metadata).build(); transportClusterUpdateSettingsAction.validateCompatibilityModeSettingRequest(request, sameTypeClusterState); } + public void testDontAllowSwitchingToStrictCompatibilityModeWithoutRemoteIndexSettings() { + Settings nodeSettings = Settings.builder().put(REMOTE_STORE_MIGRATION_EXPERIMENTAL, "true").build(); + FeatureFlags.initializeFeatureFlags(nodeSettings); + Settings currentCompatibilityModeSettings = Settings.builder() + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), RemoteStoreNodeService.CompatibilityMode.MIXED) + .build(); + Settings intendedCompatibilityModeSettings = Settings.builder() + .put(REMOTE_STORE_COMPATIBILITY_MODE_SETTING.getKey(), RemoteStoreNodeService.CompatibilityMode.STRICT) + .build(); + ClusterUpdateSettingsRequest request = new ClusterUpdateSettingsRequest(); + request.persistentSettings(intendedCompatibilityModeSettings); + DiscoveryNode remoteNode1 = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + getRemoteStoreNodeAttributes(), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + DiscoveryNode remoteNode2 = new DiscoveryNode( + UUIDs.base64UUID(), + buildNewFakeTransportAddress(), + getRemoteStoreNodeAttributes(), + DiscoveryNodeRole.BUILT_IN_ROLES, + Version.CURRENT + ); + DiscoveryNodes discoveryNodes = DiscoveryNodes.builder() + .add(remoteNode1) + .localNodeId(remoteNode1.getId()) + .add(remoteNode2) + .localNodeId(remoteNode2.getId()) + .build(); + AllocationService allocationService = new AllocationService( + new AllocationDeciders(Collections.singleton(new MaxRetryAllocationDecider())), + new TestGatewayAllocator(), + new BalancedShardsAllocator(Settings.EMPTY), + EmptyClusterInfoService.INSTANCE, + EmptySnapshotsInfoService.INSTANCE + ); + TransportClusterUpdateSettingsAction transportClusterUpdateSettingsAction = new TransportClusterUpdateSettingsAction( + transportService, + clusterService, + threadPool, + allocationService, + new ActionFilters(Collections.emptySet()), + new IndexNameExpressionResolver(new ThreadContext(Settings.EMPTY)), + clusterService.getClusterSettings() + ); + + Metadata nonRemoteIndexMd = Metadata.builder(createIndexMetadataWithDocrepSettings("test")) + .persistentSettings(currentCompatibilityModeSettings) + .build(); + final ClusterState clusterState = ClusterState.builder(ClusterName.DEFAULT) + .metadata(nonRemoteIndexMd) + .nodes(discoveryNodes) + .build(); + final SettingsException exception = expectThrows( + SettingsException.class, + () -> transportClusterUpdateSettingsAction.validateCompatibilityModeSettingRequest(request, clusterState) + ); + assertEquals( + "can not switch to STRICT compatibility mode since all indices in the cluster does not have remote store based index settings", + exception.getMessage() + ); + + Metadata remoteIndexMd = Metadata.builder(createIndexMetadataWithRemoteStoreSettings("test")) + .persistentSettings(currentCompatibilityModeSettings) + .build(); + ClusterState clusterStateWithRemoteIndices = ClusterState.builder(ClusterName.DEFAULT) + .metadata(remoteIndexMd) + .nodes(discoveryNodes) + .build(); + transportClusterUpdateSettingsAction.validateCompatibilityModeSettingRequest(request, clusterStateWithRemoteIndices); + } + public void testDontAllowSwitchingCompatibilityModeForClusterWithMultipleVersions() { Settings nodeSettings = Settings.builder().put(REMOTE_STORE_MIGRATION_EXPERIMENTAL, "true").build(); FeatureFlags.initializeFeatureFlags(nodeSettings); @@ -897,7 +975,10 @@ public void testDontAllowSwitchingCompatibilityModeForClusterWithMultipleVersion .localNodeId(discoveryNode2.getId()) .build(); - ClusterState sameVersionClusterState = ClusterState.builder(differentVersionClusterState).nodes(discoveryNodes).build(); + ClusterState sameVersionClusterState = ClusterState.builder(differentVersionClusterState) + .nodes(discoveryNodes) + .metadata(createIndexMetadataWithRemoteStoreSettings("test")) + .build(); transportClusterUpdateSettingsAction.validateCompatibilityModeSettingRequest(request, sameVersionClusterState); } @@ -907,4 +988,5 @@ private Map getRemoteStoreNodeAttributes() { remoteStoreNodeAttributes.put(REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, "my-translog-repo-1"); return remoteStoreNodeAttributes; } + } diff --git a/server/src/test/java/org/opensearch/cluster/routing/IndexShardRoutingTableTests.java b/server/src/test/java/org/opensearch/cluster/routing/IndexShardRoutingTableTests.java index ebb7529d3f733..e881016fb9305 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/IndexShardRoutingTableTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/IndexShardRoutingTableTests.java @@ -39,6 +39,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.UUID; public class IndexShardRoutingTableTests extends OpenSearchTestCase { public void testEqualsAttributesKey() { @@ -69,4 +70,48 @@ public void testEquals() { assertNotEquals(table1, s); assertNotEquals(table1, table3); } + + public void testShardsMatchingPredicate() { + ShardId shardId = new ShardId(new Index("a", UUID.randomUUID().toString()), 0); + ShardRouting primary = TestShardRouting.newShardRouting(shardId, "node-1", true, ShardRoutingState.STARTED); + ShardRouting replica = TestShardRouting.newShardRouting(shardId, "node-2", false, ShardRoutingState.STARTED); + ShardRouting unassignedReplica = ShardRouting.newUnassigned( + shardId, + false, + RecoverySource.PeerRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null) + ); + ShardRouting relocatingReplica1 = TestShardRouting.newShardRouting( + shardId, + "node-3", + "node-4", + false, + ShardRoutingState.RELOCATING + ); + ShardRouting relocatingReplica2 = TestShardRouting.newShardRouting( + shardId, + "node-4", + "node-5", + false, + ShardRoutingState.RELOCATING + ); + + IndexShardRoutingTable table = new IndexShardRoutingTable( + shardId, + Arrays.asList(primary, replica, unassignedReplica, relocatingReplica1, relocatingReplica2) + ); + assertEquals(List.of(primary), table.shardsMatchingPredicate(ShardRouting::primary)); + assertEquals( + List.of(replica, unassignedReplica, relocatingReplica1, relocatingReplica2), + table.shardsMatchingPredicate(shardRouting -> !shardRouting.primary()) + ); + assertEquals( + List.of(unassignedReplica), + table.shardsMatchingPredicate(shardRouting -> !shardRouting.primary() && shardRouting.unassigned()) + ); + assertEquals( + Arrays.asList(relocatingReplica1, relocatingReplica2), + table.shardsMatchingPredicate(shardRouting -> !shardRouting.primary() && shardRouting.relocating()) + ); + } } diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java index 04e37e7d958d0..5e3b74ee138ab 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/FailedShardsRoutingTests.java @@ -68,7 +68,8 @@ import static org.opensearch.cluster.routing.ShardRoutingState.STARTED; import static org.opensearch.cluster.routing.ShardRoutingState.UNASSIGNED; import static org.opensearch.common.util.FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL; -import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; import static org.hamcrest.Matchers.anyOf; @@ -852,8 +853,10 @@ public void testPreferReplicaOnRemoteNodeForPrimaryPromotion() { // add a remote node and start primary shard Map remoteStoreNodeAttributes = Map.of( - REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, - "REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_VALUE" + REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, + "REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_VALUE", + REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, + "REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_VALUE" ); DiscoveryNode remoteNode1 = new DiscoveryNode( UUIDs.base64UUID(), diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteMigrationIndexMetadataUpdaterTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteMigrationIndexMetadataUpdaterTests.java new file mode 100644 index 0000000000000..d8220c93e4eeb --- /dev/null +++ b/server/src/test/java/org/opensearch/index/remote/RemoteMigrationIndexMetadataUpdaterTests.java @@ -0,0 +1,339 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.remote; + +import org.opensearch.Version; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.routing.IndexRoutingTable; +import org.opensearch.cluster.routing.IndexShardRoutingTable; +import org.opensearch.cluster.routing.RecoverySource; +import org.opensearch.cluster.routing.RoutingTable; +import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.ShardRoutingState; +import org.opensearch.cluster.routing.TestShardRouting; +import org.opensearch.cluster.routing.UnassignedInfo; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.index.Index; +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.index.shard.IndexShardTestUtils; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.util.Map; +import java.util.UUID; + +import static org.opensearch.cluster.metadata.IndexMetadata.REMOTE_STORE_CUSTOM_KEY; +import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING; +import static org.opensearch.indices.RemoteStoreSettings.CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING; +import static org.mockito.Mockito.mock; + +public class RemoteMigrationIndexMetadataUpdaterTests extends OpenSearchTestCase { + private final String indexName = "test-index"; + + public void testMaybeAddRemoteIndexSettingsAllPrimariesAndReplicasOnRemote() throws IOException { + Metadata metadata = createIndexMetadataWithDocrepSettings(indexName); + IndexMetadata existingIndexMetadata = metadata.index(indexName); + IndexMetadata.Builder indexMetadataBuilder = IndexMetadata.builder(existingIndexMetadata); + long currentSettingsVersion = indexMetadataBuilder.settingsVersion(); + DiscoveryNode primaryNode = IndexShardTestUtils.getFakeRemoteEnabledNode("1"); + DiscoveryNode replicaNode = IndexShardTestUtils.getFakeRemoteEnabledNode("2"); + DiscoveryNodes allNodes = DiscoveryNodes.builder().add(primaryNode).add(replicaNode).build(); + RoutingTable routingTable = createRoutingTableAllShardsStarted(indexName, 1, 1, primaryNode, replicaNode); + RemoteMigrationIndexMetadataUpdater migrationIndexMetadataUpdater = new RemoteMigrationIndexMetadataUpdater( + allNodes, + routingTable, + existingIndexMetadata, + metadata.settings(), + logger + ); + migrationIndexMetadataUpdater.maybeAddRemoteIndexSettings(indexMetadataBuilder, indexName); + assertTrue(currentSettingsVersion < indexMetadataBuilder.settingsVersion()); + assertRemoteSettingsApplied(indexMetadataBuilder.build()); + } + + public void testMaybeAddRemoteIndexSettingsDoesNotRunWhenSettingsAlreadyPresent() throws IOException { + Metadata metadata = createIndexMetadataWithRemoteStoreSettings(indexName); + IndexMetadata existingIndexMetadata = metadata.index(indexName); + IndexMetadata.Builder indexMetadataBuilder = IndexMetadata.builder(existingIndexMetadata); + long currentSettingsVersion = indexMetadataBuilder.settingsVersion(); + DiscoveryNode primaryNode = IndexShardTestUtils.getFakeRemoteEnabledNode("1"); + DiscoveryNode replicaNode = IndexShardTestUtils.getFakeRemoteEnabledNode("2"); + DiscoveryNodes allNodes = DiscoveryNodes.builder().add(primaryNode).add(replicaNode).build(); + RoutingTable routingTable = createRoutingTableAllShardsStarted(indexName, 1, 1, primaryNode, replicaNode); + RemoteMigrationIndexMetadataUpdater migrationIndexMetadataUpdater = new RemoteMigrationIndexMetadataUpdater( + allNodes, + routingTable, + existingIndexMetadata, + metadata.settings(), + logger + ); + migrationIndexMetadataUpdater.maybeAddRemoteIndexSettings(indexMetadataBuilder, indexName); + assertEquals(currentSettingsVersion, indexMetadataBuilder.settingsVersion()); + } + + public void testMaybeAddRemoteIndexSettingsDoesNotUpdateSettingsWhenAllShardsInDocrep() throws IOException { + Metadata metadata = createIndexMetadataWithDocrepSettings(indexName); + IndexMetadata existingIndexMetadata = metadata.index(indexName); + IndexMetadata.Builder indexMetadataBuilder = IndexMetadata.builder(existingIndexMetadata); + long currentSettingsVersion = indexMetadataBuilder.settingsVersion(); + DiscoveryNode primaryNode = IndexShardTestUtils.getFakeDiscoNode("1"); + DiscoveryNode replicaNode = IndexShardTestUtils.getFakeDiscoNode("2"); + DiscoveryNodes allNodes = DiscoveryNodes.builder().add(primaryNode).add(replicaNode).build(); + RoutingTable routingTable = createRoutingTableAllShardsStarted(indexName, 1, 1, primaryNode, replicaNode); + RemoteMigrationIndexMetadataUpdater migrationIndexMetadataUpdater = new RemoteMigrationIndexMetadataUpdater( + allNodes, + routingTable, + existingIndexMetadata, + metadata.settings(), + logger + ); + migrationIndexMetadataUpdater.maybeAddRemoteIndexSettings(indexMetadataBuilder, indexName); + assertEquals(currentSettingsVersion, indexMetadataBuilder.settingsVersion()); + assertDocrepSettingsApplied(indexMetadataBuilder.build()); + } + + public void testMaybeAddRemoteIndexSettingsUpdatesIndexSettingsWithUnassignedReplicas() throws IOException { + Metadata metadata = createIndexMetadataWithDocrepSettings(indexName); + IndexMetadata existingIndexMetadata = metadata.index(indexName); + IndexMetadata.Builder indexMetadataBuilder = IndexMetadata.builder(existingIndexMetadata); + long currentSettingsVersion = indexMetadataBuilder.settingsVersion(); + DiscoveryNode primaryNode = IndexShardTestUtils.getFakeRemoteEnabledNode("1"); + DiscoveryNode replicaNode = IndexShardTestUtils.getFakeDiscoNode("2"); + DiscoveryNodes allNodes = DiscoveryNodes.builder().add(primaryNode).add(replicaNode).build(); + RoutingTable routingTable = createRoutingTableReplicasUnassigned(indexName, 1, 1, primaryNode); + RemoteMigrationIndexMetadataUpdater migrationIndexMetadataUpdater = new RemoteMigrationIndexMetadataUpdater( + allNodes, + routingTable, + existingIndexMetadata, + metadata.settings(), + logger + ); + migrationIndexMetadataUpdater.maybeAddRemoteIndexSettings(indexMetadataBuilder, indexName); + assertTrue(currentSettingsVersion < indexMetadataBuilder.settingsVersion()); + assertRemoteSettingsApplied(indexMetadataBuilder.build()); + } + + public void testMaybeAddRemoteIndexSettingsDoesNotUpdateIndexSettingsWithRelocatingReplicas() throws IOException { + Metadata metadata = createIndexMetadataWithDocrepSettings(indexName); + IndexMetadata existingIndexMetadata = metadata.index(indexName); + IndexMetadata.Builder indexMetadataBuilder = IndexMetadata.builder(existingIndexMetadata); + long currentSettingsVersion = indexMetadataBuilder.settingsVersion(); + DiscoveryNode primaryNode = IndexShardTestUtils.getFakeRemoteEnabledNode("1"); + DiscoveryNode replicaNode = IndexShardTestUtils.getFakeDiscoNode("2"); + DiscoveryNode replicaRelocatingNode = IndexShardTestUtils.getFakeDiscoNode("3"); + DiscoveryNodes allNodes = DiscoveryNodes.builder().add(primaryNode).add(replicaNode).build(); + RoutingTable routingTable = createRoutingTableReplicasRelocating(indexName, 1, 1, primaryNode, replicaNode, replicaRelocatingNode); + RemoteMigrationIndexMetadataUpdater migrationIndexMetadataUpdater = new RemoteMigrationIndexMetadataUpdater( + allNodes, + routingTable, + existingIndexMetadata, + metadata.settings(), + logger + ); + migrationIndexMetadataUpdater.maybeAddRemoteIndexSettings(indexMetadataBuilder, indexName); + assertEquals(currentSettingsVersion, indexMetadataBuilder.settingsVersion()); + assertDocrepSettingsApplied(indexMetadataBuilder.build()); + } + + public void testMaybeUpdateRemoteStorePathStrategyExecutes() { + Metadata currentMetadata = createIndexMetadataWithDocrepSettings(indexName); + IndexMetadata existingIndexMetadata = currentMetadata.index(indexName); + IndexMetadata.Builder builder = IndexMetadata.builder(existingIndexMetadata); + DiscoveryNodes discoveryNodes = DiscoveryNodes.builder().add(IndexShardTestUtils.getFakeRemoteEnabledNode("1")).build(); + RemoteMigrationIndexMetadataUpdater migrationIndexMetadataUpdater = new RemoteMigrationIndexMetadataUpdater( + discoveryNodes, + mock(RoutingTable.class), + existingIndexMetadata, + Settings.builder() + .put( + CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING.getKey(), + RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE_1.name() + ) + .put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), RemoteStoreEnums.PathType.HASHED_PREFIX.name()) + .build(), + logger + ); + migrationIndexMetadataUpdater.maybeUpdateRemoteStorePathStrategy(builder, indexName); + assertCustomPathMetadataIsPresent(builder.build()); + } + + public void testMaybeUpdateRemoteStorePathStrategyDoesNotExecute() { + Metadata currentMetadata = createIndexMetadataWithRemoteStoreSettings(indexName); + IndexMetadata existingIndexMetadata = currentMetadata.index(indexName); + IndexMetadata.Builder builder = IndexMetadata.builder(currentMetadata.index(indexName)); + DiscoveryNodes discoveryNodes = DiscoveryNodes.builder().add(IndexShardTestUtils.getFakeRemoteEnabledNode("1")).build(); + RemoteMigrationIndexMetadataUpdater migrationIndexMetadataUpdater = new RemoteMigrationIndexMetadataUpdater( + discoveryNodes, + mock(RoutingTable.class), + existingIndexMetadata, + Settings.builder() + .put( + CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING.getKey(), + RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE_1.name() + ) + .put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), RemoteStoreEnums.PathType.HASHED_PREFIX.name()) + .build(), + logger + ); + + migrationIndexMetadataUpdater.maybeUpdateRemoteStorePathStrategy(builder, indexName); + + assertCustomPathMetadataIsPresent(builder.build()); + } + + private RoutingTable createRoutingTableAllShardsStarted( + String indexName, + int numberOfShards, + int numberOfReplicas, + DiscoveryNode primaryHostingNode, + DiscoveryNode replicaHostingNode + ) { + RoutingTable.Builder builder = RoutingTable.builder(); + Index index = new Index(indexName, UUID.randomUUID().toString()); + + IndexRoutingTable.Builder indexRoutingTableBuilder = IndexRoutingTable.builder(index); + for (int i = 0; i < numberOfShards; i++) { + ShardId shardId = new ShardId(index, i); + IndexShardRoutingTable.Builder indexShardRoutingTable = new IndexShardRoutingTable.Builder(shardId); + indexShardRoutingTable.addShard( + TestShardRouting.newShardRouting(shardId, primaryHostingNode.getId(), true, ShardRoutingState.STARTED) + ); + for (int j = 0; j < numberOfReplicas; j++) { + indexShardRoutingTable.addShard( + TestShardRouting.newShardRouting(shardId, replicaHostingNode.getId(), false, ShardRoutingState.STARTED) + ); + } + indexRoutingTableBuilder.addIndexShard(indexShardRoutingTable.build()); + } + return builder.add(indexRoutingTableBuilder.build()).build(); + } + + private RoutingTable createRoutingTableReplicasUnassigned( + String indexName, + int numberOfShards, + int numberOfReplicas, + DiscoveryNode primaryHostingNode + ) { + RoutingTable.Builder builder = RoutingTable.builder(); + Index index = new Index(indexName, UUID.randomUUID().toString()); + + IndexRoutingTable.Builder indexRoutingTableBuilder = IndexRoutingTable.builder(index); + for (int i = 0; i < numberOfShards; i++) { + ShardId shardId = new ShardId(index, i); + IndexShardRoutingTable.Builder indexShardRoutingTable = new IndexShardRoutingTable.Builder(shardId); + indexShardRoutingTable.addShard( + TestShardRouting.newShardRouting(shardId, primaryHostingNode.getId(), true, ShardRoutingState.STARTED) + ); + for (int j = 0; j < numberOfReplicas; j++) { + indexShardRoutingTable.addShard( + ShardRouting.newUnassigned( + shardId, + false, + RecoverySource.PeerRecoverySource.INSTANCE, + new UnassignedInfo(UnassignedInfo.Reason.INDEX_CREATED, null) + ) + ); + } + indexRoutingTableBuilder.addIndexShard(indexShardRoutingTable.build()); + } + return builder.add(indexRoutingTableBuilder.build()).build(); + } + + private RoutingTable createRoutingTableReplicasRelocating( + String indexName, + int numberOfShards, + int numberOfReplicas, + DiscoveryNode primaryHostingNodes, + DiscoveryNode replicaHostingNode, + DiscoveryNode replicaRelocatingNode + ) { + RoutingTable.Builder builder = RoutingTable.builder(); + Index index = new Index(indexName, UUID.randomUUID().toString()); + + IndexRoutingTable.Builder indexRoutingTableBuilder = IndexRoutingTable.builder(index); + for (int i = 0; i < numberOfShards; i++) { + ShardId shardId = new ShardId(index, i); + IndexShardRoutingTable.Builder indexShardRoutingTable = new IndexShardRoutingTable.Builder(shardId); + indexShardRoutingTable.addShard( + TestShardRouting.newShardRouting(shardId, primaryHostingNodes.getId(), true, ShardRoutingState.STARTED) + ); + for (int j = 0; j < numberOfReplicas; j++) { + indexShardRoutingTable.addShard( + TestShardRouting.newShardRouting( + shardId, + replicaHostingNode.getId(), + replicaRelocatingNode.getId(), + false, + ShardRoutingState.RELOCATING + ) + ); + } + indexRoutingTableBuilder.addIndexShard(indexShardRoutingTable.build()); + } + return builder.add(indexRoutingTableBuilder.build()).build(); + } + + public static Metadata createIndexMetadataWithRemoteStoreSettings(String indexName) { + IndexMetadata.Builder indexMetadata = IndexMetadata.builder(indexName); + indexMetadata.settings( + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING.getKey(), true) + .put(IndexMetadata.INDEX_REMOTE_TRANSLOG_REPOSITORY_SETTING.getKey(), "dummy-tlog-repo") + .put(IndexMetadata.INDEX_REMOTE_SEGMENT_STORE_REPOSITORY_SETTING.getKey(), "dummy-segment-repo") + .put(IndexMetadata.INDEX_REPLICATION_TYPE_SETTING.getKey(), "SEGMENT") + .build() + ) + .putCustom( + REMOTE_STORE_CUSTOM_KEY, + Map.of(RemoteStoreEnums.PathType.NAME, "dummy", RemoteStoreEnums.PathHashAlgorithm.NAME, "dummy") + ) + .build(); + return Metadata.builder().put(indexMetadata).build(); + } + + public static Metadata createIndexMetadataWithDocrepSettings(String indexName) { + IndexMetadata.Builder indexMetadata = IndexMetadata.builder(indexName); + indexMetadata.settings( + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.INDEX_REPLICATION_TYPE_SETTING.getKey(), "DOCUMENT") + .build() + ).build(); + return Metadata.builder().put(indexMetadata).build(); + } + + private void assertRemoteSettingsApplied(IndexMetadata indexMetadata) { + assertTrue(IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING.get(indexMetadata.getSettings())); + assertTrue(IndexMetadata.INDEX_REMOTE_TRANSLOG_REPOSITORY_SETTING.exists(indexMetadata.getSettings())); + assertTrue(IndexMetadata.INDEX_REMOTE_SEGMENT_STORE_REPOSITORY_SETTING.exists(indexMetadata.getSettings())); + assertEquals(ReplicationType.SEGMENT, IndexMetadata.INDEX_REPLICATION_TYPE_SETTING.get(indexMetadata.getSettings())); + } + + private void assertDocrepSettingsApplied(IndexMetadata indexMetadata) { + assertFalse(IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING.get(indexMetadata.getSettings())); + assertFalse(IndexMetadata.INDEX_REMOTE_TRANSLOG_REPOSITORY_SETTING.exists(indexMetadata.getSettings())); + assertFalse(IndexMetadata.INDEX_REMOTE_SEGMENT_STORE_REPOSITORY_SETTING.exists(indexMetadata.getSettings())); + assertEquals(ReplicationType.DOCUMENT, IndexMetadata.INDEX_REPLICATION_TYPE_SETTING.get(indexMetadata.getSettings())); + } + + private void assertCustomPathMetadataIsPresent(IndexMetadata indexMetadata) { + assertNotNull(indexMetadata.getCustomData(REMOTE_STORE_CUSTOM_KEY)); + assertNotNull(indexMetadata.getCustomData(REMOTE_STORE_CUSTOM_KEY).get(RemoteStoreEnums.PathType.NAME)); + assertNotNull(indexMetadata.getCustomData(REMOTE_STORE_CUSTOM_KEY).get(RemoteStoreEnums.PathHashAlgorithm.NAME)); + } +} diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java index 4d3e633848975..c1fc0cdaa0d3b 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteStoreUtilsTests.java @@ -8,10 +8,13 @@ package org.opensearch.index.remote; +import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.common.blobstore.BlobMetadata; import org.opensearch.common.blobstore.support.PlainBlobMetadata; +import org.opensearch.index.shard.IndexShardTestUtils; import org.opensearch.index.store.RemoteSegmentStoreDirectory; import org.opensearch.index.translog.transfer.TranslogTransferMetadata; +import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; import org.opensearch.test.OpenSearchTestCase; import java.math.BigInteger; @@ -28,6 +31,8 @@ import static org.opensearch.index.remote.RemoteStoreUtils.longToUrlBase64; import static org.opensearch.index.remote.RemoteStoreUtils.urlBase64ToLong; import static org.opensearch.index.remote.RemoteStoreUtils.verifyNoMultipleWriters; +import static org.opensearch.index.shard.IndexShardTestUtils.MOCK_SEGMENT_REPO_NAME; +import static org.opensearch.index.shard.IndexShardTestUtils.MOCK_TLOG_REPO_NAME; import static org.opensearch.index.store.RemoteSegmentStoreDirectory.MetadataFilenameUtils.METADATA_PREFIX; import static org.opensearch.index.store.RemoteSegmentStoreDirectory.MetadataFilenameUtils.SEPARATOR; import static org.opensearch.index.translog.transfer.TranslogTransferMetadata.METADATA_SEPARATOR; @@ -316,6 +321,19 @@ public void testLongToCompositeUrlBase64AndBinaryEncoding() { } } + public void testGetRemoteStoreRepoNameWithRemoteNodes() { + DiscoveryNodes discoveryNodes = DiscoveryNodes.builder().add(IndexShardTestUtils.getFakeRemoteEnabledNode("1")).build(); + Map expected = new HashMap<>(); + expected.put(RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, MOCK_SEGMENT_REPO_NAME); + expected.put(RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, MOCK_TLOG_REPO_NAME); + assertEquals(expected, RemoteStoreUtils.getRemoteStoreRepoName(discoveryNodes)); + } + + public void testGetRemoteStoreRepoNameWithDocrepNdoes() { + DiscoveryNodes discoveryNodes = DiscoveryNodes.builder().add(IndexShardTestUtils.getFakeDiscoNode("1")).build(); + assertTrue(RemoteStoreUtils.getRemoteStoreRepoName(discoveryNodes).isEmpty()); + } + static long compositeUrlBase64BinaryEncodingToLong(String encodedValue) { char ch = encodedValue.charAt(0); int base64BitsIntValue = BASE64_CHARSET_IDX_MAP.get(ch); diff --git a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestUtils.java b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestUtils.java index d3a4a95c3bdef..abf8f2a4da6c1 100644 --- a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestUtils.java +++ b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestUtils.java @@ -21,6 +21,9 @@ import java.util.Map; public class IndexShardTestUtils { + public static final String MOCK_SEGMENT_REPO_NAME = "segment-test-repo"; + public static final String MOCK_TLOG_REPO_NAME = "tlog-test-repo"; + public static DiscoveryNode getFakeDiscoNode(String id) { return new DiscoveryNode( id, @@ -34,7 +37,8 @@ public static DiscoveryNode getFakeDiscoNode(String id) { public static DiscoveryNode getFakeRemoteEnabledNode(String id) { Map remoteNodeAttributes = new HashMap(); - remoteNodeAttributes.put(RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "test-repo"); + remoteNodeAttributes.put(RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, MOCK_SEGMENT_REPO_NAME); + remoteNodeAttributes.put(RemoteStoreNodeAttribute.REMOTE_STORE_TRANSLOG_REPOSITORY_NAME_ATTRIBUTE_KEY, MOCK_TLOG_REPO_NAME); return new DiscoveryNode( id, id, diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java index 7f6313d2d7214..a9f6fdc86155d 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java @@ -2409,6 +2409,12 @@ protected String primaryNodeName(String indexName) { return clusterState.getRoutingNodes().node(nodeId).node().getName(); } + protected String primaryNodeName(String indexName, int shardId) { + ClusterState clusterState = client().admin().cluster().prepareState().get().getState(); + String nodeId = clusterState.getRoutingTable().index(indexName).shard(shardId).primaryShard().currentNodeId(); + return clusterState.getRoutingNodes().node(nodeId).node().getName(); + } + protected String replicaNodeName(String indexName) { ClusterState clusterState = client().admin().cluster().prepareState().get().getState(); String nodeId = clusterState.getRoutingTable().index(indexName).shard(0).replicaShards().get(0).currentNodeId(); From 5d61ac25517e2afc1980b15f8b8999f35daf1e48 Mon Sep 17 00:00:00 2001 From: peteralfonsi Date: Tue, 30 Apr 2024 10:59:03 -0700 Subject: [PATCH 3/7] Fix flaky test CacheStatsAPIIndicesRequestCacheIT.testNullLevels() (#13457) * Fix flaky test Signed-off-by: Peter Alfonsi * Initialize CommonStatsFlags with empty array for levels Signed-off-by: Peter Alfonsi * Fixes tests using incorrect null levels Signed-off-by: Peter Alfonsi * rerun Signed-off-by: Peter Alfonsi --------- Signed-off-by: Peter Alfonsi Co-authored-by: Peter Alfonsi --- .../cache/store/disk/EhCacheDiskCacheTests.java | 5 +++-- .../action/admin/indices/stats/CommonStatsFlags.java | 6 +++--- .../java/org/opensearch/common/cache/ICache.java | 4 ++-- .../common/cache/stats/DefaultCacheStatsHolder.java | 4 +++- .../org/opensearch/indices/IndicesRequestCache.java | 4 ++-- .../cache/stats/ImmutableCacheStatsHolderTests.java | 12 +++++++----- .../cache/store/OpenSearchOnHeapCacheTests.java | 6 +++--- .../opensearch/indices/IndicesRequestCacheTests.java | 9 ++++++--- 8 files changed, 29 insertions(+), 21 deletions(-) diff --git a/plugins/cache-ehcache/src/test/java/org/opensearch/cache/store/disk/EhCacheDiskCacheTests.java b/plugins/cache-ehcache/src/test/java/org/opensearch/cache/store/disk/EhCacheDiskCacheTests.java index f93b09cf2d4f4..f2bfe1209a4c7 100644 --- a/plugins/cache-ehcache/src/test/java/org/opensearch/cache/store/disk/EhCacheDiskCacheTests.java +++ b/plugins/cache-ehcache/src/test/java/org/opensearch/cache/store/disk/EhCacheDiskCacheTests.java @@ -829,7 +829,8 @@ public void testInvalidateWithDropDimensions() throws Exception { ICacheKey keyToDrop = keysAdded.get(0); - ImmutableCacheStats snapshot = ehCacheDiskCachingTier.stats().getStatsForDimensionValues(keyToDrop.dimensions); + String[] levels = dimensionNames.toArray(new String[0]); + ImmutableCacheStats snapshot = ehCacheDiskCachingTier.stats(levels).getStatsForDimensionValues(keyToDrop.dimensions); assertNotNull(snapshot); keyToDrop.setDropStatsForDimensions(true); @@ -837,7 +838,7 @@ public void testInvalidateWithDropDimensions() throws Exception { // Now assert the stats are gone for any key that has this combination of dimensions, but still there otherwise for (ICacheKey keyAdded : keysAdded) { - snapshot = ehCacheDiskCachingTier.stats().getStatsForDimensionValues(keyAdded.dimensions); + snapshot = ehCacheDiskCachingTier.stats(levels).getStatsForDimensionValues(keyAdded.dimensions); if (keyAdded.dimensions.equals(keyToDrop.dimensions)) { assertNull(snapshot); } else { diff --git a/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java b/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java index 3cb178b63167d..4d108f8d78a69 100644 --- a/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java +++ b/server/src/main/java/org/opensearch/action/admin/indices/stats/CommonStatsFlags.java @@ -66,7 +66,7 @@ public class CommonStatsFlags implements Writeable, Cloneable { private boolean includeOnlyTopIndexingPressureMetrics = false; // Used for metric CACHE_STATS, to determine which caches to report stats for private EnumSet includeCaches = EnumSet.noneOf(CacheType.class); - private String[] levels; + private String[] levels = new String[0]; /** * @param flags flags to set. If no flags are supplied, default flags will be set. @@ -139,7 +139,7 @@ public CommonStatsFlags all() { includeAllShardIndexingPressureTrackers = false; includeOnlyTopIndexingPressureMetrics = false; includeCaches = EnumSet.noneOf(CacheType.class); - levels = null; + levels = new String[0]; return this; } @@ -156,7 +156,7 @@ public CommonStatsFlags clear() { includeAllShardIndexingPressureTrackers = false; includeOnlyTopIndexingPressureMetrics = false; includeCaches = EnumSet.noneOf(CacheType.class); - levels = null; + levels = new String[0]; return this; } diff --git a/server/src/main/java/org/opensearch/common/cache/ICache.java b/server/src/main/java/org/opensearch/common/cache/ICache.java index bc69ccee0c2fb..f5dd644db6d6b 100644 --- a/server/src/main/java/org/opensearch/common/cache/ICache.java +++ b/server/src/main/java/org/opensearch/common/cache/ICache.java @@ -45,12 +45,12 @@ public interface ICache extends Closeable { void refresh(); - // Return all stats without aggregation. + // Return total stats only default ImmutableCacheStatsHolder stats() { return stats(null); } - // Return stats aggregated by the provided levels. If levels is null, do not aggregate and return all stats. + // Return stats aggregated by the provided levels. If levels is null or an empty array, return total stats only. ImmutableCacheStatsHolder stats(String[] levels); /** diff --git a/server/src/main/java/org/opensearch/common/cache/stats/DefaultCacheStatsHolder.java b/server/src/main/java/org/opensearch/common/cache/stats/DefaultCacheStatsHolder.java index 0162a10487eba..5574e345b6d3d 100644 --- a/server/src/main/java/org/opensearch/common/cache/stats/DefaultCacheStatsHolder.java +++ b/server/src/main/java/org/opensearch/common/cache/stats/DefaultCacheStatsHolder.java @@ -12,6 +12,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; @@ -170,7 +171,8 @@ private boolean internalIncrementHelper( */ @Override public ImmutableCacheStatsHolder getImmutableCacheStatsHolder(String[] levels) { - return new ImmutableCacheStatsHolder(this.statsRoot, levels, dimensionNames, storeName); + String[] nonNullLevels = Objects.requireNonNullElseGet(levels, () -> new String[0]); + return new ImmutableCacheStatsHolder(this.statsRoot, nonNullLevels, dimensionNames, storeName); } @Override diff --git a/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java b/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java index f5e7ba26539a6..35826d45f969f 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java +++ b/server/src/main/java/org/opensearch/indices/IndicesRequestCache.java @@ -826,8 +826,8 @@ long count() { /** * Returns the current cache stats. Pkg-private for testing. */ - ImmutableCacheStatsHolder stats() { - return cache.stats(); + ImmutableCacheStatsHolder stats(String[] levels) { + return cache.stats(levels); } int numRegisteredCloseListeners() { // for testing diff --git a/server/src/test/java/org/opensearch/common/cache/stats/ImmutableCacheStatsHolderTests.java b/server/src/test/java/org/opensearch/common/cache/stats/ImmutableCacheStatsHolderTests.java index 46483e92b76bf..285840a3451c6 100644 --- a/server/src/test/java/org/opensearch/common/cache/stats/ImmutableCacheStatsHolderTests.java +++ b/server/src/test/java/org/opensearch/common/cache/stats/ImmutableCacheStatsHolderTests.java @@ -29,10 +29,11 @@ public class ImmutableCacheStatsHolderTests extends OpenSearchTestCase { public void testSerialization() throws Exception { List dimensionNames = List.of("dim1", "dim2", "dim3"); + String[] levels = dimensionNames.toArray(new String[0]); DefaultCacheStatsHolder statsHolder = new DefaultCacheStatsHolder(dimensionNames, storeName); Map> usedDimensionValues = DefaultCacheStatsHolderTests.getUsedDimensionValues(statsHolder, 10); DefaultCacheStatsHolderTests.populateStats(statsHolder, usedDimensionValues, 100, 10); - ImmutableCacheStatsHolder stats = statsHolder.getImmutableCacheStatsHolder(null); + ImmutableCacheStatsHolder stats = statsHolder.getImmutableCacheStatsHolder(levels); assertNotEquals(0, stats.getStatsRoot().children.size()); BytesStreamOutput os = new BytesStreamOutput(); @@ -57,19 +58,20 @@ public void testSerialization() throws Exception { public void testEquals() throws Exception { List dimensionNames = List.of("dim1", "dim2", "dim3"); + String[] levels = dimensionNames.toArray(new String[0]); DefaultCacheStatsHolder statsHolder = new DefaultCacheStatsHolder(dimensionNames, storeName); DefaultCacheStatsHolder differentStoreNameStatsHolder = new DefaultCacheStatsHolder(dimensionNames, "nonMatchingStoreName"); DefaultCacheStatsHolder nonMatchingStatsHolder = new DefaultCacheStatsHolder(dimensionNames, storeName); Map> usedDimensionValues = DefaultCacheStatsHolderTests.getUsedDimensionValues(statsHolder, 10); DefaultCacheStatsHolderTests.populateStats(List.of(statsHolder, differentStoreNameStatsHolder), usedDimensionValues, 100, 10); DefaultCacheStatsHolderTests.populateStats(nonMatchingStatsHolder, usedDimensionValues, 100, 10); - ImmutableCacheStatsHolder stats = statsHolder.getImmutableCacheStatsHolder(null); + ImmutableCacheStatsHolder stats = statsHolder.getImmutableCacheStatsHolder(levels); - ImmutableCacheStatsHolder secondStats = statsHolder.getImmutableCacheStatsHolder(null); + ImmutableCacheStatsHolder secondStats = statsHolder.getImmutableCacheStatsHolder(levels); assertEquals(stats, secondStats); - ImmutableCacheStatsHolder nonMatchingStats = nonMatchingStatsHolder.getImmutableCacheStatsHolder(null); + ImmutableCacheStatsHolder nonMatchingStats = nonMatchingStatsHolder.getImmutableCacheStatsHolder(levels); assertNotEquals(stats, nonMatchingStats); - ImmutableCacheStatsHolder differentStoreNameStats = differentStoreNameStatsHolder.getImmutableCacheStatsHolder(null); + ImmutableCacheStatsHolder differentStoreNameStats = differentStoreNameStatsHolder.getImmutableCacheStatsHolder(levels); assertNotEquals(stats, differentStoreNameStats); } diff --git a/server/src/test/java/org/opensearch/common/cache/store/OpenSearchOnHeapCacheTests.java b/server/src/test/java/org/opensearch/common/cache/store/OpenSearchOnHeapCacheTests.java index 3208fde306e5a..f5885d03f1850 100644 --- a/server/src/test/java/org/opensearch/common/cache/store/OpenSearchOnHeapCacheTests.java +++ b/server/src/test/java/org/opensearch/common/cache/store/OpenSearchOnHeapCacheTests.java @@ -145,8 +145,8 @@ public void testInvalidateWithDropDimensions() throws Exception { } ICacheKey keyToDrop = keysAdded.get(0); - - ImmutableCacheStats snapshot = cache.stats().getStatsForDimensionValues(keyToDrop.dimensions); + String[] levels = dimensionNames.toArray(new String[0]); + ImmutableCacheStats snapshot = cache.stats(levels).getStatsForDimensionValues(keyToDrop.dimensions); assertNotNull(snapshot); keyToDrop.setDropStatsForDimensions(true); @@ -154,7 +154,7 @@ public void testInvalidateWithDropDimensions() throws Exception { // Now assert the stats are gone for any key that has this combination of dimensions, but still there otherwise for (ICacheKey keyAdded : keysAdded) { - snapshot = cache.stats().getStatsForDimensionValues(keyAdded.dimensions); + snapshot = cache.stats(levels).getStatsForDimensionValues(keyAdded.dimensions); if (keyAdded.dimensions.equals(keyToDrop.dimensions)) { assertNull(snapshot); } else { diff --git a/server/src/test/java/org/opensearch/indices/IndicesRequestCacheTests.java b/server/src/test/java/org/opensearch/indices/IndicesRequestCacheTests.java index 9e2c33998abd6..bbf2867a0087c 100644 --- a/server/src/test/java/org/opensearch/indices/IndicesRequestCacheTests.java +++ b/server/src/test/java/org/opensearch/indices/IndicesRequestCacheTests.java @@ -89,7 +89,9 @@ import java.util.concurrent.ConcurrentMap; import java.util.concurrent.atomic.AtomicInteger; +import static org.opensearch.indices.IndicesRequestCache.INDEX_DIMENSION_NAME; import static org.opensearch.indices.IndicesRequestCache.INDICES_REQUEST_CACHE_STALENESS_THRESHOLD_SETTING; +import static org.opensearch.indices.IndicesRequestCache.SHARD_ID_DIMENSION_NAME; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -799,6 +801,7 @@ private String getReaderCacheKeyId(DirectoryReader reader) { public void testClosingIndexWipesStats() throws Exception { IndicesService indicesService = getInstanceFromNode(IndicesService.class); + String[] levels = { INDEX_DIMENSION_NAME, SHARD_ID_DIMENSION_NAME }; // Create two indices each with multiple shards int numShards = 3; Settings indexSettings = Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numShards).build(); @@ -873,8 +876,8 @@ public void testClosingIndexWipesStats() throws Exception { ShardId shardId = indexService.getShard(i).shardId(); List dimensionValues = List.of(shardId.getIndexName(), shardId.toString()); initialDimensionValues.add(dimensionValues); - ImmutableCacheStatsHolder holder = cache.stats(); - ImmutableCacheStats snapshot = cache.stats().getStatsForDimensionValues(dimensionValues); + ImmutableCacheStatsHolder holder = cache.stats(levels); + ImmutableCacheStats snapshot = cache.stats(levels).getStatsForDimensionValues(dimensionValues); assertNotNull(snapshot); // check the values are not empty by confirming entries != 0, this should always be true since the missed value is loaded // into the cache @@ -895,7 +898,7 @@ public void testClosingIndexWipesStats() throws Exception { // Now stats for the closed index should be gone for (List dimensionValues : initialDimensionValues) { - ImmutableCacheStats snapshot = cache.stats().getStatsForDimensionValues(dimensionValues); + ImmutableCacheStats snapshot = cache.stats(levels).getStatsForDimensionValues(dimensionValues); if (dimensionValues.get(0).equals(indexToCloseName)) { assertNull(snapshot); } else { From 1765e20cf3939cb0fff0b8af37294da26d9cbea0 Mon Sep 17 00:00:00 2001 From: Sayali Gaikawad <61760125+gaiksaya@users.noreply.github.com> Date: Tue, 30 Apr 2024 18:28:01 -0700 Subject: [PATCH 4/7] Skip running gradle checks on release notes (#13477) Signed-off-by: Sayali Gaikawad --- .github/workflows/gradle-check.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/gradle-check.yml b/.github/workflows/gradle-check.yml index 1f5c187c28e7d..0921aff721836 100644 --- a/.github/workflows/gradle-check.yml +++ b/.github/workflows/gradle-check.yml @@ -7,6 +7,10 @@ on: - 'dependabot/**' pull_request_target: types: [opened, synchronize, reopened] + paths-ignore: + - 'release-notes/**' + - '.github/**' + - '**.md' permissions: contents: read # to fetch code (actions/checkout) From 5133e5fd881402fcd9a4d6dcc1845aea5f73e0ec Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Wed, 1 May 2024 15:33:18 -0400 Subject: [PATCH 5/7] =?UTF-8?q?Revert=20"Removing=20unused=20fetch=20sub?= =?UTF-8?q?=20phase=20processor=20initialization=20during=20fetch=E2=80=A6?= =?UTF-8?q?=20(#12503)"=20(#13486)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit da5b2053864f24dcb3b8c555a9fd19e0d91a14e6. Signed-off-by: Andriy Redko --- CHANGELOG.md | 1 + .../search.aggregation/400_inner_hits.yml | 65 +++++++++++++++++++ .../opensearch/search/fetch/FetchContext.java | 8 --- .../fetch/subphase/InnerHitsContext.java | 5 -- .../search/fetch/subphase/InnerHitsPhase.java | 2 +- .../fetch/subphase/ScriptFieldsPhase.java | 2 +- .../search/internal/SearchContext.java | 4 -- .../fetch/subphase/InnerHitsPhaseTests.java | 53 --------------- .../subphase/ScriptFieldsPhaseTests.java | 53 --------------- 9 files changed, 68 insertions(+), 125 deletions(-) create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/400_inner_hits.yml delete mode 100644 server/src/test/java/org/opensearch/search/fetch/subphase/InnerHitsPhaseTests.java delete mode 100644 server/src/test/java/org/opensearch/search/fetch/subphase/ScriptFieldsPhaseTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index ec85052873407..5bff49af99473 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -70,6 +70,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Refactoring globMatch using simpleMatchWithNormalizedStrings from Regex ([#13104](https://github.com/opensearch-project/OpenSearch/pull/13104)) - [BWC and API enforcement] Reconsider the breaking changes check policy to detect breaking changes against released versions ([#13292](https://github.com/opensearch-project/OpenSearch/pull/13292)) - Switch to macos-13 runner for precommit and assemble github actions due to macos-latest is now arm64 ([#13412](https://github.com/opensearch-project/OpenSearch/pull/13412)) +- [Revert] Prevent unnecessary fetch sub phase processor initialization during fetch phase execution ([#12503](https://github.com/opensearch-project/OpenSearch/pull/12503)) ### Deprecated diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/400_inner_hits.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/400_inner_hits.yml new file mode 100644 index 0000000000000..d4584a251816e --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/400_inner_hits.yml @@ -0,0 +1,65 @@ +setup: + - do: + indices.create: + index: test_1 + body: + settings: + number_of_replicas: 0 + mappings: + properties: + list_id: + type: integer + names: + type: nested + properties: + full_name: + type: text + + - do: + bulk: + refresh: true + body: + - index: + _index: test_1 + _id: 1 + - list_id: 1 + names: + - full_name: John Doe + - full_name: John Micheal Doe + - index: + _index: test_1 + _id: 2 + - list_id: 2 + names: + - full_name: Jane Doe + - full_name: Jane Michelle Doe + +--- +"Include inner hits in top hits": + - do: + search: + rest_total_hits_as_int: true + body: + query: + nested: + path: names + query: + match: + names.full_name: Doe + inner_hits: { } + size: 0 + aggs: + lists: + terms: + field: list_id + aggs: + top_result: + top_hits: + size: 10 + + - length: { hits.hits: 0 } + - length: { aggregations.lists.buckets: 2 } + - length: { aggregations.lists.buckets.0.top_result.hits.hits: 1 } + - length: { aggregations.lists.buckets.0.top_result.hits.hits.0.inner_hits.names.hits.hits: 2 } + - length: { aggregations.lists.buckets.1.top_result.hits.hits: 1 } + - length: { aggregations.lists.buckets.1.top_result.hits.hits.0.inner_hits.names.hits.hits: 2 } diff --git a/server/src/main/java/org/opensearch/search/fetch/FetchContext.java b/server/src/main/java/org/opensearch/search/fetch/FetchContext.java index 780a6f35524ea..5be3733106655 100644 --- a/server/src/main/java/org/opensearch/search/fetch/FetchContext.java +++ b/server/src/main/java/org/opensearch/search/fetch/FetchContext.java @@ -192,10 +192,6 @@ public boolean includeNamedQueriesScore() { return searchContext.includeNamedQueriesScore(); } - public boolean hasInnerHits() { - return searchContext.hasInnerHits(); - } - /** * Configuration for returning inner hits */ @@ -217,10 +213,6 @@ public FetchFieldsContext fetchFieldsContext() { return searchContext.fetchFieldsContext(); } - public boolean hasScriptFields() { - return searchContext.hasScriptFields(); - } - /** * Configuration for script fields */ diff --git a/server/src/main/java/org/opensearch/search/fetch/subphase/InnerHitsContext.java b/server/src/main/java/org/opensearch/search/fetch/subphase/InnerHitsContext.java index fa80bb04c77f5..5855a0b3217f3 100644 --- a/server/src/main/java/org/opensearch/search/fetch/subphase/InnerHitsContext.java +++ b/server/src/main/java/org/opensearch/search/fetch/subphase/InnerHitsContext.java @@ -119,11 +119,6 @@ public String getName() { return name; } - @Override - public boolean hasInnerHits() { - return childInnerHits != null; - } - @Override public InnerHitsContext innerHits() { return childInnerHits; diff --git a/server/src/main/java/org/opensearch/search/fetch/subphase/InnerHitsPhase.java b/server/src/main/java/org/opensearch/search/fetch/subphase/InnerHitsPhase.java index cadad8529da9d..0b07dc35f13bb 100644 --- a/server/src/main/java/org/opensearch/search/fetch/subphase/InnerHitsPhase.java +++ b/server/src/main/java/org/opensearch/search/fetch/subphase/InnerHitsPhase.java @@ -64,7 +64,7 @@ public InnerHitsPhase(FetchPhase fetchPhase) { @Override public FetchSubPhaseProcessor getProcessor(FetchContext searchContext) { - if (searchContext.hasInnerHits() == false) { + if (searchContext.innerHits() == null) { return null; } Map innerHits = searchContext.innerHits().getInnerHits(); diff --git a/server/src/main/java/org/opensearch/search/fetch/subphase/ScriptFieldsPhase.java b/server/src/main/java/org/opensearch/search/fetch/subphase/ScriptFieldsPhase.java index bee536dbaf7f6..67d1863050a7b 100644 --- a/server/src/main/java/org/opensearch/search/fetch/subphase/ScriptFieldsPhase.java +++ b/server/src/main/java/org/opensearch/search/fetch/subphase/ScriptFieldsPhase.java @@ -54,7 +54,7 @@ public final class ScriptFieldsPhase implements FetchSubPhase { @Override public FetchSubPhaseProcessor getProcessor(FetchContext context) { - if (context.hasScriptFields() == false) { + if (context.scriptFields() == null) { return null; } List scriptFields = context.scriptFields().fields(); diff --git a/server/src/main/java/org/opensearch/search/internal/SearchContext.java b/server/src/main/java/org/opensearch/search/internal/SearchContext.java index 07f3616bbc138..0c8240d3a8322 100644 --- a/server/src/main/java/org/opensearch/search/internal/SearchContext.java +++ b/server/src/main/java/org/opensearch/search/internal/SearchContext.java @@ -194,10 +194,6 @@ public final void close() { public abstract void highlight(SearchHighlightContext highlight); - public boolean hasInnerHits() { - return innerHitsContext != null; - } - public InnerHitsContext innerHits() { if (innerHitsContext == null) { innerHitsContext = new InnerHitsContext(); diff --git a/server/src/test/java/org/opensearch/search/fetch/subphase/InnerHitsPhaseTests.java b/server/src/test/java/org/opensearch/search/fetch/subphase/InnerHitsPhaseTests.java deleted file mode 100644 index 7ca5977a1c276..0000000000000 --- a/server/src/test/java/org/opensearch/search/fetch/subphase/InnerHitsPhaseTests.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.search.fetch.subphase; - -import org.opensearch.index.query.QueryShardContext; -import org.opensearch.search.fetch.FetchContext; -import org.opensearch.search.internal.SearchContext; -import org.opensearch.search.lookup.SearchLookup; -import org.opensearch.test.OpenSearchTestCase; - -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -public class InnerHitsPhaseTests extends OpenSearchTestCase { - - /* - Returns mock search context reused across test methods - */ - private SearchContext getMockSearchContext(final boolean hasInnerHits) { - final QueryShardContext queryShardContext = mock(QueryShardContext.class); - when(queryShardContext.newFetchLookup()).thenReturn(mock(SearchLookup.class)); - - final SearchContext searchContext = mock(SearchContext.class); - when(searchContext.hasInnerHits()).thenReturn(hasInnerHits); - when(searchContext.getQueryShardContext()).thenReturn(queryShardContext); - - return searchContext; - } - - /* - Validates that InnerHitsPhase processor is not initialized when no inner hits - */ - public void testInnerHitsNull() { - assertNull(new InnerHitsPhase(null).getProcessor(new FetchContext(getMockSearchContext(false)))); - } - - /* - Validates that InnerHitsPhase processor is initialized when inner hits are present - */ - public void testInnerHitsNonNull() { - final SearchContext searchContext = getMockSearchContext(true); - when(searchContext.innerHits()).thenReturn(new InnerHitsContext()); - - assertNotNull(new InnerHitsPhase(null).getProcessor(new FetchContext(searchContext))); - } - -} diff --git a/server/src/test/java/org/opensearch/search/fetch/subphase/ScriptFieldsPhaseTests.java b/server/src/test/java/org/opensearch/search/fetch/subphase/ScriptFieldsPhaseTests.java deleted file mode 100644 index eb6338997ab9f..0000000000000 --- a/server/src/test/java/org/opensearch/search/fetch/subphase/ScriptFieldsPhaseTests.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.search.fetch.subphase; - -import org.opensearch.index.query.QueryShardContext; -import org.opensearch.search.fetch.FetchContext; -import org.opensearch.search.internal.SearchContext; -import org.opensearch.search.lookup.SearchLookup; -import org.opensearch.test.OpenSearchTestCase; - -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -public class ScriptFieldsPhaseTests extends OpenSearchTestCase { - - /* - Returns mock search context reused across test methods - */ - private SearchContext getMockSearchContext(final boolean hasScriptFields) { - final QueryShardContext queryShardContext = mock(QueryShardContext.class); - when(queryShardContext.newFetchLookup()).thenReturn(mock(SearchLookup.class)); - - final SearchContext searchContext = mock(SearchContext.class); - when(searchContext.hasScriptFields()).thenReturn(hasScriptFields); - when(searchContext.getQueryShardContext()).thenReturn(queryShardContext); - - return searchContext; - } - - /* - Validates that ScriptFieldsPhase processor is not initialized when no script fields - */ - public void testScriptFieldsNull() { - assertNull(new ScriptFieldsPhase().getProcessor(new FetchContext(getMockSearchContext(false)))); - } - - /* - Validates that ScriptFieldsPhase processor is initialized when script fields are present - */ - public void testScriptFieldsNonNull() { - final SearchContext searchContext = getMockSearchContext(true); - when(searchContext.scriptFields()).thenReturn(new ScriptFieldsContext()); - - assertNotNull(new ScriptFieldsPhase().getProcessor(new FetchContext(searchContext))); - } - -} From 00947fce98b33e0b2b50cd887c9ac4e4d0ffba90 Mon Sep 17 00:00:00 2001 From: Peter Nied Date: Wed, 1 May 2024 22:04:19 -0500 Subject: [PATCH 6/7] Revert "Skip running gradle checks on release notes (#13477)" (#13499) This reverts commit 1765e20cf3939cb0fff0b8af37294da26d9cbea0. --- .github/workflows/gradle-check.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/gradle-check.yml b/.github/workflows/gradle-check.yml index 0921aff721836..1f5c187c28e7d 100644 --- a/.github/workflows/gradle-check.yml +++ b/.github/workflows/gradle-check.yml @@ -7,10 +7,6 @@ on: - 'dependabot/**' pull_request_target: types: [opened, synchronize, reopened] - paths-ignore: - - 'release-notes/**' - - '.github/**' - - '**.md' permissions: contents: read # to fetch code (actions/checkout) From ef841dd16878fb60b082dccd8faa8d9099e61296 Mon Sep 17 00:00:00 2001 From: Lakshya Taragi <157457166+ltaragi@users.noreply.github.com> Date: Thu, 2 May 2024 12:51:45 +0530 Subject: [PATCH 7/7] [Remote Migration] Handle shard allocation for none migration direction (#13322) Signed-off-by: Lakshya Taragi --- .../RemoteMigrationAllocationDeciderIT.java | 409 +++++++++++++++++- .../RemotePrimaryRelocationIT.java | 8 + ...eMigrationShardAllocationBaseTestCase.java | 165 ++++++- .../metadata/MetadataCreateIndexService.java | 2 +- ...RemoteStoreMigrationAllocationDecider.java | 48 +- ...eStoreMigrationAllocationDeciderTests.java | 337 +++++++-------- 6 files changed, 753 insertions(+), 216 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationAllocationDeciderIT.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationAllocationDeciderIT.java index de425ffc63816..eeb6a5a5626e4 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationAllocationDeciderIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteMigrationAllocationDeciderIT.java @@ -11,8 +11,11 @@ import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.cluster.routing.UnassignedInfo; import org.opensearch.cluster.routing.allocation.command.MoveAllocationCommand; +import org.opensearch.cluster.routing.allocation.decider.Decision; import org.opensearch.common.Priority; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; @@ -21,13 +24,17 @@ import java.io.IOException; import java.util.List; +import java.util.Locale; +import java.util.Optional; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.CompatibilityMode.MIXED; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.Direction.REMOTE_STORE; import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) -public class RemoteMigrationAllocationDeciderIT extends MigrationBaseTestCase { +public class RemoteMigrationAllocationDeciderIT extends RemoteStoreMigrationShardAllocationBaseTestCase { // When the primary is on doc rep node, existing replica copy can get allocated on excluded docrep node. public void testFilterAllocationSkipsReplica() throws IOException { @@ -127,4 +134,404 @@ public void testFilterAllocationSkipsReplicaOnExcludedNode() throws IOException assertTrue(clusterHealthResponse.isTimedOut()); ensureYellow("test"); } + + // When under mixed mode and remote_store direction, a primary shard can only be allocated to a remote node + + public void testNewPrimaryShardAllocationForRemoteStoreMigration() throws Exception { + logger.info("Initialize cluster"); + internalCluster().startClusterManagerOnlyNode(); + + logger.info("Add non-remote data node"); + String nonRemoteNodeName = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + DiscoveryNode nonRemoteNode = assertNodeInCluster(nonRemoteNodeName); + + logger.info("Set mixed mode and remote_store direction"); + setClusterMode(MIXED.mode); + setDirection(REMOTE_STORE.direction); + + logger.info("Verify expected decision for allocating a new primary shard on a non-remote node"); + prepareIndexWithoutReplica(Optional.empty()); + Decision decision = getDecisionForTargetNode(nonRemoteNode, true, true, false); + assertEquals(Decision.Type.NO, decision.type()); + assertEquals( + "[remote_store migration_direction]: primary shard copy can not be allocated to a non-remote node", + decision.getExplanation().toLowerCase(Locale.ROOT) + ); + + logger.info("Attempt allocation on non-remote node"); + attemptAllocation(null); + + logger.info("Verify non-allocation of primary shard on non-remote node"); + assertNonAllocation(true); + + logger.info("Add remote data node"); + setAddRemote(true); + String remoteNodeName = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + DiscoveryNode remoteNode = assertNodeInCluster(remoteNodeName); + + logger.info("Verify expected decision for allocating a new primary shard on a remote node"); + excludeAllNodes(); + decision = getDecisionForTargetNode(remoteNode, true, true, false); + assertEquals(Decision.Type.YES, decision.type()); + assertEquals( + "[remote_store migration_direction]: primary shard copy can be allocated to a remote node", + decision.getExplanation().toLowerCase(Locale.ROOT) + ); + + logger.info("Attempt free allocation"); + attemptAllocation(null); + ensureGreen(TEST_INDEX); + + logger.info("Verify allocation of primary shard on remote node"); + assertAllocation(true, remoteNode); + } + + // When under mixed mode and remote_store direction, a replica shard can only be allocated to a remote node if the primary has relocated + // to another remote node + + public void testNewReplicaShardAllocationIfPrimaryShardOnNonRemoteNodeForRemoteStoreMigration() throws Exception { + logger.info("Initialize cluster"); + internalCluster().startClusterManagerOnlyNode(); + + logger.info("Add non-remote data node"); + String nonRemoteNodeName1 = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + DiscoveryNode nonRemoteNode1 = assertNodeInCluster(nonRemoteNodeName1); + + logger.info("Allocate primary shard on non-remote node"); + prepareIndexWithAllocatedPrimary(nonRemoteNode1, Optional.empty()); + + logger.info("Add remote data node"); + setClusterMode(MIXED.mode); + setAddRemote(true); + String remoteNodeName = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + DiscoveryNode remoteNode = assertNodeInCluster(remoteNodeName); + + logger.info("Set remote_store direction"); + setDirection(REMOTE_STORE.direction); + + logger.info("Verify expected decision for allocating a replica shard on a remote node"); + excludeAllNodes(); + Decision decision = getDecisionForTargetNode(remoteNode, false, true, false); + assertEquals(Decision.Type.NO, decision.type()); + assertEquals( + "[remote_store migration_direction]: replica shard copy can not be allocated to a remote node since primary shard copy is not yet migrated to remote", + decision.getExplanation().toLowerCase(Locale.ROOT) + ); + + logger.info("Attempt free allocation of replica shard"); + attemptAllocation(null); + + logger.info("Verify non-allocation of replica shard"); + assertNonAllocation(false); + + logger.info("Add another non-remote data node"); + setAddRemote(false); + String nonRemoteNodeName2 = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + DiscoveryNode nonRemoteNode2 = assertNodeInCluster(nonRemoteNodeName2); + + logger.info("Verify expected decision for allocating the replica shard on a non-remote node"); + excludeAllNodes(); + decision = getDecisionForTargetNode(nonRemoteNode2, false, true, false); + assertEquals(Decision.Type.YES, decision.type()); + assertEquals( + "[remote_store migration_direction]: replica shard copy can be allocated to a non-remote node", + decision.getExplanation().toLowerCase(Locale.ROOT) + ); + + logger.info("Attempt free allocation of replica shard"); + attemptAllocation(null); + ensureGreen(TEST_INDEX); + + logger.info("Verify allocation of replica shard on non-remote node"); + assertAllocation(false, nonRemoteNode2); + } + + public void testNewReplicaShardAllocationIfPrimaryShardOnRemoteNodeForRemoteStoreMigration() throws Exception { + logger.info("Initialize cluster"); + internalCluster().startClusterManagerOnlyNode(); + + logger.info("Add non-remote data nodes"); + String nonRemoteNodeName1 = internalCluster().startDataOnlyNode(); + String nonRemoteNodeName2 = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + DiscoveryNode nonRemoteNode1 = assertNodeInCluster(nonRemoteNodeName1); + DiscoveryNode nonRemoteNode2 = assertNodeInCluster(nonRemoteNodeName2); + + logger.info("Allocate primary and replica shard on non-remote nodes"); + createIndex(TEST_INDEX, 1); + ensureGreen(TEST_INDEX); + + logger.info("Set mixed mode"); + setClusterMode(MIXED.mode); + + logger.info("Add remote data nodes"); + setAddRemote(true); + String remoteNodeName1 = internalCluster().startDataOnlyNode(); + String remoteNodeName2 = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + DiscoveryNode remoteNode1 = assertNodeInCluster(remoteNodeName1); + DiscoveryNode remoteNode2 = assertNodeInCluster(remoteNodeName2); + + logger.info("Set remote_store direction"); + setDirection(REMOTE_STORE.direction); + + logger.info("Relocate primary shard to remote node"); + DiscoveryNode initialPrimaryNode = primaryNodeName(TEST_INDEX).equals(nonRemoteNodeName1) ? nonRemoteNode1 : nonRemoteNode2; + DiscoveryNode initialReplicaNode = initialPrimaryNode.equals(nonRemoteNode1) ? nonRemoteNode2 : nonRemoteNode1; + assertAcked( + internalCluster().client() + .admin() + .cluster() + .prepareReroute() + .add(new MoveAllocationCommand(TEST_INDEX, 0, initialPrimaryNode.getName(), remoteNodeName1)) + .get() + ); + ensureGreen(TEST_INDEX); + assertAllocation(true, remoteNode1); + + logger.info("Verify expected decision for relocating a replica shard on non-remote node"); + Decision decision = getDecisionForTargetNode(initialPrimaryNode, false, true, true); + assertEquals(Decision.Type.YES, decision.type()); + assertEquals( + "[remote_store migration_direction]: replica shard copy can be relocated to a non-remote node", + decision.getExplanation().toLowerCase(Locale.ROOT) + ); + + logger.info("Attempt relocation of replica shard to non-remote node"); + assertAcked( + internalCluster().client() + .admin() + .cluster() + .prepareReroute() + .add(new MoveAllocationCommand(TEST_INDEX, 0, initialReplicaNode.getName(), initialPrimaryNode.getName())) + .get() + ); + + logger.info("Verify relocation of replica shard to non-remote node"); + ensureGreen(TEST_INDEX); + assertAllocation(false, initialPrimaryNode); + + logger.info("Verify expected decision for relocating a replica shard on remote node"); + decision = getDecisionForTargetNode(remoteNode2, false, true, true); + assertEquals(Decision.Type.YES, decision.type()); + assertEquals( + "[remote_store migration_direction]: replica shard copy can be relocated to a remote node since primary shard copy has been migrated to remote", + decision.getExplanation().toLowerCase(Locale.ROOT) + ); + + logger.info("Attempt relocation of replica shard to remote node"); + assertAcked( + internalCluster().client() + .admin() + .cluster() + .prepareReroute() + .add(new MoveAllocationCommand(TEST_INDEX, 0, initialPrimaryNode.getName(), remoteNodeName2)) + .get() + ); + + logger.info("Verify relocation of replica shard to non-remote node"); + ensureGreen(TEST_INDEX); + assertAllocation(false, remoteNode2); + } + + // When under strict mode, a shard can be allocated to any node + + public void testAlwaysAllocateNewShardForStrictMode() throws Exception { + boolean isRemoteCluster = randomBoolean(); + boolean isReplicaAllocation = randomBoolean(); + + logger.info("Initialize cluster and add nodes"); + setAddRemote(isRemoteCluster); + internalCluster().startClusterManagerOnlyNode(); + String nodeName1 = internalCluster().startDataOnlyNode(); + String nodeName2 = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + DiscoveryNode node1 = assertNodeInCluster(nodeName1); + DiscoveryNode node2 = assertNodeInCluster(nodeName2); + + if (isReplicaAllocation) { + prepareIndexWithAllocatedPrimary(node1, Optional.empty()); + } else { + prepareIndexWithoutReplica(Optional.empty()); + } + + if (isRemoteCluster) { + assertRemoteStoreBackedIndex(TEST_INDEX); + } else { + assertNonRemoteStoreBackedIndex(TEST_INDEX); + } + + logger.info("Verify expected decision for allocation of a shard"); + excludeAllNodes(); + Decision decision = getDecisionForTargetNode( + isReplicaAllocation ? node2 : randomFrom(node1, node2), + !isReplicaAllocation, + true, + false + ); + assertEquals(Decision.Type.YES, decision.type()); + String expectedReason = String.format( + Locale.ROOT, + "[none migration_direction]: %s shard copy can be allocated to a %s node for strict compatibility mode", + (isReplicaAllocation ? "replica" : "primary"), + (isRemoteCluster ? "remote" : "non-remote") + ); + assertEquals(expectedReason, decision.getExplanation().toLowerCase(Locale.ROOT)); + + logger.info("Attempt free allocation"); + attemptAllocation(null); + ensureGreen(TEST_INDEX); + + logger.info("Verify allocation of shard"); + assertAllocation(!isReplicaAllocation, !isReplicaAllocation ? null : node2); + } + + // When under mixed mode and remote_store direction, shard of a remote store backed index can not be allocated to a non-remote node + + public void testRemoteStoreBackedIndexShardAllocationForRemoteStoreMigration() throws Exception { + logger.info("Initialize cluster"); + internalCluster().startClusterManagerOnlyNode(); + + logger.info("Set mixed mode"); + setClusterMode(MIXED.mode); + + logger.info("Add remote and non-remote nodes"); + String nonRemoteNodeName = internalCluster().startDataOnlyNode(); + setAddRemote(true); + String remoteNodeName = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + DiscoveryNode remoteNode = assertNodeInCluster(remoteNodeName); + DiscoveryNode nonRemoteNode = assertNodeInCluster(nonRemoteNodeName); + + logger.info("Set remote_store direction"); + setDirection(REMOTE_STORE.direction); + + boolean isReplicaAllocation = randomBoolean(); + if (isReplicaAllocation) { + logger.info("Create index with primary allocated on remote node"); + prepareIndexWithAllocatedPrimary(remoteNode, Optional.empty()); + } else { + logger.info("Create index with unallocated primary"); + prepareIndexWithoutReplica(Optional.empty()); + } + + logger.info("Verify remote store backed index"); + assertRemoteStoreBackedIndex(TEST_INDEX); + + logger.info("Verify expected decision for allocation of shard on a non-remote node"); + excludeAllNodes(); + Decision decision = getDecisionForTargetNode(nonRemoteNode, !isReplicaAllocation, false, false); + assertEquals(Decision.Type.NO, decision.type()); + String expectedReason = String.format( + Locale.ROOT, + "[remote_store migration_direction]: %s shard copy can not be allocated to a non-remote node because a remote store backed index's shard copy can only be allocated to a remote node", + (isReplicaAllocation ? "replica" : "primary") + ); + assertEquals(expectedReason, decision.getExplanation().toLowerCase(Locale.ROOT)); + + logger.info("Attempt allocation of shard on non-remote node"); + attemptAllocation(nonRemoteNodeName); + + logger.info("Verify non-allocation of shard"); + assertNonAllocation(!isReplicaAllocation); + } + + // When under mixed mode and none direction, allocate shard of a remote store backed index to a remote node and shard of a non remote + // store backed index to a non-remote node only + + public void testAllocationForNoneDirectionAndMixedMode() throws Exception { + boolean isRemoteStoreBackedIndex = randomBoolean(); + boolean isReplicaAllocation = randomBoolean(); + logger.info( + String.format( + Locale.ROOT, + "Test for allocation decisions for %s shard of a %s store backed index under NONE direction", + (isReplicaAllocation ? "replica" : "primary"), + (isRemoteStoreBackedIndex ? "remote" : "non remote") + ) + ); + + logger.info("Initialize cluster"); + setAddRemote(isRemoteStoreBackedIndex); + internalCluster().startClusterManagerOnlyNode(); + + logger.info("Add data nodes"); + String previousNodeName1 = internalCluster().startDataOnlyNode(); + String previousNodeName2 = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + DiscoveryNode previousNode1 = assertNodeInCluster(previousNodeName1); + DiscoveryNode previousNode2 = assertNodeInCluster(previousNodeName2); + + logger.info("Prepare test index"); + if (isReplicaAllocation) { + prepareIndexWithAllocatedPrimary(previousNode1, Optional.empty()); + } else { + prepareIndexWithoutReplica(Optional.empty()); + } + + if (isRemoteStoreBackedIndex) { + assertRemoteStoreBackedIndex(TEST_INDEX); + } else { + assertNonRemoteStoreBackedIndex(TEST_INDEX); + } + + logger.info("Switch to MIXED cluster compatibility mode"); + setClusterMode(MIXED.mode); + setAddRemote(!addRemote); + String newNodeName = internalCluster().startDataOnlyNode(); + internalCluster().validateClusterFormed(); + DiscoveryNode newNode = assertNodeInCluster(newNodeName); + + logger.info("Verify decision for allocation on the new node"); + excludeAllNodes(); + Decision decision = getDecisionForTargetNode(newNode, !isReplicaAllocation, false, false); + assertEquals(Decision.Type.NO, decision.type()); + String expectedReason = String.format( + Locale.ROOT, + "[none migration_direction]: %s shard copy can not be allocated to a %s node for %s store backed index", + (isReplicaAllocation ? "replica" : "primary"), + (isRemoteStoreBackedIndex ? "non-remote" : "remote"), + (isRemoteStoreBackedIndex ? "remote" : "non remote") + ); + assertEquals(expectedReason, decision.getExplanation().toLowerCase(Locale.ROOT)); + + logger.info("Attempt allocation of shard on new node"); + attemptAllocation(newNodeName); + + logger.info("Verify non-allocation of shard"); + assertNonAllocation(!isReplicaAllocation); + + logger.info("Verify decision for allocation on previous node"); + decision = getDecisionForTargetNode(previousNode2, !isReplicaAllocation, true, false); + assertEquals(Decision.Type.YES, decision.type()); + expectedReason = String.format( + Locale.ROOT, + "[none migration_direction]: %s shard copy can be allocated to a %s node for %s store backed index", + (isReplicaAllocation ? "replica" : "primary"), + (isRemoteStoreBackedIndex ? "remote" : "non-remote"), + (isRemoteStoreBackedIndex ? "remote" : "non remote") + ); + assertEquals(expectedReason, decision.getExplanation().toLowerCase(Locale.ROOT)); + + logger.info("Attempt free allocation of shard"); + attemptAllocation(null); + + logger.info("Verify successful allocation of shard"); + if (!isReplicaAllocation) { + ensureGreen(TEST_INDEX); + } else { + ensureYellowAndNoInitializingShards(TEST_INDEX); + } + assertAllocation(!isReplicaAllocation, null); + logger.info("Verify allocation on one of the previous nodes"); + ShardRouting shardRouting = getShardRouting(!isReplicaAllocation); + assertTrue( + shardRouting.currentNodeId().equals(previousNode1.getId()) || shardRouting.currentNodeId().equals(previousNode2.getId()) + ); + } } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryRelocationIT.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryRelocationIT.java index 8f6c1e2d9a68c..293691ace2edd 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryRelocationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemotePrimaryRelocationIT.java @@ -91,6 +91,10 @@ public void testRemotePrimaryRelocation() throws Exception { int finalCurrentDoc1 = currentDoc; waitUntil(() -> numAutoGenDocs.get() > finalCurrentDoc1 + 5); + // Change direction to remote store + updateSettingsRequest.persistentSettings(Settings.builder().put(MIGRATION_DIRECTION_SETTING.getKey(), "remote_store")); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + logger.info("--> relocating from {} to {} ", docRepNodes, remoteNode); client().admin() .cluster() @@ -179,6 +183,10 @@ public void testMixedModeRelocation_RemoteSeedingFail() throws Exception { .setTransientSettings(Settings.builder().put(RecoverySettings.INDICES_INTERNAL_REMOTE_UPLOAD_TIMEOUT.getKey(), "10s")) .get(); + // Change direction to remote store + updateSettingsRequest.persistentSettings(Settings.builder().put(MIGRATION_DIRECTION_SETTING.getKey(), "remote_store")); + assertAcked(client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); + logger.info("--> relocating from {} to {} ", docRepNode, remoteNode); client().admin().cluster().prepareReroute().add(new MoveAllocationCommand("test", 0, docRepNode, remoteNode)).execute().actionGet(); ClusterHealthResponse clusterHealthResponse = client().admin() diff --git a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationShardAllocationBaseTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationShardAllocationBaseTestCase.java index ffcab9483485d..cf689aa554c8b 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationShardAllocationBaseTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotemigration/RemoteStoreMigrationShardAllocationBaseTestCase.java @@ -8,13 +8,21 @@ package org.opensearch.remotemigration; +import org.opensearch.action.admin.cluster.allocation.ClusterAllocationExplanation; import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; +import org.opensearch.action.support.ActiveShardCount; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.cluster.routing.IndexShardRoutingTable; import org.opensearch.cluster.routing.ShardRouting; +import org.opensearch.cluster.routing.ShardRoutingState; +import org.opensearch.cluster.routing.allocation.AllocateUnassignedDecision; +import org.opensearch.cluster.routing.allocation.MoveDecision; +import org.opensearch.cluster.routing.allocation.NodeAllocationResult; +import org.opensearch.cluster.routing.allocation.decider.Decision; +import org.opensearch.common.Nullable; import org.opensearch.common.settings.Settings; import org.opensearch.core.rest.RestStatus; import org.opensearch.index.IndexSettings; @@ -22,6 +30,7 @@ import org.opensearch.snapshots.SnapshotInfo; import org.opensearch.snapshots.SnapshotState; +import java.util.List; import java.util.Map; import java.util.Optional; @@ -47,7 +56,7 @@ protected void setClusterMode(String mode) { } // set the migration direction for cluster [remote_store, docrep, none] - public void setDirection(String direction) { + protected void setDirection(String direction) { updateSettingsRequest.persistentSettings(Settings.builder().put(MIGRATION_DIRECTION_SETTING.getKey(), direction)); assertAcked(internalCluster().client().admin().cluster().updateSettings(updateSettingsRequest).actionGet()); } @@ -79,7 +88,7 @@ protected String allNodesExcept(String except) { return exclude.toString(); } - // create a new test index + // create a new test index with un-allocated primary and no replicas protected void prepareIndexWithoutReplica(Optional name) { String indexName = name.orElse(TEST_INDEX); internalCluster().client() @@ -96,6 +105,33 @@ protected void prepareIndexWithoutReplica(Optional name) { .actionGet(); } + // create a new test index with allocated primary and 1 unallocated replica + public void prepareIndexWithAllocatedPrimary(DiscoveryNode primaryShardNode, Optional name) { + String indexName = name.orElse(TEST_INDEX); + internalCluster().client() + .admin() + .indices() + .prepareCreate(indexName) + .setSettings( + Settings.builder() + .put("index.number_of_shards", 1) + .put("index.number_of_replicas", 1) + .put("index.routing.allocation.include._name", primaryShardNode.getName()) + .put("index.routing.allocation.exclude._name", allNodesExcept(primaryShardNode.getName())) + ) + .setWaitForActiveShards(ActiveShardCount.ONE) + .execute() + .actionGet(); + + ensureYellowAndNoInitializingShards(TEST_INDEX); + + logger.info(" --> verify allocation of primary shard"); + assertAllocation(true, primaryShardNode); + + logger.info(" --> verify non-allocation of replica shard"); + assertNonAllocation(false); + } + protected ShardRouting getShardRouting(boolean isPrimary) { IndexShardRoutingTable table = internalCluster().client() .admin() @@ -110,6 +146,130 @@ protected ShardRouting getShardRouting(boolean isPrimary) { return (isPrimary ? table.primaryShard() : table.replicaShards().get(0)); } + // obtain decision for allocation/relocation of a shard to a given node + protected Decision getDecisionForTargetNode( + DiscoveryNode targetNode, + boolean isPrimary, + boolean includeYesDecisions, + boolean isRelocation + ) { + ClusterAllocationExplanation explanation = internalCluster().client() + .admin() + .cluster() + .prepareAllocationExplain() + .setIndex(TEST_INDEX) + .setShard(0) + .setPrimary(isPrimary) + .setIncludeYesDecisions(includeYesDecisions) + .get() + .getExplanation(); + + Decision requiredDecision = null; + List nodeAllocationResults; + if (isRelocation) { + MoveDecision moveDecision = explanation.getShardAllocationDecision().getMoveDecision(); + nodeAllocationResults = moveDecision.getNodeDecisions(); + } else { + AllocateUnassignedDecision allocateUnassignedDecision = explanation.getShardAllocationDecision().getAllocateDecision(); + nodeAllocationResults = allocateUnassignedDecision.getNodeDecisions(); + } + + for (NodeAllocationResult nodeAllocationResult : nodeAllocationResults) { + if (nodeAllocationResult.getNode().equals(targetNode)) { + for (Decision decision : nodeAllocationResult.getCanAllocateDecision().getDecisions()) { + if (decision.label().equals(NAME)) { + requiredDecision = decision; + break; + } + } + } + } + + assertNotNull(requiredDecision); + return requiredDecision; + } + + // get allocation and relocation decisions for all nodes + protected void excludeAllNodes() { + assertAcked( + internalCluster().client() + .admin() + .indices() + .prepareUpdateSettings(TEST_INDEX) + .setSettings( + Settings.builder() + .put("index.routing.allocation.include._name", "") + .put("index.routing.allocation.exclude._name", allNodesExcept(null)) + ) + .execute() + .actionGet() + ); + } + + protected void includeAllNodes() { + assertAcked( + internalCluster().client() + .admin() + .indices() + .prepareUpdateSettings(TEST_INDEX) + .setSettings( + Settings.builder() + .put("index.routing.allocation.exclude._name", "") + .put("index.routing.allocation.include._name", allNodesExcept(null)) + ) + .execute() + .actionGet() + ); + } + + protected void attemptAllocation(@Nullable String targetNodeName) { + Settings.Builder settingsBuilder; + if (targetNodeName != null) { + settingsBuilder = Settings.builder() + .put("index.routing.allocation.include._name", targetNodeName) + .put("index.routing.allocation.exclude._name", allNodesExcept(targetNodeName)); + } else { + String clusterManagerNodeName = internalCluster().client() + .admin() + .cluster() + .prepareState() + .execute() + .actionGet() + .getState() + .getNodes() + .getClusterManagerNode() + .getName(); + // to allocate freely among all nodes other than cluster-manager node + settingsBuilder = Settings.builder() + .put("index.routing.allocation.include._name", allNodesExcept(clusterManagerNodeName)) + .put("index.routing.allocation.exclude._name", clusterManagerNodeName); + } + internalCluster().client().admin().indices().prepareUpdateSettings(TEST_INDEX).setSettings(settingsBuilder).execute().actionGet(); + } + + // verify that shard does not exist at targetNode + protected void assertNonAllocation(boolean isPrimary) { + if (isPrimary) { + ensureRed(TEST_INDEX); + } else { + ensureYellowAndNoInitializingShards(TEST_INDEX); + } + ShardRouting shardRouting = getShardRouting(isPrimary); + assertFalse(shardRouting.active()); + assertNull(shardRouting.currentNodeId()); + assertEquals(ShardRoutingState.UNASSIGNED, shardRouting.state()); + } + + // verify that shard exists at targetNode + protected void assertAllocation(boolean isPrimary, @Nullable DiscoveryNode targetNode) { + ShardRouting shardRouting = getShardRouting(isPrimary); + assertTrue(shardRouting.active()); + assertNotNull(shardRouting.currentNodeId()); + if (targetNode != null) { + assertEquals(targetNode.getId(), shardRouting.currentNodeId()); + } + } + // create a snapshot public static SnapshotInfo createSnapshot(String snapshotRepoName, String snapshotName, String... indices) { SnapshotInfo snapshotInfo = internalCluster().client() @@ -194,4 +354,5 @@ public static void assertRemoteStoreBackedIndex(String indexName) { INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING.get(indexSettings) ); } + } diff --git a/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java b/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java index b31985a260361..121f8d935cf48 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java @@ -1664,7 +1664,7 @@ public static void validateRefreshIntervalSettings(Settings requestSettings, Clu * @param clusterSettings cluster setting */ static void validateTranslogDurabilitySettings(Settings requestSettings, ClusterSettings clusterSettings, Settings settings) { - if (isRemoteDataAttributePresent(settings) == false + if ((isRemoteDataAttributePresent(settings) == false && isMigratingToRemoteStore(clusterSettings) == false) || IndexSettings.INDEX_TRANSLOG_DURABILITY_SETTING.exists(requestSettings) == false || clusterSettings.get(IndicesService.CLUSTER_REMOTE_INDEX_RESTRICT_ASYNC_DURABILITY_SETTING) == false) { return; diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/RemoteStoreMigrationAllocationDecider.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/RemoteStoreMigrationAllocationDecider.java index 7d40aacb71e25..4fc5fff805663 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/RemoteStoreMigrationAllocationDecider.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/decider/RemoteStoreMigrationAllocationDecider.java @@ -95,32 +95,38 @@ public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, Routing ); } - if (migrationDirection.equals(Direction.REMOTE_STORE) == false) { - // docrep migration direction is currently not supported + IndexMetadata indexMetadata = allocation.metadata().getIndexSafe(shardRouting.index()); + boolean remoteSettingsBackedIndex = IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING.get(indexMetadata.getSettings()); + + if (migrationDirection.equals(Direction.NONE)) { + // remote backed indices on docrep nodes and non remote backed indices on remote nodes are not allowed + boolean isNoDecision = remoteSettingsBackedIndex ^ targetNode.isRemoteStoreNode(); + String reason = String.format(Locale.ROOT, " for %sremote store backed index", remoteSettingsBackedIndex ? "" : "non "); return allocation.decision( - Decision.YES, + isNoDecision ? Decision.NO : Decision.YES, NAME, - getDecisionDetails(true, shardRouting, targetNode, " for non remote_store direction") + getDecisionDetails(!isNoDecision, shardRouting, targetNode, reason) ); - } - - // check for remote store backed indices - IndexMetadata indexMetadata = allocation.metadata().getIndexSafe(shardRouting.index()); - boolean remoteStoreBackedIndex = IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING.get(indexMetadata.getSettings()); - if (remoteStoreBackedIndex && targetNode.isRemoteStoreNode() == false) { - // allocations and relocations must be to a remote node - String reason = String.format( - Locale.ROOT, - " because a remote store backed index's shard copy can only be %s to a remote node", - ((shardRouting.assignedToNode() == false) ? "allocated" : "relocated") - ); - return allocation.decision(Decision.NO, NAME, getDecisionDetails(false, shardRouting, targetNode, reason)); - } + } else if (migrationDirection.equals(Direction.DOCREP)) { + // docrep migration direction is currently not supported + return allocation.decision(Decision.YES, NAME, getDecisionDetails(true, shardRouting, targetNode, " for DOCREP direction")); + } else { + // check for remote store backed indices + if (remoteSettingsBackedIndex && targetNode.isRemoteStoreNode() == false) { + // allocations and relocations must be to a remote node + String reason = String.format( + Locale.ROOT, + " because a remote store backed index's shard copy can only be %s to a remote node", + ((shardRouting.assignedToNode() == false) ? "allocated" : "relocated") + ); + return allocation.decision(Decision.NO, NAME, getDecisionDetails(false, shardRouting, targetNode, reason)); + } - if (shardRouting.primary()) { - return primaryShardDecision(shardRouting, targetNode, allocation); + if (shardRouting.primary()) { + return primaryShardDecision(shardRouting, targetNode, allocation); + } + return replicaShardDecision(shardRouting, targetNode, allocation); } - return replicaShardDecision(shardRouting, targetNode, allocation); } // handle scenarios for allocation of a new shard's primary copy diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java index 3e130a42952e4..ee4dbe9738e04 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/RemoteStoreMigrationAllocationDeciderTests.java @@ -70,6 +70,8 @@ import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; import static org.opensearch.common.util.FeatureFlags.REMOTE_STORE_MIGRATION_EXPERIMENTAL; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.Direction.NONE; +import static org.opensearch.node.remotestore.RemoteStoreNodeService.Direction.REMOTE_STORE; import static org.opensearch.node.remotestore.RemoteStoreNodeService.MIGRATION_DIRECTION_SETTING; import static org.opensearch.node.remotestore.RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING; import static org.hamcrest.core.Is.is; @@ -89,7 +91,7 @@ public class RemoteStoreMigrationAllocationDeciderTests extends OpenSearchAlloca .build(); private final Settings remoteStoreDirectionSettings = Settings.builder() - .put(MIGRATION_DIRECTION_SETTING.getKey(), RemoteStoreNodeService.Direction.REMOTE_STORE) + .put(MIGRATION_DIRECTION_SETTING.getKey(), REMOTE_STORE) .build(); private final Settings docrepDirectionSettings = Settings.builder() .put(MIGRATION_DIRECTION_SETTING.getKey(), RemoteStoreNodeService.Direction.DOCREP) @@ -106,7 +108,9 @@ public class RemoteStoreMigrationAllocationDeciderTests extends OpenSearchAlloca private Metadata metadata; private RoutingTable routingTable = null; - private void beforeAllocation() { + private ShardId shardId = new ShardId(TEST_INDEX, "_na_", 0); + + private void beforeAllocation(String direction) { FeatureFlags.initializeFeatureFlags(directionEnabledNodeSettings); if (isRemoteStoreBackedIndex == null) { isRemoteStoreBackedIndex = randomBoolean(); @@ -116,11 +120,7 @@ private void beforeAllocation() { String compatibilityMode = isMixedMode ? RemoteStoreNodeService.CompatibilityMode.MIXED.mode : RemoteStoreNodeService.CompatibilityMode.STRICT.mode; - customSettings = getCustomSettings( - RemoteStoreNodeService.Direction.REMOTE_STORE.direction, - compatibilityMode, - indexMetadataBuilder - ); + customSettings = getCustomSettings(direction, compatibilityMode, indexMetadataBuilder); if (routingTable != null) { metadata = Metadata.builder().put(indexMetadataBuilder).build(); @@ -149,6 +149,35 @@ private void beforeAllocation() { routingAllocation.debugDecision(true); } + private void prepareRoutingTable(boolean isReplicaAllocation, String primaryShardNodeId) { + routingTable = RoutingTable.builder() + .add( + IndexRoutingTable.builder(shardId.getIndex()) + .addIndexShard( + new IndexShardRoutingTable.Builder(shardId).addShard( + TestShardRouting.newShardRouting( + shardId.getIndexName(), + shardId.getId(), + (isReplicaAllocation ? primaryShardNodeId : null), + true, + (isReplicaAllocation ? ShardRoutingState.STARTED : ShardRoutingState.UNASSIGNED) + ) + ) + .addShard( + TestShardRouting.newShardRouting( + shardId.getIndexName(), + shardId.getId(), + null, + false, + ShardRoutingState.UNASSIGNED + ) + ) + .build() + ) + ) + .build(); + } + // tests for primary shard copy allocation with MIXED mode and REMOTE_STORE direction public void testDontAllocateNewPrimaryShardOnNonRemoteNodeForMixedModeAndRemoteStoreDirection() { @@ -166,7 +195,7 @@ public void testDontAllocateNewPrimaryShardOnNonRemoteNodeForMixedModeAndRemoteS .localNodeId(remoteNode.getId()) .build(); - beforeAllocation(); + beforeAllocation(REMOTE_STORE.direction); ShardRouting primaryShardRouting = clusterState.getRoutingTable().shardRoutingTable(TEST_INDEX, 0).primaryShard(); RoutingNode nonRemoteRoutingNode = clusterState.getRoutingNodes().node(nonRemoteNode.getId()); @@ -196,7 +225,7 @@ public void testAllocateNewPrimaryShardOnRemoteNodeForMixedModeAndRemoteStoreDir .localNodeId(remoteNode.getId()) .build(); - beforeAllocation(); + beforeAllocation(REMOTE_STORE.direction); ShardRouting primaryShardRouting = clusterState.getRoutingTable().shardRoutingTable(TEST_INDEX, 0).primaryShard(); RoutingNode remoteRoutingNode = clusterState.getRoutingNodes().node(remoteNode.getId()); @@ -216,39 +245,11 @@ public void testDontAllocateNewReplicaShardOnRemoteNodeIfPrimaryShardOnNonRemote replicaCount = 1; isMixedMode = true; - ShardId shardId = new ShardId(TEST_INDEX, "_na_", 0); - DiscoveryNode nonRemoteNode = getNonRemoteNode(); DiscoveryNode remoteNode = getRemoteNode(); - routingTable = RoutingTable.builder() - .add( - IndexRoutingTable.builder(shardId.getIndex()) - .addIndexShard( - new IndexShardRoutingTable.Builder(shardId).addShard( - // primary on non-remote node - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - nonRemoteNode.getId(), - true, - ShardRoutingState.STARTED - ) - ) - .addShard( - // new replica's allocation - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - null, - false, - ShardRoutingState.UNASSIGNED - ) - ) - .build() - ) - ) - .build(); + // primary on non-remote node, new replica's allocation + prepareRoutingTable(true, nonRemoteNode.getId()); discoveryNodes = DiscoveryNodes.builder() .add(nonRemoteNode) @@ -257,7 +258,7 @@ public void testDontAllocateNewReplicaShardOnRemoteNodeIfPrimaryShardOnNonRemote .localNodeId(remoteNode.getId()) .build(); - beforeAllocation(); + beforeAllocation(REMOTE_STORE.direction); assertEquals(2, clusterState.getRoutingTable().allShards().size()); ShardRouting replicaShardRouting = clusterState.getRoutingTable().shardRoutingTable(TEST_INDEX, 0).replicaShards().get(0); @@ -278,40 +279,12 @@ public void testAllocateNewReplicaShardOnRemoteNodeIfPrimaryShardOnRemoteNodeFor replicaCount = 1; isMixedMode = true; - ShardId shardId = new ShardId(TEST_INDEX, "_na_", 0); - DiscoveryNode remoteNode1 = getRemoteNode(); DiscoveryNode remoteNode2 = getRemoteNode(); DiscoveryNode nonRemoteNode = getNonRemoteNode(); - routingTable = RoutingTable.builder() - .add( - IndexRoutingTable.builder(shardId.getIndex()) - .addIndexShard( - new IndexShardRoutingTable.Builder(shardId).addShard( - // primary on remote node - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - remoteNode1.getId(), - true, - ShardRoutingState.STARTED - ) - ) - .addShard( - // new replica's allocation - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - null, - false, - ShardRoutingState.UNASSIGNED - ) - ) - .build() - ) - ) - .build(); + // primary on remote node, new replica's allocation + prepareRoutingTable(true, remoteNode1.getId()); discoveryNodes = DiscoveryNodes.builder() .add(remoteNode1) @@ -322,7 +295,7 @@ public void testAllocateNewReplicaShardOnRemoteNodeIfPrimaryShardOnRemoteNodeFor .localNodeId(nonRemoteNode.getId()) .build(); - beforeAllocation(); + beforeAllocation(REMOTE_STORE.direction); assertEquals(2, clusterState.getRoutingTable().allShards().size()); ShardRouting replicaShardRouting = clusterState.getRoutingTable().shardRoutingTable(TEST_INDEX, 0).replicaShards().get(0); @@ -343,40 +316,12 @@ public void testAllocateNewReplicaShardOnNonRemoteNodeIfPrimaryShardOnNonRemoteN replicaCount = 1; isMixedMode = true; - ShardId shardId = new ShardId(TEST_INDEX, "_na_", 0); - DiscoveryNode remoteNode = getRemoteNode(); DiscoveryNode nonRemoteNode1 = getNonRemoteNode(); DiscoveryNode nonRemoteNode2 = getNonRemoteNode(); - routingTable = RoutingTable.builder() - .add( - IndexRoutingTable.builder(shardId.getIndex()) - .addIndexShard( - new IndexShardRoutingTable.Builder(shardId).addShard( - // primary shard on non-remote node - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - nonRemoteNode1.getId(), - true, - ShardRoutingState.STARTED - ) - ) - .addShard( - // new replica's allocation - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - null, - false, - ShardRoutingState.UNASSIGNED - ) - ) - .build() - ) - ) - .build(); + // primary shard on non-remote node, new replica's allocation + prepareRoutingTable(true, nonRemoteNode1.getId()); discoveryNodes = DiscoveryNodes.builder() .add(remoteNode) @@ -387,7 +332,7 @@ public void testAllocateNewReplicaShardOnNonRemoteNodeIfPrimaryShardOnNonRemoteN .localNodeId(nonRemoteNode2.getId()) .build(); - beforeAllocation(); + beforeAllocation(REMOTE_STORE.direction); assertEquals(2, clusterState.getRoutingTable().allShards().size()); @@ -411,39 +356,11 @@ public void testAllocateNewReplicaShardOnNonRemoteNodeIfPrimaryShardOnRemoteNode replicaCount = 1; isMixedMode = true; - ShardId shardId = new ShardId(TEST_INDEX, "_na_", 0); - DiscoveryNode nonRemoteNode = getNonRemoteNode(); DiscoveryNode remoteNode = getRemoteNode(); - routingTable = RoutingTable.builder() - .add( - IndexRoutingTable.builder(shardId.getIndex()) - .addIndexShard( - new IndexShardRoutingTable.Builder(shardId).addShard( - // primary shard on non-remote node - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - remoteNode.getId(), - true, - ShardRoutingState.STARTED - ) - ) - .addShard( - // new replica's allocation - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - null, - false, - ShardRoutingState.UNASSIGNED - ) - ) - .build() - ) - ) - .build(); + // primary shard on remote node, new replica's allocation + prepareRoutingTable(true, remoteNode.getId()); discoveryNodes = DiscoveryNodes.builder() .add(nonRemoteNode) @@ -452,7 +369,7 @@ public void testAllocateNewReplicaShardOnNonRemoteNodeIfPrimaryShardOnRemoteNode .localNodeId(remoteNode.getId()) .build(); - beforeAllocation(); + beforeAllocation(REMOTE_STORE.direction); assertEquals(2, clusterState.getRoutingTable().allShards().size()); ShardRouting replicaShardRouting = clusterState.getRoutingTable().shardRoutingTable(TEST_INDEX, 0).replicaShards().get(0); @@ -478,39 +395,12 @@ public void testAlwaysAllocateNewShardForStrictMode() { isMixedMode = false; isRemoteStoreBackedIndex = false; - ShardId shardId = new ShardId(TEST_INDEX, "_na_", 0); - DiscoveryNode nonRemoteNode1 = getNonRemoteNode(); DiscoveryNode nonRemoteNode2 = getNonRemoteNode(); boolean isReplicaAllocation = randomBoolean(); - routingTable = RoutingTable.builder() - .add( - IndexRoutingTable.builder(shardId.getIndex()) - .addIndexShard( - new IndexShardRoutingTable.Builder(shardId).addShard( - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - (isReplicaAllocation ? nonRemoteNode1.getId() : null), - true, - (isReplicaAllocation ? ShardRoutingState.STARTED : ShardRoutingState.UNASSIGNED) - ) - ) - .addShard( - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - null, - false, - ShardRoutingState.UNASSIGNED - ) - ) - .build() - ) - ) - .build(); + prepareRoutingTable(isReplicaAllocation, nonRemoteNode1.getId()); discoveryNodes = DiscoveryNodes.builder() .add(nonRemoteNode1) @@ -519,7 +409,7 @@ public void testAlwaysAllocateNewShardForStrictMode() { .localNodeId(nonRemoteNode2.getId()) .build(); - beforeAllocation(); + beforeAllocation(REMOTE_STORE.direction); assertEquals(2, clusterState.getRoutingTable().allShards().size()); @@ -543,33 +433,7 @@ public void testAlwaysAllocateNewShardForStrictMode() { DiscoveryNode remoteNode1 = getRemoteNode(); DiscoveryNode remoteNode2 = getRemoteNode(); - routingTable = RoutingTable.builder() - .add( - IndexRoutingTable.builder(shardId.getIndex()) - .addIndexShard( - new IndexShardRoutingTable.Builder(shardId).addShard( - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - (isReplicaAllocation ? remoteNode1.getId() : null), - true, - (isReplicaAllocation ? ShardRoutingState.STARTED : ShardRoutingState.UNASSIGNED) - ) - ) - .addShard( - // new replica's allocation - TestShardRouting.newShardRouting( - shardId.getIndexName(), - shardId.getId(), - null, - false, - ShardRoutingState.UNASSIGNED - ) - ) - .build() - ) - ) - .build(); + prepareRoutingTable(isReplicaAllocation, remoteNode1.getId()); discoveryNodes = DiscoveryNodes.builder() .add(remoteNode1) @@ -578,7 +442,7 @@ public void testAlwaysAllocateNewShardForStrictMode() { .localNodeId(remoteNode2.getId()) .build(); - beforeAllocation(); + beforeAllocation(REMOTE_STORE.direction); assertEquals(2, clusterState.getRoutingTable().allShards().size()); @@ -598,6 +462,97 @@ public void testAlwaysAllocateNewShardForStrictMode() { assertThat(decision.getExplanation().toLowerCase(Locale.ROOT), is(reason)); } + // test for NONE direction + public void testAllocationForNoneDirection() { + shardCount = 1; + replicaCount = 1; + isMixedMode = true; + isRemoteStoreBackedIndex = false; // non-remote store backed index + + DiscoveryNode remoteNode1 = getRemoteNode(); + DiscoveryNode remoteNode2 = getRemoteNode(); + DiscoveryNode nonRemoteNode1 = getNonRemoteNode(); + DiscoveryNode nonRemoteNode2 = getNonRemoteNode(); + + boolean isReplicaAllocation = randomBoolean(); + + prepareRoutingTable(isReplicaAllocation, nonRemoteNode1.getId()); + + discoveryNodes = DiscoveryNodes.builder() + .add(remoteNode1) + .localNodeId(remoteNode1.getId()) + .add(remoteNode2) + .localNodeId(remoteNode2.getId()) + .add(nonRemoteNode1) + .localNodeId(nonRemoteNode1.getId()) + .add(nonRemoteNode2) + .localNodeId(nonRemoteNode2.getId()) + .build(); + + beforeAllocation(NONE.direction); + assertEquals(2, clusterState.getRoutingTable().allShards().size()); + + ShardRouting shardRouting = clusterState.getRoutingTable().shardRoutingTable(TEST_INDEX, 0).primaryShard(); + if (isReplicaAllocation) { + shardRouting = clusterState.getRoutingTable().shardRoutingTable(TEST_INDEX, 0).replicaShards().get(0); + } + RoutingNode nonRemoteRoutingNode = clusterState.getRoutingNodes().node(nonRemoteNode2.getId()); + RoutingNode remoteRoutingNode = clusterState.getRoutingNodes().node(remoteNode2.getId()); + + // allocation decision for non-remote node for non-remote store backed index + Decision decision = remoteStoreMigrationAllocationDecider.canAllocate(shardRouting, nonRemoteRoutingNode, routingAllocation); + assertThat(decision.type(), is(Decision.Type.YES)); + String reason = String.format( + Locale.ROOT, + "[none migration_direction]: %s shard copy can be allocated to a non-remote node for non remote store backed index", + (isReplicaAllocation ? "replica" : "primary") + ); + assertThat(decision.getExplanation().toLowerCase(Locale.ROOT), is(reason)); + + // allocation decision for remote node for non-remote store backed index + decision = remoteStoreMigrationAllocationDecider.canAllocate(shardRouting, remoteRoutingNode, routingAllocation); + assertThat(decision.type(), is(Decision.Type.NO)); + reason = String.format( + Locale.ROOT, + "[none migration_direction]: %s shard copy can not be allocated to a remote node for non remote store backed index", + (isReplicaAllocation ? "replica" : "primary") + ); + assertThat(decision.getExplanation().toLowerCase(Locale.ROOT), is(reason)); + + isRemoteStoreBackedIndex = true; // remote store backed index + prepareRoutingTable(isReplicaAllocation, remoteNode1.getId()); + + beforeAllocation(NONE.direction); + assertEquals(2, clusterState.getRoutingTable().allShards().size()); + + shardRouting = clusterState.getRoutingTable().shardRoutingTable(TEST_INDEX, 0).primaryShard(); + if (isReplicaAllocation) { + shardRouting = clusterState.getRoutingTable().shardRoutingTable(TEST_INDEX, 0).replicaShards().get(0); + } + nonRemoteRoutingNode = clusterState.getRoutingNodes().node(nonRemoteNode2.getId()); + remoteRoutingNode = clusterState.getRoutingNodes().node(remoteNode2.getId()); + + // allocation decision for non-remote node for remote store backed index + decision = remoteStoreMigrationAllocationDecider.canAllocate(shardRouting, nonRemoteRoutingNode, routingAllocation); + assertThat(decision.type(), is(Decision.Type.NO)); + reason = String.format( + Locale.ROOT, + "[none migration_direction]: %s shard copy can not be allocated to a non-remote node for remote store backed index", + (isReplicaAllocation ? "replica" : "primary") + ); + assertThat(decision.getExplanation().toLowerCase(Locale.ROOT), is(reason)); + + // allocation decision for remote node for remote store backed index + decision = remoteStoreMigrationAllocationDecider.canAllocate(shardRouting, remoteRoutingNode, routingAllocation); + assertThat(decision.type(), is(Decision.Type.YES)); + reason = String.format( + Locale.ROOT, + "[none migration_direction]: %s shard copy can be allocated to a remote node for remote store backed index", + (isReplicaAllocation ? "replica" : "primary") + ); + assertThat(decision.getExplanation().toLowerCase(Locale.ROOT), is(reason)); + } + // prepare index metadata for test-index private IndexMetadata.Builder getIndexMetadataBuilder(boolean isRemoteStoreBackedIndex, int shardCount, int replicaCount) { Settings.Builder builder = settings(Version.CURRENT); @@ -614,7 +569,7 @@ private IndexMetadata.Builder getIndexMetadataBuilder(boolean isRemoteStoreBacke private Settings getCustomSettings(String direction, String compatibilityMode, IndexMetadata.Builder indexMetadataBuilder) { Settings.Builder builder = Settings.builder(); // direction settings - if (direction.toLowerCase(Locale.ROOT).equals(RemoteStoreNodeService.Direction.REMOTE_STORE.direction)) { + if (direction.toLowerCase(Locale.ROOT).equals(REMOTE_STORE.direction)) { builder.put(remoteStoreDirectionSettings); } else if (direction.toLowerCase(Locale.ROOT).equals(RemoteStoreNodeService.Direction.DOCREP.direction)) { builder.put(docrepDirectionSettings);