From 43cfe4ebbb608f79124b88f0f510ee91ca79009d Mon Sep 17 00:00:00 2001 From: Marc Handalian Date: Wed, 18 Dec 2024 11:19:06 -0800 Subject: [PATCH] Add more tests for failover case Signed-off-by: Marc Handalian --- .../SearchReplicaReplicationIT.java | 48 ++++++++++++++- .../replication/SearchReplicaRestoreIT.java | 58 +++++++++++++++++++ 2 files changed, 105 insertions(+), 1 deletion(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaReplicationIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaReplicationIT.java index b9bc592071aa1..359a1651b5c57 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaReplicationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaReplicationIT.java @@ -8,14 +8,18 @@ package org.opensearch.indices.replication; +import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; +import org.opensearch.cluster.health.ClusterHealthStatus; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.FeatureFlags; +import org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; import org.junit.After; import java.nio.file.Path; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) @@ -46,7 +50,7 @@ public Settings indexSettings() { return Settings.builder() .put(super.indexSettings()) .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) - .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + .put(SETTING_NUMBER_OF_REPLICAS, 0) .put(IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS, 1) .build(); } @@ -107,4 +111,46 @@ public void testRecoveryAfterDocsIndexed() throws Exception { ensureGreen(INDEX_NAME); assertDocCounts(10, replica); } + + public void testFailoverToNewPrimaryWithPollingReplication() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + final String primary = internalCluster().startDataOnlyNode(); + createIndex(INDEX_NAME); + ensureYellowAndNoInitializingShards(INDEX_NAME); + final int docCount = 10; + for (int i = 0; i < docCount; i++) { + client().prepareIndex(INDEX_NAME).setId(Integer.toString(i)).setSource("field", "value" + i).execute().get(); + } + refresh(INDEX_NAME); + + final String replica = internalCluster().startDataOnlyNode(); + ensureGreen(INDEX_NAME); + assertDocCounts(10, replica); + + client().admin() + .indices() + .prepareUpdateSettings(INDEX_NAME) + .setSettings(Settings.builder().put(SETTING_NUMBER_OF_REPLICAS, 1)) + .get(); + final String writer_replica = internalCluster().startDataOnlyNode(); + ensureGreen(INDEX_NAME); + + // stop the primary + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primary)); + + assertBusy(() -> { + ClusterHealthResponse clusterHealthResponse = clusterAdmin().prepareHealth(INDEX_NAME).get(); + assertEquals(ClusterHealthStatus.YELLOW, clusterHealthResponse.getStatus()); + }); + ClusterHealthResponse clusterHealthResponse = clusterAdmin().prepareHealth(INDEX_NAME).get(); + assertEquals(ClusterHealthStatus.YELLOW, clusterHealthResponse.getStatus()); + assertDocCounts(10, replica); + + for (int i = docCount; i < docCount * 2; i++) { + client().prepareIndex(INDEX_NAME).setId(Integer.toString(i)).setSource("field", "value" + i).execute().get(); + } + refresh(INDEX_NAME); + assertBusy(() -> { assertDocCounts(20, replica, writer_replica); }); + + } } diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaRestoreIT.java index e8d65e07c7dd9..717dd0d701dde 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SearchReplicaRestoreIT.java @@ -8,19 +8,25 @@ package org.opensearch.indices.replication; +import org.opensearch.action.admin.cluster.health.ClusterHealthResponse; +import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotRequestBuilder; import org.opensearch.action.search.SearchResponse; +import org.opensearch.cluster.health.ClusterHealthStatus; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.FeatureFlags; +import org.opensearch.core.rest.RestStatus; import org.opensearch.index.query.QueryBuilders; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.remotestore.RemoteSnapshotIT; import org.opensearch.snapshots.SnapshotRestoreException; +import org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; import java.util.List; +import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount; @@ -97,6 +103,58 @@ public void testSearchReplicaRestore_WhenSnapshotOnSegRepWithSearchReplica_Resto assertTrue(exception.getMessage().contains(getSnapshotExceptionMessage(ReplicationType.SEGMENT, ReplicationType.DOCUMENT))); } + @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/15952") + public void testStopPrimary_RestoreOnNewNode() throws Exception { + internalCluster().startClusterManagerOnlyNode(); + final String primary = internalCluster().startDataOnlyNode(); + createIndex( + INDEX_NAME, + Settings.builder() + .put(indexSettings()) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetadata.SETTING_NUMBER_OF_SEARCH_REPLICAS, 1) + .build() + ); + ensureYellowAndNoInitializingShards(INDEX_NAME); + final int docCount = 10; + for (int i = 0; i < docCount; i++) { + client().prepareIndex(INDEX_NAME).setId(Integer.toString(i)).setSource("field", "value" + i).execute().get(); + } + refresh(INDEX_NAME); + assertDocCounts(docCount, primary); + + final String replica = internalCluster().startDataOnlyNode(); + ensureGreen(INDEX_NAME); + assertDocCounts(docCount, replica); + createRepository(REPOSITORY_NAME, FS_REPOSITORY_TYPE, randomRepoPath().toAbsolutePath()); + createSnapshot(REPOSITORY_NAME, SNAPSHOT_NAME, List.of(INDEX_NAME)); + // stop the primary + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primary)); + + assertBusy(() -> { + ClusterHealthResponse clusterHealthResponse = clusterAdmin().prepareHealth(INDEX_NAME).get(); + assertEquals(ClusterHealthStatus.RED, clusterHealthResponse.getStatus()); + }); + assertDocCounts(docCount, replica); + + String restoredPrimary = internalCluster().startDataOnlyNode(); + + RestoreSnapshotRequestBuilder builder = client().admin() + .cluster() + .prepareRestoreSnapshot(REPOSITORY_NAME, SNAPSHOT_NAME) + .setWaitForCompletion(true); + assertEquals(builder.get().status(), RestStatus.ACCEPTED); + ensureGreen(INDEX_NAME); + assertDocCounts(docCount, replica, restoredPrimary); + } + + protected void assertDocCounts(int expectedDocCount, String... nodeNames) { + for (String node : nodeNames) { + assertHitCount(client(node).prepareSearch(INDEX_NAME).setSize(0).setPreference("_only_local").get(), expectedDocCount); + } + } + private void bootstrapIndexWithOutSearchReplicas(ReplicationType replicationType) throws InterruptedException { startCluster(2);