diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 4fa118e8486f1..8076adcf00ca9 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1 @@ -* @reta @anasalkouz @andrross @Bukhtawar @CEHENKLE @dblock @gbbafna @setiah @kartg @kotwanikunal @mch2 @nknize @owaiskazi19 @peternied @Rishikesh1159 @ryanbogan @saratvemulapalli @shwetathareja @dreamer-89 @tlfeng @VachaShah @dbwiddis @sachinpkale @sohami @msfroh +* @abbashus @adnapibar @anasalkouz @andrross @Bukhtawar @CEHENKLE @dblock @dbwiddis @dreamer-89 @gbbafna @kartg @kotwanikunal @mch2 @msfroh @nknize @owaiskazi19 @peternied @reta @Rishikesh1159 @ryanbogan @sachinpkale @saratvemulapalli @setiah @shwetathareja @sohami @tlfeng @VachaShah diff --git a/CHANGELOG.md b/CHANGELOG.md index 3471564dba15e..0ad18b94f31b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,11 +10,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Allow mmap to use new JDK-19 preview APIs in Apache Lucene 9.4+ ([#5151](https://github.com/opensearch-project/OpenSearch/pull/5151)) - Add events correlation engine plugin ([#6854](https://github.com/opensearch-project/OpenSearch/issues/6854)) - Introduce new dynamic cluster setting to control slice computation for concurrent segment search ([#9107](https://github.com/opensearch-project/OpenSearch/pull/9107)) -- Implement on behalf of token passing for extensions ([#8679](https://github.com/opensearch-project/OpenSearch/pull/8679)) +- Implement on behalf of token passing for extensions ([#8679](https://github.com/opensearch-project/OpenSearch/pull/8679), [#10664](https://github.com/opensearch-project/OpenSearch/pull/10664)) - Provide service accounts tokens to extensions ([#9618](https://github.com/opensearch-project/OpenSearch/pull/9618)) - [Admission control] Add enhancements to FS stats to include read/write time, queue size and IO time ([#10541](https://github.com/opensearch-project/OpenSearch/pull/10541)) - [Admission control] Add Resource usage 
collector service and resource usage tracker ([#9890](https://github.com/opensearch-project/OpenSearch/pull/9890)) -- Change file names for remote cluster state ([#10557](https://github.com/opensearch-project/OpenSearch/pull/10557)) +- [Remote cluster state] Change file names for remote cluster state ([#10557](https://github.com/opensearch-project/OpenSearch/pull/10557)) +- [Remote cluster state] Upload global metadata in cluster state to remote store ([#10404](https://github.com/opensearch-project/OpenSearch/pull/10404)) +- [Remote cluster state] Download functionality of global metadata from remote store ([#10535](https://github.com/opensearch-project/OpenSearch/pull/10535)) +- [Remote cluster state] Restore global metadata from remote store when local state is lost after quorum loss ([#10404](https://github.com/opensearch-project/OpenSearch/pull/10404)) ### Dependencies - Bump `log4j-core` from 2.18.0 to 2.19.0 @@ -87,6 +90,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased 2.x] ### Added - Per request phase latency ([#10351](https://github.com/opensearch-project/OpenSearch/issues/10351)) +- [Remote Store] Add repository stats for remote store ([#10567](https://github.com/opensearch-project/OpenSearch/pull/10567)) ### Dependencies - Bump `com.google.api.grpc:proto-google-common-protos` from 2.10.0 to 2.25.1 ([#10208](https://github.com/opensearch-project/OpenSearch/pull/10208), [#10298](https://github.com/opensearch-project/OpenSearch/pull/10298)) @@ -121,4 +125,4 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Security [Unreleased 3.0]: https://github.com/opensearch-project/OpenSearch/compare/2.x...HEAD -[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.12...2.x \ No newline at end of file +[Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/2.12...2.x diff --git 
a/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java b/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java index 69c7899a8685d..4dcaab5d31c96 100644 --- a/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java +++ b/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java @@ -37,6 +37,8 @@ import software.amazon.awssdk.core.internal.http.pipeline.stages.ApplyTransactionIdStage; +import org.opensearch.action.admin.indices.forcemerge.ForceMergeResponse; +import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.RepositoryMetadata; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.SuppressForbidden; @@ -51,10 +53,15 @@ import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.plugins.Plugin; +import org.opensearch.repositories.RepositoriesService; +import org.opensearch.repositories.Repository; +import org.opensearch.repositories.RepositoryMissingException; +import org.opensearch.repositories.RepositoryStats; import org.opensearch.repositories.blobstore.BlobStoreRepository; import org.opensearch.repositories.blobstore.OpenSearchMockAPIBasedRepositoryIntegTestCase; import org.opensearch.repositories.s3.utils.AwsRequestSigner; import org.opensearch.snapshots.mockstore.BlobStoreWrapper; +import org.opensearch.test.BackgroundIndexer; import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.threadpool.ThreadPool; @@ -63,12 +70,18 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; +import java.util.stream.StreamSupport; import 
fixture.s3.S3HttpHandler; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; @SuppressForbidden(reason = "this test uses a HttpServer to emulate an S3 endpoint") // Need to set up a new cluster for each test because cluster settings use randomized authentication settings @@ -152,6 +165,66 @@ protected Settings nodeSettings(int nodeOrdinal) { return builder.build(); } + @Override + public void testRequestStats() throws Exception { + final String repository = createRepository(randomName()); + final String index = "index-no-merges"; + createIndex( + index, + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build() + ); + + final long nbDocs = randomLongBetween(10_000L, 20_000L); + try (BackgroundIndexer indexer = new BackgroundIndexer(index, "_doc", client(), (int) nbDocs)) { + waitForDocs(nbDocs, indexer); + } + + flushAndRefresh(index); + ForceMergeResponse forceMerge = client().admin().indices().prepareForceMerge(index).setFlush(true).setMaxNumSegments(1).get(); + assertThat(forceMerge.getSuccessfulShards(), equalTo(1)); + assertHitCount(client().prepareSearch(index).setSize(0).setTrackTotalHits(true).get(), nbDocs); + + final String snapshot = "snapshot"; + assertSuccessfulSnapshot( + client().admin().cluster().prepareCreateSnapshot(repository, snapshot).setWaitForCompletion(true).setIndices(index) + ); + + assertAcked(client().admin().indices().prepareDelete(index)); + + assertSuccessfulRestore(client().admin().cluster().prepareRestoreSnapshot(repository, snapshot).setWaitForCompletion(true)); + ensureGreen(index); + assertHitCount(client().prepareSearch(index).setSize(0).setTrackTotalHits(true).get(), nbDocs); + + assertAcked(client().admin().cluster().prepareDeleteSnapshot(repository, 
snapshot).get()); + + final RepositoryStats repositoryStats = StreamSupport.stream( + internalCluster().getInstances(RepositoriesService.class).spliterator(), + false + ).map(repositoriesService -> { + try { + return repositoriesService.repository(repository); + } catch (RepositoryMissingException e) { + return null; + } + }).filter(Objects::nonNull).map(Repository::stats).reduce(RepositoryStats::merge).get(); + + Map> extendedStats = repositoryStats.extendedStats; + Map aggregatedStats = new HashMap<>(); + extendedStats.forEach((k, v) -> { + if (k == BlobStore.Metric.RETRY_COUNT || k == BlobStore.Metric.REQUEST_SUCCESS || k == BlobStore.Metric.REQUEST_FAILURE) { + for (Map.Entry entry : v.entrySet()) { + aggregatedStats.merge(entry.getKey(), entry.getValue(), Math::addExact); + } + } + + }); + final Map mockCalls = getMockRequestCounts(); + + String assertionErrorMsg = String.format("SDK sent [%s] calls and handler measured [%s] calls", aggregatedStats, mockCalls); + + assertEquals(assertionErrorMsg, mockCalls, aggregatedStats); + } + /** * S3RepositoryPlugin that allows to disable chunked encoding and to set a low threshold between single upload and multipart upload. 
*/ @@ -263,6 +336,8 @@ public void maybeTrack(final String request, Headers requestHeaders) { trackRequest("PutMultipartObject"); } else if (Regex.simpleMatch("PUT /*/*", request)) { trackRequest("PutObject"); + } else if (Regex.simpleMatch("POST /*?delete*", request)) { + trackRequest("DeleteObjects"); } } diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java index 846c1ce60da01..c1180aab0e0c7 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java @@ -204,7 +204,7 @@ public void asyncBlobUpload(WriteContext writeContext, ActionListener comp s3AsyncClient = amazonS3Reference.get().client(); } CompletableFuture completableFuture = blobStore.getAsyncTransferManager() - .uploadObject(s3AsyncClient, uploadRequest, streamContext); + .uploadObject(s3AsyncClient, uploadRequest, streamContext, blobStore.getStatsMetricPublisher()); completableFuture.whenComplete((response, throwable) -> { if (throwable == null) { completionListener.onResponse(response); @@ -389,7 +389,7 @@ private void doDeleteBlobs(List blobNames, boolean relative) throws IOEx assert outstanding.isEmpty(); } - private static DeleteObjectsRequest bulkDelete(String bucket, List blobs) { + private DeleteObjectsRequest bulkDelete(String bucket, List blobs) { return DeleteObjectsRequest.builder() .bucket(bucket) .delete( @@ -398,6 +398,7 @@ private static DeleteObjectsRequest bulkDelete(String bucket, List blobs .quiet(true) .build() ) + .overrideConfiguration(o -> o.addMetricPublisher(blobStore.getStatsMetricPublisher().deleteObjectsMetricPublisher)) .build(); } diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java 
b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java index 1c98a0a82dcef..e8e043357e126 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java @@ -47,6 +47,8 @@ import org.opensearch.repositories.s3.async.AsyncTransferManager; import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; import java.util.Locale; import java.util.Map; @@ -183,6 +185,16 @@ public Map stats() { return statsMetricPublisher.getStats().toMap(); } + @Override + public Map> extendedStats() { + if (statsMetricPublisher.getExtendedStats() == null || statsMetricPublisher.getExtendedStats().isEmpty()) { + return Collections.emptyMap(); + } + Map> extendedStats = new HashMap<>(); + statsMetricPublisher.getExtendedStats().forEach((k, v) -> extendedStats.put(k, v.toMap())); + return extendedStats; + } + public ObjectCannedACL getCannedACL() { return cannedACL; } diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java index 075701bb3476a..f668722f0a011 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java @@ -38,6 +38,7 @@ import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.concurrent.OpenSearchExecutors; import org.opensearch.core.common.io.stream.NamedWriteableRegistry; import org.opensearch.core.xcontent.NamedXContentRegistry; @@ -55,6 +56,7 @@ import org.opensearch.script.ScriptService; import org.opensearch.threadpool.ExecutorBuilder; import 
org.opensearch.threadpool.FixedExecutorBuilder; +import org.opensearch.threadpool.ScalingExecutorBuilder; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -97,24 +99,25 @@ public S3RepositoryPlugin(final Settings settings, final Path configPath) { @Override public List> getExecutorBuilders(Settings settings) { List> executorBuilders = new ArrayList<>(); + int halfProcMaxAt5 = halfAllocatedProcessorsMaxFive(allocatedProcessors(settings)); executorBuilders.add( - new FixedExecutorBuilder(settings, URGENT_FUTURE_COMPLETION, urgentPoolCount(settings), 10_000, URGENT_FUTURE_COMPLETION) - ); - executorBuilders.add( - new FixedExecutorBuilder(settings, URGENT_STREAM_READER, urgentPoolCount(settings), 10_000, URGENT_STREAM_READER) + new FixedExecutorBuilder(settings, URGENT_FUTURE_COMPLETION, urgentPoolCount(settings), 10_000, URGENT_FUTURE_COMPLETION) ); + executorBuilders.add(new ScalingExecutorBuilder(URGENT_STREAM_READER, 1, halfProcMaxAt5, TimeValue.timeValueMinutes(5))); executorBuilders.add( new FixedExecutorBuilder(settings, PRIORITY_FUTURE_COMPLETION, priorityPoolCount(settings), 10_000, PRIORITY_FUTURE_COMPLETION) ); - executorBuilders.add( - new FixedExecutorBuilder(settings, PRIORITY_STREAM_READER, priorityPoolCount(settings), 10_000, PRIORITY_STREAM_READER) - ); + executorBuilders.add(new ScalingExecutorBuilder(PRIORITY_STREAM_READER, 1, halfProcMaxAt5, TimeValue.timeValueMinutes(5))); executorBuilders.add(new FixedExecutorBuilder(settings, FUTURE_COMPLETION, normalPoolCount(settings), 10_000, FUTURE_COMPLETION)); - executorBuilders.add(new FixedExecutorBuilder(settings, STREAM_READER, normalPoolCount(settings), 10_000, STREAM_READER)); + executorBuilders.add(new ScalingExecutorBuilder(STREAM_READER, 1, halfProcMaxAt5, TimeValue.timeValueMinutes(5))); return executorBuilders; } + static int halfAllocatedProcessorsMaxFive(final int allocatedProcessors) { + return boundedBy((allocatedProcessors + 1) / 2, 1, 
5); + } + S3RepositoryPlugin(final Settings settings, final Path configPath, final S3Service service, final S3AsyncService s3AsyncService) { this.service = Objects.requireNonNull(service, "S3 service must not be null"); this.configPath = configPath; diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/StatsMetricPublisher.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/StatsMetricPublisher.java index cad0037f99249..0c63bfdb1ff97 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/StatsMetricPublisher.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/StatsMetricPublisher.java @@ -8,10 +8,13 @@ package org.opensearch.repositories.s3; -import software.amazon.awssdk.http.HttpMetric; import software.amazon.awssdk.metrics.MetricCollection; import software.amazon.awssdk.metrics.MetricPublisher; +import software.amazon.awssdk.metrics.MetricRecord; +import org.opensearch.common.blobstore.BlobStore; + +import java.time.Duration; import java.util.HashMap; import java.util.Map; import java.util.concurrent.atomic.AtomicLong; @@ -20,18 +23,67 @@ public class StatsMetricPublisher { private final Stats stats = new Stats(); + private final Map extendedStats = new HashMap<>() { + { + put(BlobStore.Metric.REQUEST_LATENCY, new Stats()); + put(BlobStore.Metric.REQUEST_SUCCESS, new Stats()); + put(BlobStore.Metric.REQUEST_FAILURE, new Stats()); + put(BlobStore.Metric.RETRY_COUNT, new Stats()); + } + }; + public MetricPublisher listObjectsMetricPublisher = new MetricPublisher() { @Override public void publish(MetricCollection metricCollection) { - stats.listCount.addAndGet( - metricCollection.children() - .stream() - .filter( - metricRecords -> metricRecords.name().equals("ApiCallAttempt") - && !metricRecords.metricValues(HttpMetric.HTTP_STATUS_CODE).isEmpty() - ) - .count() - ); + for (MetricRecord metricRecord : metricCollection) { + switch (metricRecord.metric().name()) { + 
case "ApiCallDuration": + extendedStats.get(BlobStore.Metric.REQUEST_LATENCY).listMetrics.addAndGet( + ((Duration) metricRecord.value()).toMillis() + ); + break; + case "RetryCount": + extendedStats.get(BlobStore.Metric.RETRY_COUNT).listMetrics.addAndGet(((Integer) metricRecord.value())); + break; + case "ApiCallSuccessful": + if ((Boolean) metricRecord.value()) { + extendedStats.get(BlobStore.Metric.REQUEST_SUCCESS).listMetrics.addAndGet(1); + } else { + extendedStats.get(BlobStore.Metric.REQUEST_FAILURE).listMetrics.addAndGet(1); + } + stats.listMetrics.addAndGet(1); + break; + } + } + } + + @Override + public void close() {} + }; + + public MetricPublisher deleteObjectsMetricPublisher = new MetricPublisher() { + @Override + public void publish(MetricCollection metricCollection) { + for (MetricRecord metricRecord : metricCollection) { + switch (metricRecord.metric().name()) { + case "ApiCallDuration": + extendedStats.get(BlobStore.Metric.REQUEST_LATENCY).deleteMetrics.addAndGet( + ((Duration) metricRecord.value()).toMillis() + ); + break; + case "RetryCount": + extendedStats.get(BlobStore.Metric.RETRY_COUNT).deleteMetrics.addAndGet(((Integer) metricRecord.value())); + break; + case "ApiCallSuccessful": + if ((Boolean) metricRecord.value()) { + extendedStats.get(BlobStore.Metric.REQUEST_SUCCESS).deleteMetrics.addAndGet(1); + } else { + extendedStats.get(BlobStore.Metric.REQUEST_FAILURE).deleteMetrics.addAndGet(1); + } + stats.deleteMetrics.addAndGet(1); + break; + } + } } @Override @@ -41,15 +93,26 @@ public void close() {} public MetricPublisher getObjectMetricPublisher = new MetricPublisher() { @Override public void publish(MetricCollection metricCollection) { - stats.getCount.addAndGet( - metricCollection.children() - .stream() - .filter( - metricRecords -> metricRecords.name().equals("ApiCallAttempt") - && !metricRecords.metricValues(HttpMetric.HTTP_STATUS_CODE).isEmpty() - ) - .count() - ); + for (MetricRecord metricRecord : metricCollection) { + switch 
(metricRecord.metric().name()) { + case "ApiCallDuration": + extendedStats.get(BlobStore.Metric.REQUEST_LATENCY).getMetrics.addAndGet( + ((Duration) metricRecord.value()).toMillis() + ); + break; + case "RetryCount": + extendedStats.get(BlobStore.Metric.RETRY_COUNT).getMetrics.addAndGet(((Integer) metricRecord.value())); + break; + case "ApiCallSuccessful": + if ((Boolean) metricRecord.value()) { + extendedStats.get(BlobStore.Metric.REQUEST_SUCCESS).getMetrics.addAndGet(1); + } else { + extendedStats.get(BlobStore.Metric.REQUEST_FAILURE).getMetrics.addAndGet(1); + } + stats.getMetrics.addAndGet(1); + break; + } + } } @Override @@ -59,15 +122,26 @@ public void close() {} public MetricPublisher putObjectMetricPublisher = new MetricPublisher() { @Override public void publish(MetricCollection metricCollection) { - stats.putCount.addAndGet( - metricCollection.children() - .stream() - .filter( - metricRecords -> metricRecords.name().equals("ApiCallAttempt") - && !metricRecords.metricValues(HttpMetric.HTTP_STATUS_CODE).isEmpty() - ) - .count() - ); + for (MetricRecord metricRecord : metricCollection) { + switch (metricRecord.metric().name()) { + case "ApiCallDuration": + extendedStats.get(BlobStore.Metric.REQUEST_LATENCY).putMetrics.addAndGet( + ((Duration) metricRecord.value()).toMillis() + ); + break; + case "RetryCount": + extendedStats.get(BlobStore.Metric.RETRY_COUNT).putMetrics.addAndGet(((Integer) metricRecord.value())); + break; + case "ApiCallSuccessful": + if ((Boolean) metricRecord.value()) { + extendedStats.get(BlobStore.Metric.REQUEST_SUCCESS).putMetrics.addAndGet(1); + } else { + extendedStats.get(BlobStore.Metric.REQUEST_FAILURE).putMetrics.addAndGet(1); + } + stats.putMetrics.addAndGet(1); + break; + } + } } @Override @@ -77,15 +151,26 @@ public void close() {} public MetricPublisher multipartUploadMetricCollector = new MetricPublisher() { @Override public void publish(MetricCollection metricCollection) { - stats.postCount.addAndGet( - 
metricCollection.children() - .stream() - .filter( - metricRecords -> metricRecords.name().equals("ApiCallAttempt") - && !metricRecords.metricValues(HttpMetric.HTTP_STATUS_CODE).isEmpty() - ) - .count() - ); + for (MetricRecord metricRecord : metricCollection) { + switch (metricRecord.metric().name()) { + case "ApiCallDuration": + extendedStats.get(BlobStore.Metric.REQUEST_LATENCY).multiPartPutMetrics.addAndGet( + ((Duration) metricRecord.value()).toMillis() + ); + break; + case "RetryCount": + extendedStats.get(BlobStore.Metric.RETRY_COUNT).multiPartPutMetrics.addAndGet(((Integer) metricRecord.value())); + break; + case "ApiCallSuccessful": + if ((Boolean) metricRecord.value()) { + extendedStats.get(BlobStore.Metric.REQUEST_SUCCESS).multiPartPutMetrics.addAndGet(1); + } else { + extendedStats.get(BlobStore.Metric.REQUEST_FAILURE).multiPartPutMetrics.addAndGet(1); + } + stats.multiPartPutMetrics.addAndGet(1); + break; + } + } } @Override @@ -96,22 +181,29 @@ public Stats getStats() { return stats; } + public Map getExtendedStats() { + return extendedStats; + } + static class Stats { - final AtomicLong listCount = new AtomicLong(); + final AtomicLong listMetrics = new AtomicLong(); + + final AtomicLong getMetrics = new AtomicLong(); - final AtomicLong getCount = new AtomicLong(); + final AtomicLong putMetrics = new AtomicLong(); - final AtomicLong putCount = new AtomicLong(); + final AtomicLong deleteMetrics = new AtomicLong(); - final AtomicLong postCount = new AtomicLong(); + final AtomicLong multiPartPutMetrics = new AtomicLong(); Map toMap() { final Map results = new HashMap<>(); - results.put("GetObject", getCount.get()); - results.put("ListObjects", listCount.get()); - results.put("PutObject", putCount.get()); - results.put("PutMultipartObject", postCount.get()); + results.put("GetObject", getMetrics.get()); + results.put("ListObjects", listMetrics.get()); + results.put("PutObject", putMetrics.get()); + results.put("DeleteObjects", deleteMetrics.get()); + 
results.put("PutMultipartObject", multiPartPutMetrics.get()); return results; } } diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java index 9a4eff19412ca..933ee6dc29513 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java @@ -23,10 +23,13 @@ import org.opensearch.common.StreamContext; import org.opensearch.common.blobstore.stream.write.WritePriority; import org.opensearch.common.io.InputStreamContainer; +import org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.repositories.s3.SocketAccess; import org.opensearch.repositories.s3.io.CheckedContainer; +import java.io.BufferedInputStream; import java.io.IOException; +import java.io.InputStream; import java.util.ArrayList; import java.util.List; import java.util.concurrent.CompletableFuture; @@ -147,26 +150,39 @@ private static void uploadPart( } else { streamReadExecutor = executorService; } + // Buffered stream is needed to allow mark and reset ops during IO errors so that only buffered + // data can be retried instead of retrying whole file by the application. 
+ InputStream inputStream = new BufferedInputStream(inputStreamContainer.getInputStream(), (int) (ByteSizeUnit.MB.toBytes(1) + 1)); CompletableFuture uploadPartResponseFuture = SocketAccess.doPrivileged( () -> s3AsyncClient.uploadPart( uploadPartRequest, - AsyncRequestBody.fromInputStream( - inputStreamContainer.getInputStream(), - inputStreamContainer.getContentLength(), - streamReadExecutor - ) + AsyncRequestBody.fromInputStream(inputStream, inputStreamContainer.getContentLength(), streamReadExecutor) ) ); - CompletableFuture convertFuture = uploadPartResponseFuture.thenApply( - uploadPartResponse -> convertUploadPartResponse( - completedParts, - inputStreamContainers, - uploadPartResponse, - partNumber, - uploadRequest.doRemoteDataIntegrityCheck() - ) - ); + CompletableFuture convertFuture = uploadPartResponseFuture.whenComplete((resp, throwable) -> { + try { + inputStream.close(); + } catch (IOException ex) { + log.error( + () -> new ParameterizedMessage( + "Failed to close stream while uploading a part of idx {} and file {}.", + uploadPartRequest.partNumber(), + uploadPartRequest.key() + ), + ex + ); + } + }) + .thenApply( + uploadPartResponse -> convertUploadPartResponse( + completedParts, + inputStreamContainers, + uploadPartResponse, + partNumber, + uploadRequest.doRemoteDataIntegrityCheck() + ) + ); futures.add(convertFuture); CompletableFutureUtils.forwardExceptionTo(convertFuture, uploadPartResponseFuture); diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java index 8be7cb84c6cd6..4f1ab9764702e 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java @@ -35,9 +35,12 @@ import org.opensearch.common.util.ByteUtils; import 
org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.repositories.s3.SocketAccess; +import org.opensearch.repositories.s3.StatsMetricPublisher; import org.opensearch.repositories.s3.io.CheckedContainer; +import java.io.BufferedInputStream; import java.io.IOException; +import java.io.InputStream; import java.util.Arrays; import java.util.Base64; import java.util.List; @@ -93,16 +96,21 @@ public AsyncTransferManager( * @param streamContext The {@link StreamContext} to supply streams during upload * @return A {@link CompletableFuture} to listen for upload completion */ - public CompletableFuture uploadObject(S3AsyncClient s3AsyncClient, UploadRequest uploadRequest, StreamContext streamContext) { + public CompletableFuture uploadObject( + S3AsyncClient s3AsyncClient, + UploadRequest uploadRequest, + StreamContext streamContext, + StatsMetricPublisher statsMetricPublisher + ) { CompletableFuture returnFuture = new CompletableFuture<>(); try { if (streamContext.getNumberOfParts() == 1) { log.debug(() -> "Starting the upload as a single upload part request"); - uploadInOneChunk(s3AsyncClient, uploadRequest, streamContext.provideStream(0), returnFuture); + uploadInOneChunk(s3AsyncClient, uploadRequest, streamContext.provideStream(0), returnFuture, statsMetricPublisher); } else { log.debug(() -> "Starting the upload as multipart upload request"); - uploadInParts(s3AsyncClient, uploadRequest, streamContext, returnFuture); + uploadInParts(s3AsyncClient, uploadRequest, streamContext, returnFuture, statsMetricPublisher); } } catch (Throwable throwable) { returnFuture.completeExceptionally(throwable); @@ -115,12 +123,14 @@ private void uploadInParts( S3AsyncClient s3AsyncClient, UploadRequest uploadRequest, StreamContext streamContext, - CompletableFuture returnFuture + CompletableFuture returnFuture, + StatsMetricPublisher statsMetricPublisher ) { CreateMultipartUploadRequest.Builder createMultipartUploadRequestBuilder = CreateMultipartUploadRequest.builder() 
.bucket(uploadRequest.getBucket()) - .key(uploadRequest.getKey()); + .key(uploadRequest.getKey()) + .overrideConfiguration(o -> o.addMetricPublisher(statsMetricPublisher.multipartUploadMetricCollector)); if (uploadRequest.doRemoteDataIntegrityCheck()) { createMultipartUploadRequestBuilder.checksumAlgorithm(ChecksumAlgorithm.CRC32); } @@ -294,12 +304,14 @@ private void uploadInOneChunk( S3AsyncClient s3AsyncClient, UploadRequest uploadRequest, InputStreamContainer inputStreamContainer, - CompletableFuture returnFuture + CompletableFuture returnFuture, + StatsMetricPublisher statsMetricPublisher ) { PutObjectRequest.Builder putObjectRequestBuilder = PutObjectRequest.builder() .bucket(uploadRequest.getBucket()) .key(uploadRequest.getKey()) - .contentLength(uploadRequest.getContentLength()); + .contentLength(uploadRequest.getContentLength()) + .overrideConfiguration(o -> o.addMetricPublisher(statsMetricPublisher.putObjectMetricPublisher)); if (uploadRequest.doRemoteDataIntegrityCheck()) { putObjectRequestBuilder.checksumAlgorithm(ChecksumAlgorithm.CRC32); putObjectRequestBuilder.checksumCRC32(base64StringFromLong(uploadRequest.getExpectedChecksum())); @@ -312,15 +324,22 @@ private void uploadInOneChunk( } else { streamReadExecutor = executorService; } + // Buffered stream is needed to allow mark and reset ops during IO errors so that only buffered + // data can be retried instead of retrying whole file by the application. 
+ InputStream inputStream = new BufferedInputStream(inputStreamContainer.getInputStream(), (int) (ByteSizeUnit.MB.toBytes(1) + 1)); CompletableFuture putObjectFuture = SocketAccess.doPrivileged( () -> s3AsyncClient.putObject( putObjectRequestBuilder.build(), - AsyncRequestBody.fromInputStream( - inputStreamContainer.getInputStream(), - inputStreamContainer.getContentLength(), - streamReadExecutor - ) + AsyncRequestBody.fromInputStream(inputStream, inputStreamContainer.getContentLength(), streamReadExecutor) ).handle((resp, throwable) -> { + try { + inputStream.close(); + } catch (IOException e) { + log.error( + () -> new ParameterizedMessage("Failed to close stream while uploading single file {}.", uploadRequest.getKey()), + e + ); + } if (throwable != null) { Throwable unwrappedThrowable = ExceptionsHelper.unwrap(throwable, S3Exception.class); if (unwrappedThrowable != null) { diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java index 4c8cfaa0e7e15..2437547a80a6f 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java @@ -33,12 +33,18 @@ import org.opensearch.common.io.InputStreamContainer; import org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.repositories.blobstore.ZeroInputStream; +import org.opensearch.repositories.s3.StatsMetricPublisher; import org.opensearch.test.OpenSearchTestCase; import org.junit.Before; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.Executors; +import java.util.concurrent.atomic.AtomicReference; 
import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; @@ -71,17 +77,17 @@ public void testOneChunkUpload() { putObjectResponseCompletableFuture ); + AtomicReference streamRef = new AtomicReference<>(); CompletableFuture resultFuture = asyncTransferManager.uploadObject( s3AsyncClient, new UploadRequest("bucket", "key", ByteSizeUnit.MB.toBytes(1), WritePriority.HIGH, uploadSuccess -> { // do nothing }, false, null), - new StreamContext( - (partIdx, partSize, position) -> new InputStreamContainer(new ZeroInputStream(partSize), partSize, position), - ByteSizeUnit.MB.toBytes(1), - ByteSizeUnit.MB.toBytes(1), - 1 - ) + new StreamContext((partIdx, partSize, position) -> { + streamRef.set(new ZeroInputStream(partSize)); + return new InputStreamContainer(streamRef.get(), partSize, position); + }, ByteSizeUnit.MB.toBytes(1), ByteSizeUnit.MB.toBytes(1), 1), + new StatsMetricPublisher() ); try { @@ -91,6 +97,14 @@ public void testOneChunkUpload() { } verify(s3AsyncClient, times(1)).putObject(any(PutObjectRequest.class), any(AsyncRequestBody.class)); + + boolean closeError = false; + try { + streamRef.get().available(); + } catch (IOException e) { + closeError = e.getMessage().equals("Stream closed"); + } + assertTrue("InputStream was still open after upload", closeError); } public void testOneChunkUploadCorruption() { @@ -119,7 +133,8 @@ public void testOneChunkUploadCorruption() { ByteSizeUnit.MB.toBytes(1), ByteSizeUnit.MB.toBytes(1), 1 - ) + ), + new StatsMetricPublisher() ); try { @@ -160,17 +175,18 @@ public void testMultipartUpload() { abortMultipartUploadResponseCompletableFuture ); + List streams = new ArrayList<>(); CompletableFuture resultFuture = asyncTransferManager.uploadObject( s3AsyncClient, new UploadRequest("bucket", "key", ByteSizeUnit.MB.toBytes(5), WritePriority.HIGH, uploadSuccess -> { // do nothing }, true, 3376132981L), - new StreamContext( - (partIdx, partSize, position) -> new InputStreamContainer(new 
ZeroInputStream(partSize), partSize, position), - ByteSizeUnit.MB.toBytes(1), - ByteSizeUnit.MB.toBytes(1), - 5 - ) + new StreamContext((partIdx, partSize, position) -> { + InputStream stream = new ZeroInputStream(partSize); + streams.add(stream); + return new InputStreamContainer(stream, partSize, position); + }, ByteSizeUnit.MB.toBytes(1), ByteSizeUnit.MB.toBytes(1), 5), + new StatsMetricPublisher() ); try { @@ -179,6 +195,16 @@ public void testMultipartUpload() { fail("did not expect resultFuture to fail"); } + streams.forEach(stream -> { + boolean closeError = false; + try { + stream.available(); + } catch (IOException e) { + closeError = e.getMessage().equals("Stream closed"); + } + assertTrue("InputStream was still open after upload", closeError); + }); + verify(s3AsyncClient, times(1)).createMultipartUpload(any(CreateMultipartUploadRequest.class)); verify(s3AsyncClient, times(5)).uploadPart(any(UploadPartRequest.class), any(AsyncRequestBody.class)); verify(s3AsyncClient, times(1)).completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); @@ -220,7 +246,8 @@ public void testMultipartUploadCorruption() { ByteSizeUnit.MB.toBytes(1), ByteSizeUnit.MB.toBytes(1), 5 - ) + ), + new StatsMetricPublisher() ); try { diff --git a/release-notes/opensearch.release-notes-2.11.0.md b/release-notes/opensearch.release-notes-2.11.0.md index 7ebf1b433c7c6..040cc053469ed 100644 --- a/release-notes/opensearch.release-notes-2.11.0.md +++ b/release-notes/opensearch.release-notes-2.11.0.md @@ -5,6 +5,7 @@ ### Added - Add coordinator level stats for search latency ([#8386](https://github.com/opensearch-project/OpenSearch/issues/8386)) - Add metrics for thread_pool task wait time ([#9681](https://github.com/opensearch-project/OpenSearch/pull/9681)) +- Add parallel file download support for remote store based replication ([#8596](https://github.com/opensearch-project/OpenSearch/pull/8596)) - Async blob read support for S3 plugin 
([#9694](https://github.com/opensearch-project/OpenSearch/pull/9694)) - [Telemetry-Otel] Added support for OtlpGrpcSpanExporter exporter ([#9666](https://github.com/opensearch-project/OpenSearch/pull/9666)) - Async blob read support for encrypted containers ([#10131](https://github.com/opensearch-project/OpenSearch/pull/10131)) diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java index 6fcc89cfe9e9a..7304304e522f8 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java @@ -86,10 +86,10 @@ public void testFullClusterRestoreStaleDelete() throws Exception { assertEquals(10, repository.blobStore().blobContainer(baseMetadataPath.add("manifest")).listBlobsByPrefix("manifest").size()); - Map indexMetadataMap = remoteClusterStateService.getLatestIndexMetadata( + Map indexMetadataMap = remoteClusterStateService.getLatestMetadata( cluster().getClusterName(), getClusterState().metadata().clusterUUID() - ); + ).getIndices(); assertEquals(0, indexMetadataMap.values().stream().findFirst().get().getNumberOfReplicas()); assertEquals(shardCount, indexMetadataMap.values().stream().findFirst().get().getNumberOfShards()); } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java index ad3e99dd274ce..b8481610869e6 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java @@ -58,7 +58,7 @@ protected void restore(String... 
indices) { ); } - protected void verifyRestoredData(Map indexStats, String indexName) throws Exception { + protected void verifyRestoredData(Map indexStats, String indexName, boolean indexMoreData) throws Exception { ensureYellowAndNoInitializingShards(indexName); ensureGreen(indexName); // This is to ensure that shards that were already assigned will get latest count @@ -68,6 +68,8 @@ protected void verifyRestoredData(Map indexStats, String indexName 30, TimeUnit.SECONDS ); + if (indexMoreData == false) return; + IndexResponse response = indexSingleDoc(indexName); if (indexStats.containsKey(MAX_SEQ_NO_TOTAL + "-shard-" + response.getShardId().id())) { assertEquals(indexStats.get(MAX_SEQ_NO_TOTAL + "-shard-" + response.getShardId().id()) + 1, response.getSeqNo()); @@ -80,6 +82,10 @@ protected void verifyRestoredData(Map indexStats, String indexName ); } + protected void verifyRestoredData(Map indexStats, String indexName) throws Exception { + verifyRestoredData(indexStats, indexName, true); + } + public void prepareCluster(int numClusterManagerNodes, int numDataOnlyNodes, String indices, int replicaCount, int shardCount) { prepareCluster(numClusterManagerNodes, numDataOnlyNodes, indices, replicaCount, shardCount, Settings.EMPTY); } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java index e2ef5f85abc74..bccca283ba772 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java @@ -353,7 +353,13 @@ public void assertRemoteStoreRepositoryOnAllNodes(String repositoryName) { // Validated that all the restricted settings are entact on all the nodes. 
repository.getRestrictedSystemRepositorySettings() .stream() - .forEach(setting -> assertEquals(setting.get(actualRepository.settings()), setting.get(expectedRepository.settings()))); + .forEach( + setting -> assertEquals( + String.format(Locale.ROOT, "Restricted Settings mismatch [%s]", setting.getKey()), + setting.get(actualRepository.settings()), + setting.get(expectedRepository.settings()) + ) + ); } } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java index 5e92bb195680b..3a3e293de9b13 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java @@ -8,23 +8,36 @@ package org.opensearch.remotestore; -import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreResponse; import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; -import org.opensearch.action.support.PlainActionFuture; +import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest; +import org.opensearch.action.admin.indices.template.put.PutIndexTemplateRequest; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.block.ClusterBlockException; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.IndexTemplateMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.metadata.RepositoriesMetadata; import org.opensearch.common.settings.Settings; +import org.opensearch.gateway.remote.ClusterMetadataManifest; +import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.test.OpenSearchIntegTestCase; import 
java.io.IOException; import java.nio.file.Files; -import java.util.Locale; +import java.util.Arrays; +import java.util.List; import java.util.Map; import java.util.Objects; import java.util.concurrent.ExecutionException; +import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_READ_ONLY_SETTING; +import static org.opensearch.cluster.metadata.Metadata.CLUSTER_READ_ONLY_BLOCK; +import static org.opensearch.cluster.metadata.Metadata.SETTING_READ_ONLY_SETTING; import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING; import static org.opensearch.indices.ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE; -import static org.opensearch.indices.ShardLimitValidator.SETTING_MAX_SHARDS_PER_CLUSTER_KEY; +import static org.opensearch.repositories.blobstore.BlobStoreRepository.SYSTEM_REPOSITORY_SETTING; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) public class RemoteStoreClusterStateRestoreIT extends BaseRemoteStoreRestoreIT { @@ -48,47 +61,10 @@ private Map initialTestSetup(int shardCount, int replicaCount, int private void resetCluster(int dataNodeCount, int clusterManagerNodeCount) { internalCluster().stopAllNodes(); - addNewNodes(dataNodeCount, clusterManagerNodeCount); + internalCluster().startClusterManagerOnlyNodes(clusterManagerNodeCount); + internalCluster().startDataOnlyNodes(dataNodeCount); } - private void restoreAndValidate(String clusterUUID, Map indexStats) throws Exception { - restoreAndValidate(clusterUUID, indexStats, true); - } - - private void restoreAndValidate(String clusterUUID, Map indexStats, boolean validate) throws Exception { - // TODO once auto restore is merged, the remote cluster state will be restored - - if (validate) { - // Step - 4 validation restore is successful. 
- ensureGreen(INDEX_NAME); - verifyRestoredData(indexStats, INDEX_NAME); - } - } - - private void restoreAndValidateFails( - String clusterUUID, - PlainActionFuture actionListener, - Class clazz, - String errorSubString - ) { - - try { - restoreAndValidate(clusterUUID, null, false); - } catch (Exception e) { - assertTrue( - String.format(Locale.ROOT, "%s %s", clazz, e), - clazz.isAssignableFrom(e.getClass()) - || clazz.isAssignableFrom(e.getCause().getClass()) - || (e.getCause().getCause() != null && clazz.isAssignableFrom(e.getCause().getCause().getClass())) - ); - assertTrue( - String.format(Locale.ROOT, "Error message mismatch. Expected: [%s]. Actual: [%s]", errorSubString, e.getMessage()), - e.getMessage().contains(errorSubString) - ); - } - } - - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834") public void testFullClusterRestore() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; @@ -106,10 +82,10 @@ public void testFullClusterRestore() throws Exception { assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. 
cluster uuid is same"; // Step - 3 Trigger full cluster restore and validate - restoreAndValidate(prevClusterUUID, indexStats); + validateMetadata(List.of(INDEX_NAME)); + verifyRestoredData(indexStats, INDEX_NAME); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834") public void testFullClusterRestoreMultipleIndices() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; @@ -124,6 +100,7 @@ public void testFullClusterRestoreMultipleIndices() throws Exception { Map indexStats2 = indexData(1, false, secondIndexName); assertEquals((shardCount + 1) * (replicaCount + 1), getNumShards(secondIndexName).totalNumShards); ensureGreen(secondIndexName); + updateIndexBlock(true, secondIndexName); String prevClusterUUID = clusterService().state().metadata().clusterUUID(); @@ -134,155 +111,222 @@ public void testFullClusterRestoreMultipleIndices() throws Exception { assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. 
cluster uuid is same"; // Step - 3 Trigger full cluster restore - restoreAndValidate(prevClusterUUID, indexStats); - ensureGreen(secondIndexName); - verifyRestoredData(indexStats2, secondIndexName); + validateMetadata(List.of(INDEX_NAME, secondIndexName)); + verifyRestoredData(indexStats, INDEX_NAME); + verifyRestoredData(indexStats2, secondIndexName, false); + assertTrue(INDEX_READ_ONLY_SETTING.get(clusterService().state().metadata().index(secondIndexName).getSettings())); + assertThrows(ClusterBlockException.class, () -> indexSingleDoc(secondIndexName)); + // Test is complete + + // Remove the block to ensure proper cleanup + updateIndexBlock(false, secondIndexName); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834") - public void testFullClusterRestoreFailureValidationFailures() throws Exception { + public void testFullClusterRestoreManifestFilePointsToInvalidIndexMetadataPathThrowsException() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; int dataNodeCount = shardCount * (replicaCount + 1); int clusterManagerNodeCount = 1; - // index some data to generate files in remote directory - Map indexStats = initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount); - String prevClusterUUID = clusterService().state().metadata().clusterUUID(); + // Step - 1 index some data to generate files in remote directory + initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount); - // Start of Test - 1 - // Test - 1 Trigger full cluster restore and validate it fails due to incorrect cluster UUID - PlainActionFuture future = PlainActionFuture.newFuture(); - restoreAndValidateFails("randomUUID", future, IllegalStateException.class, "Remote Cluster State not found - randomUUID"); - // End of Test - 1 - - // Start of Test - 3 - // Test - 2 Trigger full cluster restore and validate it fails due to cluster UUID same as current cluster UUID - future = 
PlainActionFuture.newFuture(); - restoreAndValidateFails( - clusterService().state().metadata().clusterUUID(), - future, - IllegalArgumentException.class, - "clusterUUID to restore from should be different from current cluster UUID" - ); - // End of Test - 2 + String prevClusterUUID = clusterService().state().metadata().clusterUUID(); + String clusterName = clusterService().state().getClusterName().value(); - // Start of Test - 3 // Step - 2 Replace all nodes in the cluster with new nodes. This ensures new cluster state doesn't have previous index metadata - // Restarting cluster with just 1 data node helps with applying cluster settings - resetCluster(1, clusterManagerNodeCount); - String newClusterUUID = clusterService().state().metadata().clusterUUID(); - assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; - - reduceShardLimits(1, 1); - - // Step - 4 Trigger full cluster restore and validate it fails - future = PlainActionFuture.newFuture(); - restoreAndValidateFails( - prevClusterUUID, - future, - IllegalArgumentException.class, - "this action would add [2] total shards, but this cluster currently has [0]/[1] maximum shards open" - ); - resetShardLimits(); - // End of Test - 3 + internalCluster().stopAllNodes(); + // Step - 3 Delete index metadata file in remote + try { + Files.move( + segmentRepoPath.resolve( + RemoteClusterStateService.encodeString(clusterName) + "/cluster-state/" + prevClusterUUID + "/index" + ), + segmentRepoPath.resolve("cluster-state/") + ); + } catch (IOException e) { + throw new RuntimeException(e); + } + assertThrows(IllegalStateException.class, () -> addNewNodes(dataNodeCount, clusterManagerNodeCount)); + // Test is complete - // Start of Test - 4 - // Test -4 Reset cluster and trigger full restore with same name index in the cluster - // Test -4 Add required nodes for this test after last reset. 
- addNewNodes(dataNodeCount - 1, 0); + // Starting a node without remote state to ensure test cleanup + internalCluster().startNode(Settings.builder().put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), false).build()); + } - newClusterUUID = clusterService().state().metadata().clusterUUID(); - assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; + public void testRemoteStateFullRestart() throws Exception { + int shardCount = randomIntBetween(1, 2); + int replicaCount = 1; + int dataNodeCount = shardCount * (replicaCount + 1); + int clusterManagerNodeCount = 3; - // Test -4 Step - 2 Create a new index with same name - createIndex(INDEX_NAME, remoteStoreIndexSettings(0, 1)); - ensureYellowAndNoInitializingShards(INDEX_NAME); + Map indexStats = initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount); + String prevClusterUUID = clusterService().state().metadata().clusterUUID(); + // Move the manifest path out of the remote cluster-state directory + try { + Files.move( + segmentRepoPath.resolve( + RemoteClusterStateService.encodeString(clusterService().state().getClusterName().value()) + + "/cluster-state/" + + prevClusterUUID + + "/manifest" + ), + segmentRepoPath.resolve("cluster-state/") + ); + } catch (IOException e) { + throw new RuntimeException(e); + } + internalCluster().fullRestart(); ensureGreen(INDEX_NAME); + String newClusterUUID = clusterService().state().metadata().clusterUUID(); + assert Objects.equals(newClusterUUID, prevClusterUUID) : "Full restart not successful. 
cluster uuid has changed"; + validateCurrentMetadata(); + verifyRestoredData(indexStats, INDEX_NAME); + } - future = PlainActionFuture.newFuture(); + private void validateMetadata(List indexNames) { + assertEquals(clusterService().state().metadata().indices().size(), indexNames.size()); + for (String indexName : indexNames) { + assertTrue(clusterService().state().metadata().hasIndex(indexName)); + } + } - // Test -4 Step - 3 Trigger full cluster restore and validate fails - restoreAndValidateFails( - prevClusterUUID, - future, - IllegalStateException.class, - "cannot restore index [remote-store-test-idx-1] because an open index with same name/uuid already exists in the cluster" + private void validateCurrentMetadata() throws Exception { + RemoteClusterStateService remoteClusterStateService = internalCluster().getInstance( + RemoteClusterStateService.class, + internalCluster().getClusterManagerName() ); - - // Test -4 Step - 4 validation restore is successful. - ensureGreen(INDEX_NAME); - // End of Test - 4 + assertBusy(() -> { + ClusterMetadataManifest manifest = remoteClusterStateService.getLatestClusterMetadataManifest( + getClusterState().getClusterName().value(), + getClusterState().metadata().clusterUUID() + ).get(); + ClusterState clusterState = getClusterState(); + Metadata currentMetadata = clusterState.metadata(); + assertEquals(currentMetadata.indices().size(), manifest.getIndices().size()); + assertEquals(currentMetadata.coordinationMetadata().term(), manifest.getClusterTerm()); + assertEquals(clusterState.version(), manifest.getStateVersion()); + assertEquals(clusterState.stateUUID(), manifest.getStateUUID()); + assertEquals(currentMetadata.clusterUUIDCommitted(), manifest.isClusterUUIDCommitted()); + for (UploadedIndexMetadata uploadedIndexMetadata : manifest.getIndices()) { + IndexMetadata currentIndexMetadata = currentMetadata.index(uploadedIndexMetadata.getIndexName()); + assertEquals(currentIndexMetadata.getIndex().getUUID(), 
uploadedIndexMetadata.getIndexUUID()); + } + }); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834") - public void testFullClusterRestoreManifestFilePointsToInvalidIndexMetadataPathThrowsException() throws Exception { + public void testFullClusterRestoreGlobalMetadata() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; int dataNodeCount = shardCount * (replicaCount + 1); int clusterManagerNodeCount = 1; // Step - 1 index some data to generate files in remote directory - initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount); - + Map indexStats = initialTestSetup(shardCount, replicaCount, dataNodeCount, 1); String prevClusterUUID = clusterService().state().metadata().clusterUUID(); + // Create global metadata - register a custom repo + // TODO - uncomment after all customs is also uploaded for all repos - https://github.com/opensearch-project/OpenSearch/issues/10691 + // registerCustomRepository(); + + // Create global metadata - persistent settings + updatePersistentSettings(Settings.builder().put(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), 34).build()); + + // Create global metadata - index template + putIndexTemplate(); + + // Create global metadata - Put cluster block + addClusterLevelReadOnlyBlock(); + // Step - 2 Replace all nodes in the cluster with new nodes. This ensures new cluster state doesn't have previous index metadata resetCluster(dataNodeCount, clusterManagerNodeCount); String newClusterUUID = clusterService().state().metadata().clusterUUID(); assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. 
cluster uuid is same"; - // Step - 4 Delete index metadata file in remote - try { - Files.move( - segmentRepoPath.resolve( - RemoteClusterStateService.encodeString(clusterService().state().getClusterName().value()) - + "/cluster-state/" - + prevClusterUUID - + "/index" - ), - segmentRepoPath.resolve("cluster-state/") - ); - } catch (IOException e) { - throw new RuntimeException(e); - } - - // Step - 5 Trigger full cluster restore and validate fails - PlainActionFuture future = PlainActionFuture.newFuture(); - restoreAndValidateFails(prevClusterUUID, future, IllegalStateException.class, "asdsa"); + // Step - 3 Trigger full cluster restore and validate + // validateCurrentMetadata(); + verifyRestoredData(indexStats, INDEX_NAME, false); + + // validate global metadata restored + verifyRestoredRepositories(); + verifyRestoredIndexTemplate(); + assertEquals(Integer.valueOf(34), SETTING_CLUSTER_MAX_SHARDS_PER_NODE.get(clusterService().state().metadata().settings())); + assertEquals(true, SETTING_READ_ONLY_SETTING.get(clusterService().state().metadata().settings())); + assertTrue(clusterService().state().blocks().hasGlobalBlock(CLUSTER_READ_ONLY_BLOCK)); + // Test is complete + + // Remove the cluster read only block to ensure proper cleanup + updatePersistentSettings(Settings.builder().put(SETTING_READ_ONLY_SETTING.getKey(), false).build()); + assertFalse(clusterService().state().blocks().hasGlobalBlock(CLUSTER_READ_ONLY_BLOCK)); } - private void reduceShardLimits(int maxShardsPerNode, int maxShardsPerCluster) { - // Step 3 - Reduce shard limits to hit shard limit with less no of shards - try { + private void registerCustomRepository() { + assertAcked( client().admin() .cluster() - .updateSettings( - new ClusterUpdateSettingsRequest().transientSettings( - Settings.builder() - .put(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), maxShardsPerNode) - .put(SETTING_MAX_SHARDS_PER_CLUSTER_KEY, maxShardsPerCluster) - ) - ) - .get(); - } catch (InterruptedException | 
ExecutionException e) { - throw new RuntimeException(e); - } + .preparePutRepository("custom-repo") + .setType("fs") + .setSettings(Settings.builder().put("location", randomRepoPath()).put("compress", false)) + .get() + ); + } + + private void verifyRestoredRepositories() { + RepositoriesMetadata repositoriesMetadata = clusterService().state().metadata().custom(RepositoriesMetadata.TYPE); + assertEquals(2, repositoriesMetadata.repositories().size()); // includes remote store repo as well + assertTrue(SYSTEM_REPOSITORY_SETTING.get(repositoriesMetadata.repository(REPOSITORY_NAME).settings())); + assertTrue(SYSTEM_REPOSITORY_SETTING.get(repositoriesMetadata.repository(REPOSITORY_2_NAME).settings())); + // TODO - uncomment after all customs is also uploaded for all repos - https://github.com/opensearch-project/OpenSearch/issues/10691 + // assertEquals("fs", repositoriesMetadata.repository("custom-repo").type()); + // assertEquals(Settings.builder().put("location", randomRepoPath()).put("compress", false).build(), + // repositoriesMetadata.repository("custom-repo").settings()); + } + + private void addClusterLevelReadOnlyBlock() throws InterruptedException, ExecutionException { + updatePersistentSettings(Settings.builder().put(SETTING_READ_ONLY_SETTING.getKey(), true).build()); + assertTrue(clusterService().state().blocks().hasGlobalBlock(CLUSTER_READ_ONLY_BLOCK)); } - private void resetShardLimits() { - // Step - 5 Reset the cluster settings + private void updatePersistentSettings(Settings settings) throws ExecutionException, InterruptedException { ClusterUpdateSettingsRequest resetRequest = new ClusterUpdateSettingsRequest(); - resetRequest.transientSettings( - Settings.builder().putNull(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey()).putNull(SETTING_MAX_SHARDS_PER_CLUSTER_KEY) + resetRequest.persistentSettings(settings); + assertAcked(client().admin().cluster().updateSettings(resetRequest).get()); + } + + private void verifyRestoredIndexTemplate() { + Map 
indexTemplateMetadataMap = clusterService().state().metadata().templates(); + assertEquals(1, indexTemplateMetadataMap.size()); + assertEquals(Arrays.asList("pattern-1", "log-*"), indexTemplateMetadataMap.get("my-template").patterns()); + assertEquals( + Settings.builder() // <1> + .put("index.number_of_shards", 3) + .put("index.number_of_replicas", 1) + .build(), + indexTemplateMetadataMap.get("my-template").settings() ); + } - try { - client().admin().cluster().updateSettings(resetRequest).get(); - } catch (InterruptedException | ExecutionException e) { - throw new RuntimeException(e); - } + private static void putIndexTemplate() { + PutIndexTemplateRequest request = new PutIndexTemplateRequest("my-template"); // <1> + request.patterns(Arrays.asList("pattern-1", "log-*")); // <2> + + request.settings( + Settings.builder() // <1> + .put("index.number_of_shards", 3) + .put("index.number_of_replicas", 1) + ); + assertTrue(client().admin().indices().putTemplate(request).actionGet().isAcknowledged()); } + private static void updateIndexBlock(boolean value, String secondIndexName) throws InterruptedException, ExecutionException { + assertAcked( + client().admin() + .indices() + .updateSettings( + new UpdateSettingsRequest(Settings.builder().put(INDEX_READ_ONLY_SETTING.getKey(), value).build(), secondIndexName) + ) + .get() + ); + } } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java index 69efea186d927..0c8aa027e5f01 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java @@ -57,6 +57,7 @@ import org.opensearch.monitor.process.ProcessStats; import org.opensearch.node.AdaptiveSelectionStats; import org.opensearch.node.NodesResourceUsageStats; +import org.opensearch.repositories.RepositoriesStats; import 
org.opensearch.script.ScriptCacheStats; import org.opensearch.script.ScriptStats; import org.opensearch.search.backpressure.stats.SearchBackpressureStats; @@ -146,6 +147,9 @@ public class NodeStats extends BaseNodeResponse implements ToXContentFragment { @Nullable private NodesResourceUsageStats resourceUsageStats; + @Nullable + private RepositoriesStats repositoriesStats; + public NodeStats(StreamInput in) throws IOException { super(in); timestamp = in.readVLong(); @@ -202,11 +206,16 @@ public NodeStats(StreamInput in) throws IOException { } else { searchPipelineStats = null; } - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { // make it 2.12 when we backport + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { resourceUsageStats = in.readOptionalWriteable(NodesResourceUsageStats::new); } else { resourceUsageStats = null; } + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + repositoriesStats = in.readOptionalWriteable(RepositoriesStats::new); + } else { + repositoriesStats = null; + } } public NodeStats( @@ -234,7 +243,8 @@ public NodeStats( @Nullable WeightedRoutingStats weightedRoutingStats, @Nullable FileCacheStats fileCacheStats, @Nullable TaskCancellationStats taskCancellationStats, - @Nullable SearchPipelineStats searchPipelineStats + @Nullable SearchPipelineStats searchPipelineStats, + @Nullable RepositoriesStats repositoriesStats ) { super(node); this.timestamp = timestamp; @@ -261,6 +271,7 @@ public NodeStats( this.fileCacheStats = fileCacheStats; this.taskCancellationStats = taskCancellationStats; this.searchPipelineStats = searchPipelineStats; + this.repositoriesStats = repositoriesStats; } public long getTimestamp() { @@ -403,6 +414,11 @@ public SearchPipelineStats getSearchPipelineStats() { return searchPipelineStats; } + @Nullable + public RepositoriesStats getRepositoriesStats() { + return repositoriesStats; + } + @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); @@ -446,9 +462,12 @@ public void 
writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_9_0)) { out.writeOptionalWriteable(searchPipelineStats); } - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { // make it 2.12 when we backport + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeOptionalWriteable(resourceUsageStats); } + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeOptionalWriteable(repositoriesStats); + } } @Override @@ -542,6 +561,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (getResourceUsageStats() != null) { getResourceUsageStats().toXContent(builder, params); } + if (getRepositoriesStats() != null) { + getRepositoriesStats().toXContent(builder, params); + } return builder; } } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java index 99c9fb2d1e26a..88dff20354aa2 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java @@ -214,7 +214,8 @@ public enum Metric { FILE_CACHE_STATS("file_cache"), TASK_CANCELLATION("task_cancellation"), SEARCH_PIPELINE("search_pipeline"), - RESOURCE_USAGE_STATS("resource_usage_stats"); + RESOURCE_USAGE_STATS("resource_usage_stats"), + REPOSITORIES("repositories"); private String metricName; diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java index 204157236a282..aa02f8e580f4a 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java @@ -125,7 +125,8 @@ 
protected NodeStats nodeOperation(NodeStatsRequest nodeStatsRequest) { NodesStatsRequest.Metric.FILE_CACHE_STATS.containedIn(metrics), NodesStatsRequest.Metric.TASK_CANCELLATION.containedIn(metrics), NodesStatsRequest.Metric.SEARCH_PIPELINE.containedIn(metrics), - NodesStatsRequest.Metric.RESOURCE_USAGE_STATS.containedIn(metrics) + NodesStatsRequest.Metric.RESOURCE_USAGE_STATS.containedIn(metrics), + NodesStatsRequest.Metric.REPOSITORIES.containedIn(metrics) ); } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java index d8323e209be23..f51fabbfb2388 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -169,6 +169,7 @@ protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeReq false, false, false, + false, false ); List shardsStats = new ArrayList<>(); diff --git a/server/src/main/java/org/opensearch/common/blobstore/BlobStore.java b/server/src/main/java/org/opensearch/common/blobstore/BlobStore.java index 2ee3e9557b354..0f6646d37f950 100644 --- a/server/src/main/java/org/opensearch/common/blobstore/BlobStore.java +++ b/server/src/main/java/org/opensearch/common/blobstore/BlobStore.java @@ -49,6 +49,9 @@ public interface BlobStore extends Closeable { */ BlobContainer blobContainer(BlobPath path); + /** + * Returns statistics on the count of operations that have been performed on this blob store + */ /** * Returns statistics on the count of operations that have been performed on this blob store */ @@ -56,8 +59,36 @@ default Map stats() { return Collections.emptyMap(); } + /** + * Returns details statistics of operations that have been performed on this blob store + */ + default Map> extendedStats() { + return Collections.emptyMap(); + } 
+ /** * Reload the blob store inplace */ default void reload(RepositoryMetadata repositoryMetadata) {} + + /** + * Metrics for BlobStore interactions + */ + enum Metric { + REQUEST_SUCCESS("request_success_total"), + REQUEST_FAILURE("request_failures_total"), + REQUEST_LATENCY("request_time_in_millis"), + RETRY_COUNT("request_retry_count_total"); + + private String metricName; + + Metric(String name) { + this.metricName = name; + } + + public String metricName() { + return this.metricName; + } + } + } diff --git a/server/src/main/java/org/opensearch/common/blobstore/EncryptedBlobStore.java b/server/src/main/java/org/opensearch/common/blobstore/EncryptedBlobStore.java index 4d2d69e473438..a18ca8b9d5c39 100644 --- a/server/src/main/java/org/opensearch/common/blobstore/EncryptedBlobStore.java +++ b/server/src/main/java/org/opensearch/common/blobstore/EncryptedBlobStore.java @@ -75,6 +75,16 @@ public Map stats() { return blobStore.stats(); } + /** + * Retrieves extended statistics about the BlobStore. Delegates the call to the underlying BlobStore's extendedStats() method. + * + * @return A map containing extended statistics about the BlobStore. + */ + @Override + public Map> extendedStats() { + return blobStore.extendedStats(); + } + /** * Closes the EncryptedBlobStore by decrementing the reference count of the CryptoManager and closing the * underlying BlobStore. This ensures proper cleanup of resources. 
diff --git a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java index d3e7a0c482ee2..9eb7fb0ca04d0 100644 --- a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java +++ b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java @@ -158,38 +158,44 @@ public void start( PersistedState remotePersistedState = null; boolean success = false; try { - ClusterState clusterState = prepareInitialClusterState( + ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(settings)) + .version(lastAcceptedVersion) + .metadata(metadata) + .build(); + + if (DiscoveryNode.isClusterManagerNode(settings) && isRemoteStoreClusterStateEnabled(settings)) { + // If the cluster UUID loaded from local is unknown (_na_) then fetch the best state from remote + // If there is no valid state on remote, continue with initial empty state + // If there is a valid state, then restore index metadata using this state + String lastKnownClusterUUID = ClusterState.UNKNOWN_UUID; + if (ClusterState.UNKNOWN_UUID.equals(clusterState.metadata().clusterUUID())) { + lastKnownClusterUUID = remoteClusterStateService.getLastKnownUUIDFromRemote( + clusterState.getClusterName().value() + ); + if (ClusterState.UNKNOWN_UUID.equals(lastKnownClusterUUID) == false) { + // Load state from remote + final RemoteRestoreResult remoteRestoreResult = remoteStoreRestoreService.restore( + clusterState, + lastKnownClusterUUID, + false, + new String[] {} + ); + clusterState = remoteRestoreResult.getClusterState(); + } + } + remotePersistedState = new RemotePersistedState(remoteClusterStateService, lastKnownClusterUUID); + } + + // Recovers Cluster and Index level blocks + clusterState = prepareInitialClusterState( transportService, clusterService, - ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(settings)) - .version(lastAcceptedVersion) - .metadata(upgradeMetadataForNode(metadata, 
metadataIndexUpgradeService, metadataUpgrader)) + ClusterState.builder(clusterState) + .metadata(upgradeMetadataForNode(clusterState.metadata(), metadataIndexUpgradeService, metadataUpgrader)) .build() ); if (DiscoveryNode.isClusterManagerNode(settings)) { - if (isRemoteStoreClusterStateEnabled(settings)) { - // If the cluster UUID loaded from local is unknown (_na_) then fetch the best state from remote - // If there is no valid state on remote, continue with initial empty state - // If there is a valid state, then restore index metadata using this state - String lastKnownClusterUUID = ClusterState.UNKNOWN_UUID; - if (ClusterState.UNKNOWN_UUID.equals(clusterState.metadata().clusterUUID())) { - lastKnownClusterUUID = remoteClusterStateService.getLastKnownUUIDFromRemote( - clusterState.getClusterName().value() - ); - if (ClusterState.UNKNOWN_UUID.equals(lastKnownClusterUUID) == false) { - // Load state from remote - final RemoteRestoreResult remoteRestoreResult = remoteStoreRestoreService.restore( - clusterState, - lastKnownClusterUUID, - false, - new String[] {} - ); - clusterState = remoteRestoreResult.getClusterState(); - } - } - remotePersistedState = new RemotePersistedState(remoteClusterStateService, lastKnownClusterUUID); - } persistedState = new LucenePersistedState(persistedClusterStateService, currentTerm, clusterState); } else { persistedState = new AsyncLucenePersistedState( diff --git a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java index 40b16f3d6323b..97b37d9532f85 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java +++ b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java @@ -33,6 +33,9 @@ */ public class ClusterMetadataManifest implements Writeable, ToXContentFragment { + public static final int CODEC_V0 = 0; // Older codec version, where we haven't introduced codec 
versions for manifest. + public static final int CODEC_V1 = 1; // In Codec V1 we have introduced global-metadata and codec version in Manifest file. + private static final ParseField CLUSTER_TERM_FIELD = new ParseField("cluster_term"); private static final ParseField STATE_VERSION_FIELD = new ParseField("state_version"); private static final ParseField CLUSTER_UUID_FIELD = new ParseField("cluster_uuid"); @@ -40,6 +43,8 @@ public class ClusterMetadataManifest implements Writeable, ToXContentFragment { private static final ParseField OPENSEARCH_VERSION_FIELD = new ParseField("opensearch_version"); private static final ParseField NODE_ID_FIELD = new ParseField("node_id"); private static final ParseField COMMITTED_FIELD = new ParseField("committed"); + private static final ParseField CODEC_VERSION_FIELD = new ParseField("codec_version"); + private static final ParseField GLOBAL_METADATA_FIELD = new ParseField("global_metadata"); private static final ParseField INDICES_FIELD = new ParseField("indices"); private static final ParseField PREVIOUS_CLUSTER_UUID = new ParseField("previous_cluster_uuid"); private static final ParseField CLUSTER_UUID_COMMITTED = new ParseField("cluster_uuid_committed"); @@ -84,7 +89,33 @@ private static boolean clusterUUIDCommitted(Object[] fields) { return (boolean) fields[9]; } - private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + private static int codecVersion(Object[] fields) { + return (int) fields[10]; + } + + private static String globalMetadataFileName(Object[] fields) { + return (String) fields[11]; + } + + private static final ConstructingObjectParser PARSER_V0 = new ConstructingObjectParser<>( + "cluster_metadata_manifest", + fields -> new ClusterMetadataManifest( + term(fields), + version(fields), + clusterUUID(fields), + stateUUID(fields), + opensearchVersion(fields), + nodeId(fields), + committed(fields), + CODEC_V0, + null, + indices(fields), + previousClusterUUID(fields), + 
clusterUUIDCommitted(fields) + ) + ); + + private static final ConstructingObjectParser PARSER_V1 = new ConstructingObjectParser<>( "cluster_metadata_manifest", fields -> new ClusterMetadataManifest( term(fields), @@ -94,29 +125,45 @@ private static boolean clusterUUIDCommitted(Object[] fields) { opensearchVersion(fields), nodeId(fields), committed(fields), + codecVersion(fields), + globalMetadataFileName(fields), indices(fields), previousClusterUUID(fields), clusterUUIDCommitted(fields) ) ); + private static final ConstructingObjectParser CURRENT_PARSER = PARSER_V1; + static { - PARSER.declareLong(ConstructingObjectParser.constructorArg(), CLUSTER_TERM_FIELD); - PARSER.declareLong(ConstructingObjectParser.constructorArg(), STATE_VERSION_FIELD); - PARSER.declareString(ConstructingObjectParser.constructorArg(), CLUSTER_UUID_FIELD); - PARSER.declareString(ConstructingObjectParser.constructorArg(), STATE_UUID_FIELD); - PARSER.declareInt(ConstructingObjectParser.constructorArg(), OPENSEARCH_VERSION_FIELD); - PARSER.declareString(ConstructingObjectParser.constructorArg(), NODE_ID_FIELD); - PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), COMMITTED_FIELD); - PARSER.declareObjectArray( + declareParser(PARSER_V0, CODEC_V0); + declareParser(PARSER_V1, CODEC_V1); + } + + private static void declareParser(ConstructingObjectParser parser, long codec_version) { + parser.declareLong(ConstructingObjectParser.constructorArg(), CLUSTER_TERM_FIELD); + parser.declareLong(ConstructingObjectParser.constructorArg(), STATE_VERSION_FIELD); + parser.declareString(ConstructingObjectParser.constructorArg(), CLUSTER_UUID_FIELD); + parser.declareString(ConstructingObjectParser.constructorArg(), STATE_UUID_FIELD); + parser.declareInt(ConstructingObjectParser.constructorArg(), OPENSEARCH_VERSION_FIELD); + parser.declareString(ConstructingObjectParser.constructorArg(), NODE_ID_FIELD); + parser.declareBoolean(ConstructingObjectParser.constructorArg(), COMMITTED_FIELD); + 
parser.declareObjectArray( ConstructingObjectParser.constructorArg(), (p, c) -> UploadedIndexMetadata.fromXContent(p), INDICES_FIELD ); - PARSER.declareString(ConstructingObjectParser.constructorArg(), PREVIOUS_CLUSTER_UUID); - PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), CLUSTER_UUID_COMMITTED); + parser.declareString(ConstructingObjectParser.constructorArg(), PREVIOUS_CLUSTER_UUID); + parser.declareBoolean(ConstructingObjectParser.constructorArg(), CLUSTER_UUID_COMMITTED); + + if (codec_version >= CODEC_V1) { + parser.declareInt(ConstructingObjectParser.constructorArg(), CODEC_VERSION_FIELD); + parser.declareString(ConstructingObjectParser.constructorArg(), GLOBAL_METADATA_FIELD); + } } + private final int codecVersion; + private final String globalMetadataFileName; private final List indices; private final long clusterTerm; private final long stateVersion; @@ -168,6 +215,14 @@ public boolean isClusterUUIDCommitted() { return clusterUUIDCommitted; } + public int getCodecVersion() { + return codecVersion; + } + + public String getGlobalMetadataFileName() { + return globalMetadataFileName; + } + public ClusterMetadataManifest( long clusterTerm, long version, @@ -176,6 +231,8 @@ public ClusterMetadataManifest( Version opensearchVersion, String nodeId, boolean committed, + int codecVersion, + String globalMetadataFileName, List indices, String previousClusterUUID, boolean clusterUUIDCommitted @@ -187,6 +244,8 @@ public ClusterMetadataManifest( this.opensearchVersion = opensearchVersion; this.nodeId = nodeId; this.committed = committed; + this.codecVersion = codecVersion; + this.globalMetadataFileName = globalMetadataFileName; this.indices = Collections.unmodifiableList(indices); this.previousClusterUUID = previousClusterUUID; this.clusterUUIDCommitted = clusterUUIDCommitted; @@ -203,6 +262,13 @@ public ClusterMetadataManifest(StreamInput in) throws IOException { this.indices = Collections.unmodifiableList(in.readList(UploadedIndexMetadata::new)); 
this.previousClusterUUID = in.readString(); this.clusterUUIDCommitted = in.readBoolean(); + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + this.codecVersion = in.readInt(); + this.globalMetadataFileName = in.readOptionalString(); // nullable: codec V0 manifests have no global metadata file + } else { + this.codecVersion = CODEC_V0; // Default codec + this.globalMetadataFileName = null; + } } public static Builder builder() { @@ -231,6 +297,10 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.endArray(); builder.field(PREVIOUS_CLUSTER_UUID.getPreferredName(), getPreviousClusterUUID()); builder.field(CLUSTER_UUID_COMMITTED.getPreferredName(), isClusterUUIDCommitted()); + if (onOrAfterCodecVersion(CODEC_V1)) { + builder.field(CODEC_VERSION_FIELD.getPreferredName(), getCodecVersion()); + builder.field(GLOBAL_METADATA_FIELD.getPreferredName(), getGlobalMetadataFileName()); + } return builder; } @@ -246,6 +316,10 @@ public void writeTo(StreamOutput out) throws IOException { out.writeCollection(indices); out.writeString(previousClusterUUID); out.writeBoolean(clusterUUIDCommitted); + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeInt(codecVersion); + out.writeOptionalString(globalMetadataFileName); // must mirror readOptionalString in the StreamInput constructor + } } @Override @@ -266,12 +340,16 @@ public boolean equals(Object o) { && Objects.equals(nodeId, that.nodeId) && Objects.equals(committed, that.committed) && Objects.equals(previousClusterUUID, that.previousClusterUUID) - && Objects.equals(clusterUUIDCommitted, that.clusterUUIDCommitted); + && Objects.equals(clusterUUIDCommitted, that.clusterUUIDCommitted) + && Objects.equals(globalMetadataFileName, that.globalMetadataFileName) + && Objects.equals(codecVersion, that.codecVersion); } @Override public int hashCode() { return Objects.hash( + codecVersion, + globalMetadataFileName, indices, clusterTerm, stateVersion, @@ -290,8 +368,16 @@ public String toString() { return Strings.toString(MediaTypeRegistry.JSON, this); } + public boolean onOrAfterCodecVersion(int codecVersion) { + return 
this.codecVersion >= codecVersion; + } + + public static ClusterMetadataManifest fromXContentV0(XContentParser parser) throws IOException { + return PARSER_V0.parse(parser, null); + } + public static ClusterMetadataManifest fromXContent(XContentParser parser) throws IOException { - return PARSER.parse(parser, null); + return CURRENT_PARSER.parse(parser, null); } /** @@ -301,6 +387,8 @@ public static ClusterMetadataManifest fromXContent(XContentParser parser) throws */ public static class Builder { + private String globalMetadataFileName; + private int codecVersion; private List indices; private long clusterTerm; private long stateVersion; @@ -317,6 +405,16 @@ public Builder indices(List indices) { return this; } + public Builder codecVersion(int codecVersion) { + this.codecVersion = codecVersion; + return this; + } + + public Builder globalMetadataFileName(String globalMetadataFileName) { + this.globalMetadataFileName = globalMetadataFileName; + return this; + } + public Builder clusterTerm(long clusterTerm) { this.clusterTerm = clusterTerm; return this; @@ -378,6 +476,8 @@ public Builder(ClusterMetadataManifest manifest) { this.opensearchVersion = manifest.opensearchVersion; this.nodeId = manifest.nodeId; this.committed = manifest.committed; + this.globalMetadataFileName = manifest.globalMetadataFileName; + this.codecVersion = manifest.codecVersion; this.indices = new ArrayList<>(manifest.indices); this.previousClusterUUID = manifest.previousClusterUUID; this.clusterUUIDCommitted = manifest.clusterUUIDCommitted; @@ -392,6 +492,8 @@ public ClusterMetadataManifest build() { opensearchVersion, nodeId, committed, + codecVersion, + globalMetadataFileName, indices, previousClusterUUID, clusterUUIDCommitted diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index 0cf97de53d5f3..b9d06c8fbb1c1 100644 --- 
a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -15,6 +15,7 @@ import org.opensearch.action.LatchedActionListener; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; import org.opensearch.common.Nullable; import org.opensearch.common.blobstore.BlobContainer; import org.opensearch.common.blobstore.BlobMetadata; @@ -27,6 +28,7 @@ import org.opensearch.common.util.io.IOUtils; import org.opensearch.core.action.ActionListener; import org.opensearch.core.index.Index; +import org.opensearch.core.xcontent.ToXContent; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.index.translog.transfer.BlobStoreTransferService; @@ -55,6 +57,7 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; import java.util.function.LongSupplier; import java.util.function.Supplier; @@ -80,7 +83,9 @@ public class RemoteClusterStateService implements Closeable { private static final Logger logger = LogManager.getLogger(RemoteClusterStateService.class); + // TODO make this two variable as dynamic setting [issue: #10688] public static final int INDEX_METADATA_UPLOAD_WAIT_MILLIS = 20000; + public static final int GLOBAL_METADATA_UPLOAD_WAIT_MILLIS = 20000; public static final ChecksumBlobStoreFormat INDEX_METADATA_FORMAT = new ChecksumBlobStoreFormat<>( "index-metadata", @@ -88,11 +93,27 @@ public class RemoteClusterStateService implements Closeable { IndexMetadata::fromXContent ); + public static final ChecksumBlobStoreFormat GLOBAL_METADATA_FORMAT = new ChecksumBlobStoreFormat<>( + "metadata", + 
METADATA_NAME_FORMAT, + Metadata::fromXContent + ); + + /** + * Manifest format compatible with older codec v0, where codec version was missing. + */ + public static final ChecksumBlobStoreFormat CLUSTER_METADATA_MANIFEST_FORMAT_V0 = + new ChecksumBlobStoreFormat<>("cluster-metadata-manifest", METADATA_MANIFEST_NAME_FORMAT, ClusterMetadataManifest::fromXContentV0); + + /** + * Manifest format compatible with codec v1, where we introduced codec versions/global metadata. + */ public static final ChecksumBlobStoreFormat CLUSTER_METADATA_MANIFEST_FORMAT = new ChecksumBlobStoreFormat<>( "cluster-metadata-manifest", METADATA_MANIFEST_NAME_FORMAT, ClusterMetadataManifest::fromXContent ); + /** * Used to specify if cluster state metadata should be published to remote store */ @@ -105,9 +126,11 @@ public class RemoteClusterStateService implements Closeable { public static final String CLUSTER_STATE_PATH_TOKEN = "cluster-state"; public static final String INDEX_PATH_TOKEN = "index"; + public static final String GLOBAL_METADATA_PATH_TOKEN = "global-metadata"; public static final String MANIFEST_PATH_TOKEN = "manifest"; public static final String MANIFEST_FILE_PREFIX = "manifest"; - public static final String INDEX_METADATA_FILE_PREFIX = "metadata"; + public static final String METADATA_FILE_PREFIX = "metadata"; + public static final int SPLITED_MANIFEST_FILE_LENGTH = 6; // file name manifest__term__version__C/P__timestamp__codecversion private final String nodeId; private final Supplier repositoriesService; @@ -121,7 +144,17 @@ public class RemoteClusterStateService implements Closeable { private final AtomicBoolean deleteStaleMetadataRunning = new AtomicBoolean(false); public static final int INDEX_METADATA_CURRENT_CODEC_VERSION = 1; - public static final int MANIFEST_CURRENT_CODEC_VERSION = 1; + public static final int MANIFEST_CURRENT_CODEC_VERSION = ClusterMetadataManifest.CODEC_V1; + public static final int GLOBAL_METADATA_CURRENT_CODEC_VERSION = 1; + + // ToXContent 
Params with gateway mode. + // We are using gateway context mode to persist all custom metadata. + public static final ToXContent.Params FORMAT_PARAMS; + static { + Map params = new HashMap<>(1); + params.put(Metadata.CONTEXT_MODE_PARAM, Metadata.CONTEXT_MODE_GATEWAY); + FORMAT_PARAMS = new ToXContent.MapParams(params); + } public RemoteClusterStateService( String nodeId, @@ -162,12 +195,22 @@ public ClusterMetadataManifest writeFullMetadata(ClusterState clusterState, Stri return null; } + // TODO: we can upload global metadata and index metadata in parallel. [issue: #10645] + // Write globalMetadata + String globalMetadataFile = writeGlobalMetadata(clusterState); + // any validations before/after upload ? final List allUploadedIndexMetadata = writeIndexMetadataParallel( clusterState, new ArrayList<>(clusterState.metadata().indices().values()) ); - final ClusterMetadataManifest manifest = uploadManifest(clusterState, allUploadedIndexMetadata, previousClusterUUID, false); + final ClusterMetadataManifest manifest = uploadManifest( + clusterState, + allUploadedIndexMetadata, + previousClusterUUID, + globalMetadataFile, + false + ); final long durationMillis = TimeValue.nsecToMSec(relativeTimeNanosSupplier.getAsLong() - startTimeNanos); if (durationMillis >= slowWriteLoggingThreshold.getMillis()) { logger.warn( @@ -206,6 +249,22 @@ public ClusterMetadataManifest writeIncrementalMetadata( return null; } assert previousClusterState.metadata().coordinationMetadata().term() == clusterState.metadata().coordinationMetadata().term(); + + // Write Global Metadata + final boolean updateGlobalMetadata = Metadata.isGlobalStateEquals( + previousClusterState.metadata(), + clusterState.metadata() + ) == false; + String globalMetadataFile; + // For migration case from codec V0 to V1, we have added null check on global metadata file, + // If file is empty and codec is 1 then write global metadata. 
+ if (updateGlobalMetadata || previousManifest.getGlobalMetadataFileName() == null) { + globalMetadataFile = writeGlobalMetadata(clusterState); + } else { + globalMetadataFile = previousManifest.getGlobalMetadataFileName(); + } + + // Write Index Metadata final Map previousStateIndexMetadataVersionByName = new HashMap<>(); for (final IndexMetadata indexMetadata : previousClusterState.metadata().indices().values()) { previousStateIndexMetadataVersionByName.put(indexMetadata.getIndex().getName(), indexMetadata.getVersion()); @@ -248,6 +307,7 @@ public ClusterMetadataManifest writeIncrementalMetadata( clusterState, new ArrayList<>(allUploadedIndexMetadata.values()), previousManifest.getPreviousClusterUUID(), + globalMetadataFile, false ); deleteStaleClusterMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID(), RETAINED_MANIFESTS); @@ -273,6 +333,59 @@ public ClusterMetadataManifest writeIncrementalMetadata( return manifest; } + /** + * Uploads provided ClusterState's global Metadata to remote store in parallel. + * The call is blocking so the method waits for upload to finish and then return. + * + * @param clusterState current ClusterState + * @return String file name where globalMetadata file is stored. 
+ */ + private String writeGlobalMetadata(ClusterState clusterState) throws IOException { + + AtomicReference result = new AtomicReference(); + final BlobContainer globalMetadataContainer = globalMetadataContainer( + clusterState.getClusterName().value(), + clusterState.metadata().clusterUUID() + ); + final String globalMetadataFilename = globalMetadataFileName(clusterState.metadata()); + + // latch to wait until the upload is finished + CountDownLatch latch = new CountDownLatch(1); + + LatchedActionListener completionListener = new LatchedActionListener<>(ActionListener.wrap(resp -> { + logger.trace(String.format(Locale.ROOT, "GlobalMetadata uploaded successfully.")); + result.set(globalMetadataContainer.path().buildAsString() + globalMetadataFilename); + }, ex -> { throw new GlobalMetadataTransferException(ex.getMessage(), ex); }), latch); + + GLOBAL_METADATA_FORMAT.writeAsync( + clusterState.metadata(), + globalMetadataContainer, + globalMetadataFilename, + blobStoreRepository.getCompressor(), + completionListener, + FORMAT_PARAMS + ); + + try { + if (latch.await(GLOBAL_METADATA_UPLOAD_WAIT_MILLIS, TimeUnit.MILLISECONDS) == false) { + // TODO: We should add metrics where transfer is timing out. [Issue: #10687] + GlobalMetadataTransferException ex = new GlobalMetadataTransferException( + String.format(Locale.ROOT, "Timed out waiting for transfer of global metadata to complete") + ); + throw ex; + } + } catch (InterruptedException ex) { + GlobalMetadataTransferException exception = new GlobalMetadataTransferException( + "Interrupted while waiting for transfer of global metadata to complete", + ex + ); + Thread.currentThread().interrupt(); // restore the interrupt flag before propagating + throw exception; + } + + return result.get(); + } + /** * Uploads provided IndexMetadata's to remote store in parallel. The call is blocking so the method waits for upload to finish and then return. 
* @@ -381,7 +494,8 @@ private void writeIndexMetadataAsync( indexMetadataContainer, indexMetadataFilename, blobStoreRepository.getCompressor(), - completionListener + completionListener, + FORMAT_PARAMS ); } @@ -398,6 +512,7 @@ public ClusterMetadataManifest markLastStateAsCommitted(ClusterState clusterStat clusterState, previousManifest.getIndices(), previousManifest.getPreviousClusterUUID(), + previousManifest.getGlobalMetadataFileName(), true ); deleteStaleClusterUUIDs(clusterState, committedManifest); @@ -426,6 +541,7 @@ private ClusterMetadataManifest uploadManifest( ClusterState clusterState, List uploadedIndexMetadata, String previousClusterUUID, + String globalClusterMetadataFileName, boolean committed ) throws IOException { synchronized (this) { @@ -438,6 +554,8 @@ private ClusterMetadataManifest uploadManifest( Version.CURRENT, nodeId, committed, + MANIFEST_CURRENT_CODEC_VERSION, + globalClusterMetadataFileName, uploadedIndexMetadata, previousClusterUUID, clusterState.metadata().clusterUUIDCommitted() @@ -469,6 +587,12 @@ private BlobContainer indexMetadataContainer(String clusterName, String clusterU .blobContainer(getCusterMetadataBasePath(clusterName, clusterUUID).add(INDEX_PATH_TOKEN).add(indexUUID)); } + private BlobContainer globalMetadataContainer(String clusterName, String clusterUUID) { + // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/global-metadata/ + return blobStoreRepository.blobStore() + .blobContainer(getCusterMetadataBasePath(clusterName, clusterUUID).add(GLOBAL_METADATA_PATH_TOKEN)); + } + private BlobContainer manifestContainer(String clusterName, String clusterUUID) { // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/manifest return blobStoreRepository.blobStore().blobContainer(getManifestFolderPath(clusterName, clusterUUID)); @@ -510,7 +634,7 @@ static String indexMetadataFileName(IndexMetadata indexMetadata) { // version> return String.join( DELIMITER, - INDEX_METADATA_FILE_PREFIX, + METADATA_FILE_PREFIX, 
RemoteStoreUtils.invertLong(indexMetadata.getVersion()), RemoteStoreUtils.invertLong(System.currentTimeMillis()), String.valueOf(INDEX_METADATA_CURRENT_CODEC_VERSION) // Keep the codec version at last place only, during read we reads last @@ -518,6 +642,17 @@ static String indexMetadataFileName(IndexMetadata indexMetadata) { ); } + private static String globalMetadataFileName(Metadata metadata) { + // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/global-metadata/metadata______ + return String.join( + DELIMITER, + METADATA_FILE_PREFIX, + RemoteStoreUtils.invertLong(metadata.version()), + RemoteStoreUtils.invertLong(System.currentTimeMillis()), + String.valueOf(GLOBAL_METADATA_CURRENT_CODEC_VERSION) + ); + } + private BlobPath getManifestFolderPath(String clusterName, String clusterUUID) { return getCusterMetadataBasePath(clusterName, clusterUUID).add(MANIFEST_PATH_TOKEN); } @@ -527,18 +662,18 @@ private BlobPath getManifestFolderPath(String clusterName, String clusterUUID) { * * @param clusterUUID uuid of cluster state to refer to in remote * @param clusterName name of the cluster + * @param clusterMetadataManifest manifest file of cluster * @return {@code Map} latest IndexUUID to IndexMetadata map */ - public Map getLatestIndexMetadata(String clusterName, String clusterUUID) throws IOException { - start(); - Map remoteIndexMetadata = new HashMap<>(); - Optional clusterMetadataManifest = getLatestClusterMetadataManifest(clusterName, clusterUUID); - if (!clusterMetadataManifest.isPresent()) { - throw new IllegalStateException("Latest index metadata is not present for the provided clusterUUID"); - } - assert Objects.equals(clusterUUID, clusterMetadataManifest.get().getClusterUUID()) + private Map getIndexMetadataMap( + String clusterName, + String clusterUUID, + ClusterMetadataManifest clusterMetadataManifest + ) { + assert Objects.equals(clusterUUID, clusterMetadataManifest.getClusterUUID()) : "Corrupt ClusterMetadataManifest found. 
Cluster UUID mismatch."; - for (UploadedIndexMetadata uploadedIndexMetadata : clusterMetadataManifest.get().getIndices()) { + Map remoteIndexMetadata = new HashMap<>(); + for (UploadedIndexMetadata uploadedIndexMetadata : clusterMetadataManifest.getIndices()) { IndexMetadata indexMetadata = getIndexMetadata(clusterName, clusterUUID, uploadedIndexMetadata); remoteIndexMetadata.put(uploadedIndexMetadata.getIndexUUID(), indexMetadata); } @@ -569,6 +704,52 @@ private IndexMetadata getIndexMetadata(String clusterName, String clusterUUID, U } } + /** + * Fetch latest metadata from remote cluster state including global metadata and index metadata + * + * @param clusterUUID uuid of cluster state to refer to in remote + * @param clusterName name of the cluster + * @return {@link Metadata} built from the remote global metadata with the remote index metadata applied + */ + public Metadata getLatestMetadata(String clusterName, String clusterUUID) { + start(); + Optional clusterMetadataManifest = getLatestClusterMetadataManifest(clusterName, clusterUUID); + if (!clusterMetadataManifest.isPresent()) { + throw new IllegalStateException( + String.format(Locale.ROOT, "Latest cluster metadata manifest is not present for the provided clusterUUID: %s", clusterUUID) + ); + } + // Fetch Global Metadata + Metadata globalMetadata = getGlobalMetadata(clusterName, clusterUUID, clusterMetadataManifest.get()); + + // Fetch Index Metadata + Map indices = getIndexMetadataMap(clusterName, clusterUUID, clusterMetadataManifest.get()); + + return Metadata.builder(globalMetadata).indices(indices).build(); + } + + private Metadata getGlobalMetadata(String clusterName, String clusterUUID, ClusterMetadataManifest clusterMetadataManifest) { + String globalMetadataFileName = clusterMetadataManifest.getGlobalMetadataFileName(); + try { + // Fetch Global metadata + if (globalMetadataFileName != null) { + String[] splitPath = globalMetadataFileName.split("/"); + return GLOBAL_METADATA_FORMAT.read( + globalMetadataContainer(clusterName, clusterUUID), + splitPath[splitPath.length 
- 1], + blobStoreRepository.getNamedXContentRegistry() + ); + } else { + return Metadata.EMPTY_METADATA; + } + } catch (IOException e) { + throw new IllegalStateException( + String.format(Locale.ROOT, "Error while downloading Global Metadata - %s", globalMetadataFileName), + e + ); + } + } + /** * Fetch latest ClusterMetadataManifest from remote state store * @@ -578,10 +759,7 @@ private IndexMetadata getIndexMetadata(String clusterName, String clusterUUID, U */ public Optional getLatestClusterMetadataManifest(String clusterName, String clusterUUID) { Optional latestManifestFileName = getLatestManifestFileName(clusterName, clusterUUID); - if (latestManifestFileName.isPresent()) { - return Optional.of(fetchRemoteClusterMetadataManifest(clusterName, clusterUUID, latestManifestFileName.get())); - } - return Optional.empty(); + return latestManifestFileName.map(s -> fetchRemoteClusterMetadataManifest(clusterName, clusterUUID, s)); } /** @@ -601,7 +779,8 @@ public String getLastKnownUUIDFromRemote(String clusterName) { return validChain.get(0); } catch (IOException e) { throw new IllegalStateException( - String.format(Locale.ROOT, "Error while fetching previous UUIDs from remote store for cluster name: %s", clusterName) + String.format(Locale.ROOT, "Error while fetching previous UUIDs from remote store for cluster name: %s", clusterName), + e ); } } @@ -622,7 +801,8 @@ private Map getLatestManifestForAllClusterUUIDs manifest.ifPresent(clusterMetadataManifest -> manifestsByClusterUUID.put(clusterUUID, clusterMetadataManifest)); } catch (Exception e) { throw new IllegalStateException( - String.format(Locale.ROOT, "Exception in fetching manifest for clusterUUID: %s", clusterUUID) + String.format(Locale.ROOT, "Exception in fetching manifest for clusterUUID: %s", clusterUUID), + e ); } } @@ -788,7 +968,7 @@ private Optional getLatestManifestFileName(String clusterName, String cl private ClusterMetadataManifest fetchRemoteClusterMetadataManifest(String clusterName, String 
clusterUUID, String filename) throws IllegalStateException { try { - return RemoteClusterStateService.CLUSTER_METADATA_MANIFEST_FORMAT.read( + return getClusterMetadataManifestBlobStoreFormat(filename).read( manifestContainer(clusterName, clusterUUID), filename, blobStoreRepository.getNamedXContentRegistry() @@ -798,6 +978,29 @@ private ClusterMetadataManifest fetchRemoteClusterMetadataManifest(String cluste } } + private ChecksumBlobStoreFormat getClusterMetadataManifestBlobStoreFormat(String fileName) { + long codecVersion = getManifestCodecVersion(fileName); + if (codecVersion == MANIFEST_CURRENT_CODEC_VERSION) { + return CLUSTER_METADATA_MANIFEST_FORMAT; + } else if (codecVersion == ClusterMetadataManifest.CODEC_V0) { + return CLUSTER_METADATA_MANIFEST_FORMAT_V0; + } + + throw new IllegalArgumentException("Cluster metadata manifest file is corrupted, don't have valid codec version"); + } + + private int getManifestCodecVersion(String fileName) { + String[] splitName = fileName.split(DELIMITER); + if (splitName.length == SPLITED_MANIFEST_FILE_LENGTH) { + return Integer.parseInt(splitName[splitName.length - 1]); // Last value would be codec version. + } else if (splitName.length < SPLITED_MANIFEST_FILE_LENGTH) { // Where codec is not part of file name, i.e. default codec version 0 + // is used. 
+ return ClusterMetadataManifest.CODEC_V0; + } else { + throw new IllegalArgumentException("Manifest file name is corrupted"); + } + } + public static String encodeString(String content) { return Base64.getUrlEncoder().withoutPadding().encodeToString(content.getBytes(StandardCharsets.UTF_8)); } @@ -816,6 +1019,20 @@ public IndexMetadataTransferException(String errorDesc, Throwable cause) { } } + /** + * Exception for GlobalMetadata transfer failures to remote + */ + static class GlobalMetadataTransferException extends RuntimeException { + + public GlobalMetadataTransferException(String errorDesc) { + super(errorDesc); + } + + public GlobalMetadataTransferException(String errorDesc, Throwable cause) { + super(errorDesc, cause); + } + } + /** * Purges all remote cluster state against provided cluster UUIDs * @@ -907,6 +1124,7 @@ private void deleteClusterMetadata( Set filesToKeep = new HashSet<>(); Set staleManifestPaths = new HashSet<>(); Set staleIndexMetadataPaths = new HashSet<>(); + Set staleGlobalMetadataPaths = new HashSet<>(); activeManifestBlobMetadata.forEach(blobMetadata -> { ClusterMetadataManifest clusterMetadataManifest = fetchRemoteClusterMetadataManifest( clusterName, @@ -915,6 +1133,7 @@ private void deleteClusterMetadata( ); clusterMetadataManifest.getIndices() .forEach(uploadedIndexMetadata -> filesToKeep.add(uploadedIndexMetadata.getUploadedFilename())); + filesToKeep.add(clusterMetadataManifest.getGlobalMetadataFileName()); }); staleManifestBlobMetadata.forEach(blobMetadata -> { ClusterMetadataManifest clusterMetadataManifest = fetchRemoteClusterMetadataManifest( @@ -923,6 +1142,14 @@ private void deleteClusterMetadata( blobMetadata.name() ); staleManifestPaths.add(new BlobPath().add(MANIFEST_PATH_TOKEN).buildAsString() + blobMetadata.name()); + if (filesToKeep.contains(clusterMetadataManifest.getGlobalMetadataFileName()) == false) { + String[] globalMetadataSplitPath = clusterMetadataManifest.getGlobalMetadataFileName().split("/"); + 
staleGlobalMetadataPaths.add( + new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName( + globalMetadataSplitPath[globalMetadataSplitPath.length - 1] + ) + ); + } clusterMetadataManifest.getIndices().forEach(uploadedIndexMetadata -> { if (filesToKeep.contains(uploadedIndexMetadata.getUploadedFilename()) == false) { staleIndexMetadataPaths.add( @@ -938,6 +1165,7 @@ private void deleteClusterMetadata( return; } + deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleGlobalMetadataPaths)); deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleIndexMetadataPaths)); deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleManifestPaths)); } catch (IllegalStateException e) { diff --git a/server/src/main/java/org/opensearch/identity/tokens/AuthToken.java b/server/src/main/java/org/opensearch/identity/tokens/AuthToken.java index c929e7421b3d8..88bb855a6e70d 100644 --- a/server/src/main/java/org/opensearch/identity/tokens/AuthToken.java +++ b/server/src/main/java/org/opensearch/identity/tokens/AuthToken.java @@ -16,4 +16,5 @@ public interface AuthToken { String asAuthHeaderValue(); + } diff --git a/server/src/main/java/org/opensearch/identity/tokens/OnBehalfOfClaims.java b/server/src/main/java/org/opensearch/identity/tokens/OnBehalfOfClaims.java index 3fef248ee6d3a..00e50a59e9486 100644 --- a/server/src/main/java/org/opensearch/identity/tokens/OnBehalfOfClaims.java +++ b/server/src/main/java/org/opensearch/identity/tokens/OnBehalfOfClaims.java @@ -14,46 +14,17 @@ public class OnBehalfOfClaims { private final String audience; - private final String subject; - private final Long expiration; - private final Long not_before; - private final Long issued_at; + private final Long expiration_seconds; /** * Constructor for OnBehalfOfClaims * @param aud the Audience for the token - * @param subject the subject of the token - * @param expiration the expiration time in seconds for the token - * @param not_before the 
not_before time in seconds for the token - * @param issued_at the issued_at time in seconds for the token - */ - public OnBehalfOfClaims(String aud, String subject, Long expiration, Long not_before, Long issued_at) { - this.audience = aud; - this.subject = subject; - this.expiration = expiration; - this.not_before = not_before; - this.issued_at = issued_at; - } - - /** - * A constructor that sets a default issued at time of the current time - * @param aud the Audience for the token - * @param subject the subject of the token - * @param expiration the expiration time in seconds for the token - * @param not_before the not_before time in seconds for the token - */ - public OnBehalfOfClaims(String aud, String subject, Long expiration, Long not_before) { - this(aud, subject, expiration, not_before, System.currentTimeMillis() / 1000); - } + * @param expiration_seconds the length of time in seconds the token is valid - /** - * A constructor which sets a default not before time of the current time - * @param aud the Audience for the token - * @param subject the subject of the token - * @param expiration the expiration time in seconds for the token */ - public OnBehalfOfClaims(String aud, String subject, Long expiration) { - this(aud, subject, expiration, System.currentTimeMillis() / 1000); + public OnBehalfOfClaims(String aud, Long expiration_seconds) { + this.audience = aud; + this.expiration_seconds = expiration_seconds; } /** @@ -62,26 +33,14 @@ public OnBehalfOfClaims(String aud, String subject, Long expiration) { * @param subject the subject of the token */ public OnBehalfOfClaims(String aud, String subject) { - this(aud, subject, System.currentTimeMillis() / 1000 + 300); + this(aud, 300L); } public String getAudience() { return audience; } - public String getSubject() { - return subject; - } - public Long getExpiration() { - return expiration; - } - - public Long getNot_before() { - return not_before; - } - - public Long getIssued_at() { - return issued_at; + return 
expiration_seconds; } } diff --git a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java index 94fd08b99ac58..ac9cf35d1d8e5 100644 --- a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java +++ b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java @@ -10,15 +10,16 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.opensearch.Version; import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreRequest; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.block.ClusterBlocks; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.IndexTemplateMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.metadata.MetadataCreateIndexService; import org.opensearch.cluster.metadata.MetadataIndexUpgradeService; +import org.opensearch.cluster.metadata.RepositoriesMetadata; import org.opensearch.cluster.routing.IndexShardRoutingTable; import org.opensearch.cluster.routing.RecoverySource; import org.opensearch.cluster.routing.RoutingTable; @@ -27,6 +28,7 @@ import org.opensearch.common.Nullable; import org.opensearch.common.UUIDs; import org.opensearch.common.collect.Tuple; +import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.core.action.ActionListener; import org.opensearch.core.index.shard.ShardId; @@ -42,11 +44,13 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_STORE_ENABLED; +import static 
org.opensearch.repositories.blobstore.BlobStoreRepository.SYSTEM_REPOSITORY_SETTING; /** * Service responsible for restoring index data from remote store @@ -136,16 +140,16 @@ public RemoteRestoreResult restore( String[] indexNames ) { Map> indexMetadataMap = new HashMap<>(); + Metadata remoteMetadata = null; boolean metadataFromRemoteStore = (restoreClusterUUID == null || restoreClusterUUID.isEmpty() || restoreClusterUUID.isBlank()) == false; if (metadataFromRemoteStore) { try { - remoteClusterStateService.getLatestIndexMetadata(currentState.getClusterName().value(), restoreClusterUUID) - .values() - .forEach(indexMetadata -> { - indexMetadataMap.put(indexMetadata.getIndex().getName(), new Tuple<>(true, indexMetadata)); - }); + remoteMetadata = remoteClusterStateService.getLatestMetadata(currentState.getClusterName().value(), restoreClusterUUID); + remoteMetadata.getIndices().values().forEach(indexMetadata -> { + indexMetadataMap.put(indexMetadata.getIndex().getName(), new Tuple<>(true, indexMetadata)); + }); } catch (Exception e) { throw new IllegalStateException("Unable to restore remote index metadata", e); } @@ -160,7 +164,7 @@ public RemoteRestoreResult restore( } } validate(currentState, indexMetadataMap, restoreClusterUUID, restoreAllShards); - return executeRestore(currentState, indexMetadataMap, restoreAllShards); + return executeRestore(currentState, indexMetadataMap, restoreAllShards, remoteMetadata); } /** @@ -173,7 +177,8 @@ public RemoteRestoreResult restore( private RemoteRestoreResult executeRestore( ClusterState currentState, Map> indexMetadataMap, - boolean restoreAllShards + boolean restoreAllShards, + Metadata remoteMetadata ) { final String restoreUUID = UUIDs.randomBase64UUID(); List indicesToBeRestored = new ArrayList<>(); @@ -226,6 +231,10 @@ private RemoteRestoreResult executeRestore( totalShards += updatedIndexMetadata.getNumberOfShards(); } + if (remoteMetadata != null) { + restoreGlobalMetadata(mdBuilder, remoteMetadata); + } + 
RestoreInfo restoreInfo = new RestoreInfo("remote_store", indicesToBeRestored, totalShards, totalShards); RoutingTable rt = rtBuilder.build(); @@ -233,6 +242,36 @@ private RemoteRestoreResult executeRestore( return RemoteRestoreResult.build(restoreUUID, restoreInfo, allocationService.reroute(updatedState, "restored from remote store")); } + private void restoreGlobalMetadata(Metadata.Builder mdBuilder, Metadata remoteMetadata) { + if (remoteMetadata.persistentSettings() != null) { + Settings settings = remoteMetadata.persistentSettings(); + clusterService.getClusterSettings().validateUpdate(settings); + mdBuilder.persistentSettings(settings); + } + if (remoteMetadata.templates() != null) { + for (final IndexTemplateMetadata cursor : remoteMetadata.templates().values()) { + mdBuilder.put(cursor); + } + } + if (remoteMetadata.customs() != null) { + for (final Map.Entry cursor : remoteMetadata.customs().entrySet()) { + if (RepositoriesMetadata.TYPE.equals(cursor.getKey()) == false) { + mdBuilder.putCustom(cursor.getKey(), cursor.getValue()); + } + } + } + Optional repositoriesMetadata = Optional.ofNullable(remoteMetadata.custom(RepositoriesMetadata.TYPE)); + repositoriesMetadata = repositoriesMetadata.map( + repositoriesMetadata1 -> new RepositoriesMetadata( + repositoriesMetadata1.repositories() + .stream() + .filter(repository -> SYSTEM_REPOSITORY_SETTING.get(repository.settings()) == false) + .collect(Collectors.toList()) + ) + ); + repositoriesMetadata.ifPresent(metadata -> mdBuilder.putCustom(RepositoriesMetadata.TYPE, metadata)); + } + /** * Performs various validations needed before executing restore * @param currentState current cluster state @@ -297,8 +336,6 @@ private void validate( throw new IllegalStateException(finalErrorMsg); } - Version minIndexCompatibilityVersion = currentState.getNodes().getMaxNodeVersion().minimumIndexCompatibilityVersion(); - metadataIndexUpgradeService.upgradeIndexMetadata(indexMetadata, minIndexCompatibilityVersion); boolean 
isHidden = IndexMetadata.INDEX_HIDDEN_SETTING.get(indexMetadata.getSettings()); createIndexService.validateIndexName(indexName, currentState); createIndexService.validateDotIndex(indexName, isHidden); diff --git a/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java b/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java index 9dcd16c53e6f3..02fc8feefd698 100644 --- a/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java +++ b/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java @@ -22,6 +22,7 @@ import org.opensearch.transport.TransportService; import java.util.List; +import java.util.function.BiConsumer; import static org.opensearch.indices.replication.SegmentReplicationSourceService.Actions.GET_CHECKPOINT_INFO; import static org.opensearch.indices.replication.SegmentReplicationSourceService.Actions.GET_SEGMENT_FILES; @@ -80,8 +81,13 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { + // fileProgressTracker is a no-op for node to node recovery + // MultiFileWriter takes care of progress tracking for downloads in this scenario + // TODO: Move state management and tracking into replication methods and use chunking and data + // copy mechanisms only from MultiFileWriter final Writeable.Reader reader = GetSegmentFilesResponse::new; final ActionListener responseListener = ActionListener.map(listener, r -> r); final GetSegmentFilesRequest request = new GetSegmentFilesRequest( diff --git a/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java b/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java index d2000a56401f5..12eabf1e6554f 100644 --- a/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java 
+++ b/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java @@ -29,6 +29,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.function.BiConsumer; import java.util.stream.Collectors; /** @@ -95,6 +96,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { try { @@ -117,7 +119,12 @@ public void getSegmentFiles( assert directoryFiles.contains(file) == false : "Local store already contains the file " + file; toDownloadSegmentNames.add(file); } - indexShard.getFileDownloader().download(remoteDirectory, storeDirectory, toDownloadSegmentNames); + indexShard.getFileDownloader() + .download( + remoteDirectory, + new ReplicationStatsDirectoryWrapper(storeDirectory, fileProgressTracker), + toDownloadSegmentNames + ); logger.debug("Downloaded segment files from remote store {}", filesToFetch); } finally { indexShard.store().decRef(); diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSource.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSource.java index 6676b5b667e42..24f0cb15ddb25 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSource.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSource.java @@ -8,13 +8,19 @@ package org.opensearch.indices.replication; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FilterDirectory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; import org.opensearch.common.util.CancellableThreads.ExecutionCancelledException; import org.opensearch.core.action.ActionListener; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.store.StoreFileMetadata; import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; 
+import java.io.IOException; import java.util.List; +import java.util.function.BiConsumer; /** * Represents the source of a replication event. @@ -39,6 +45,7 @@ public interface SegmentReplicationSource { * @param checkpoint {@link ReplicationCheckpoint} Checkpoint to fetch metadata for. * @param filesToFetch {@link List} List of files to fetch. * @param indexShard {@link IndexShard} Reference to the IndexShard. + * @param fileProgressTracker {@link BiConsumer} A consumer that updates the replication progress for shard files. * @param listener {@link ActionListener} Listener that completes with the list of files copied. */ void getSegmentFiles( @@ -46,6 +53,7 @@ void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ); @@ -58,4 +66,69 @@ void getSegmentFiles( * Cancel any ongoing requests, should resolve any ongoing listeners with onFailure with a {@link ExecutionCancelledException}. */ default void cancel() {} + + /** + * Directory wrapper that records copy process for replication statistics + * + * @opensearch.internal + */ + final class ReplicationStatsDirectoryWrapper extends FilterDirectory { + private final BiConsumer fileProgressTracker; + + ReplicationStatsDirectoryWrapper(Directory in, BiConsumer fileProgressTracker) { + super(in); + this.fileProgressTracker = fileProgressTracker; + } + + @Override + public void copyFrom(Directory from, String src, String dest, IOContext context) throws IOException { + // here we wrap the index input form the source directory to report progress of file copy for the recovery stats. + // we increment the num bytes recovered in the readBytes method below, if users pull statistics they can see immediately + // how much has been recovered. 
+ in.copyFrom(new FilterDirectory(from) { + @Override + public IndexInput openInput(String name, IOContext context) throws IOException { + final IndexInput input = in.openInput(name, context); + return new IndexInput("StatsDirectoryWrapper(" + input.toString() + ")") { + @Override + public void close() throws IOException { + input.close(); + } + + @Override + public long getFilePointer() { + throw new UnsupportedOperationException("only straight copies are supported"); + } + + @Override + public void seek(long pos) throws IOException { + throw new UnsupportedOperationException("seeks are not supported"); + } + + @Override + public long length() { + return input.length(); + } + + @Override + public IndexInput slice(String sliceDescription, long offset, long length) throws IOException { + throw new UnsupportedOperationException("slices are not supported"); + } + + @Override + public byte readByte() throws IOException { + throw new UnsupportedOperationException("use a buffer if you wanna perform well"); + } + + @Override + public void readBytes(byte[] b, int offset, int len) throws IOException { + // we rely on the fact that copyFrom uses a buffer + input.readBytes(b, offset, len); + fileProgressTracker.accept(dest, (long) len); + } + }; + } + }, src, dest, context); + } + } } diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java index 0eb6ce36fa63d..cd6dbe8af90d9 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java @@ -170,7 +170,14 @@ public void startReplication(ActionListener listener) { final List filesToFetch = getFiles(checkpointInfo); state.setStage(SegmentReplicationState.Stage.GET_FILES); cancellableThreads.checkForCancel(); - source.getSegmentFiles(getId(), checkpointInfo.getCheckpoint(), 
filesToFetch, indexShard, getFilesListener); + source.getSegmentFiles( + getId(), + checkpointInfo.getCheckpoint(), + filesToFetch, + indexShard, + this::updateFileRecoveryBytes, + getFilesListener + ); }, listener::onFailure); getFilesListener.whenComplete(response -> { @@ -240,6 +247,20 @@ private boolean validateLocalChecksum(StoreFileMetadata file) { } } + /** + * Updates the state to reflect recovery progress for the given file and + * updates the last access time for the target. + * @param fileName Name of the file being downloaded + * @param bytesRecovered Number of bytes recovered + */ + private void updateFileRecoveryBytes(String fileName, long bytesRecovered) { + ReplicationLuceneIndex index = state.getIndex(); + if (index != null) { + index.addRecoveredBytesToFile(fileName, bytesRecovered); + } + setLastAccessTime(); + } + private void finalizeReplication(CheckpointInfoResponse checkpointInfoResponse) throws OpenSearchCorruptionException { cancellableThreads.checkForCancel(); state.setStage(SegmentReplicationState.Stage.FINALIZE_REPLICATION); diff --git a/server/src/main/java/org/opensearch/monitor/fs/FsInfo.java b/server/src/main/java/org/opensearch/monitor/fs/FsInfo.java index 4e2e9f280d765..8446ab0dd6166 100644 --- a/server/src/main/java/org/opensearch/monitor/fs/FsInfo.java +++ b/server/src/main/java/org/opensearch/monitor/fs/FsInfo.java @@ -335,7 +335,7 @@ public DeviceStats(StreamInput in) throws IOException { previousSectorsRead = in.readLong(); currentSectorsWritten = in.readLong(); previousSectorsWritten = in.readLong(); - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { currentReadTime = in.readLong(); previousReadTime = in.readLong(); currentWriteTime = in.readLong(); @@ -369,7 +369,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeLong(previousSectorsRead); out.writeLong(currentSectorsWritten); out.writeLong(previousSectorsWritten); - if 
(out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeLong(currentReadTime); out.writeLong(previousReadTime); out.writeLong(currentWriteTime); @@ -533,7 +533,7 @@ public IoStats(StreamInput in) throws IOException { this.totalWriteOperations = in.readLong(); this.totalReadKilobytes = in.readLong(); this.totalWriteKilobytes = in.readLong(); - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { this.totalReadTime = in.readLong(); this.totalWriteTime = in.readLong(); this.totalQueueSize = in.readLong(); @@ -557,7 +557,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeLong(totalWriteOperations); out.writeLong(totalReadKilobytes); out.writeLong(totalWriteKilobytes); - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeLong(totalReadTime); out.writeLong(totalWriteTime); out.writeLong(totalQueueSize); diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 69b80462bbf0b..c9148f382a028 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -1115,7 +1115,8 @@ protected Node( searchPipelineService, fileCache, taskCancellationMonitoringService, - resourceUsageCollectorService + resourceUsageCollectorService, + repositoryService ); final SearchService searchService = newSearchService( diff --git a/server/src/main/java/org/opensearch/node/NodeService.java b/server/src/main/java/org/opensearch/node/NodeService.java index 9bb07080fa717..e2d7bc2c86ba3 100644 --- a/server/src/main/java/org/opensearch/node/NodeService.java +++ b/server/src/main/java/org/opensearch/node/NodeService.java @@ -53,6 +53,7 @@ import org.opensearch.ingest.IngestService; import org.opensearch.monitor.MonitorService; import org.opensearch.plugins.PluginsService; +import 
org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptService; import org.opensearch.search.aggregations.support.AggregationUsageService; import org.opensearch.search.backpressure.SearchBackpressureService; @@ -93,6 +94,7 @@ public class NodeService implements Closeable { private final Discovery discovery; private final FileCache fileCache; private final TaskCancellationMonitoringService taskCancellationMonitoringService; + private final RepositoriesService repositoriesService; NodeService( Settings settings, @@ -116,7 +118,8 @@ public class NodeService implements Closeable { SearchPipelineService searchPipelineService, FileCache fileCache, TaskCancellationMonitoringService taskCancellationMonitoringService, - ResourceUsageCollectorService resourceUsageCollectorService + ResourceUsageCollectorService resourceUsageCollectorService, + RepositoriesService repositoriesService ) { this.settings = settings; this.threadPool = threadPool; @@ -140,6 +143,7 @@ public class NodeService implements Closeable { this.fileCache = fileCache; this.taskCancellationMonitoringService = taskCancellationMonitoringService; this.resourceUsageCollectorService = resourceUsageCollectorService; + this.repositoriesService = repositoriesService; clusterService.addStateApplier(ingestService); clusterService.addStateApplier(searchPipelineService); } @@ -221,7 +225,8 @@ public NodeStats stats( boolean fileCacheStats, boolean taskCancellation, boolean searchPipelineStats, - boolean resourceUsageStats + boolean resourceUsageStats, + boolean repositoriesStats ) { // for indices stats we want to include previous allocated shards stats as well (it will // only be applied to the sensible ones to use, like refresh/merge/flush/indexing stats) @@ -250,7 +255,8 @@ public NodeStats stats( weightedRoutingStats ? WeightedRoutingStats.getInstance() : null, fileCacheStats && fileCache != null ? fileCache.fileCacheStats() : null, taskCancellation ? 
this.taskCancellationMonitoringService.stats() : null, - searchPipelineStats ? this.searchPipelineService.stats() : null + searchPipelineStats ? this.searchPipelineService.stats() : null, + repositoriesStats ? this.repositoriesService.getRepositoriesStats() : null ); } diff --git a/server/src/main/java/org/opensearch/repositories/RepositoriesService.java b/server/src/main/java/org/opensearch/repositories/RepositoriesService.java index 72266c053a1ae..68669feb16abc 100644 --- a/server/src/main/java/org/opensearch/repositories/RepositoriesService.java +++ b/server/src/main/java/org/opensearch/repositories/RepositoriesService.java @@ -457,7 +457,6 @@ public void applyClusterState(ClusterChangedEvent event) { logger.debug("unregistering repository [{}]", entry.getKey()); Repository repository = entry.getValue(); closeRepository(repository); - archiveRepositoryStats(repository, state.version()); } else { survivors.put(entry.getKey(), entry.getValue()); } @@ -485,7 +484,6 @@ public void applyClusterState(ClusterChangedEvent event) { } else { logger.debug("updating repository [{}]", repositoryMetadata.name()); closeRepository(repository); - archiveRepositoryStats(repository, state.version()); repository = null; try { repository = createRepository(repositoryMetadata, typesRegistry); @@ -575,12 +573,12 @@ public Repository repository(String repositoryName) { } public List repositoriesStats() { - List archivedRepoStats = repositoriesStatsArchive.getArchivedStats(); List activeRepoStats = getRepositoryStatsForActiveRepositories(); + return activeRepoStats; + } - List repositoriesStats = new ArrayList<>(archivedRepoStats); - repositoriesStats.addAll(activeRepoStats); - return repositoriesStats; + public RepositoriesStats getRepositoriesStats() { + return new RepositoriesStats(repositoriesStats()); } private List getRepositoryStatsForActiveRepositories() { @@ -640,15 +638,6 @@ public void closeRepository(Repository repository) { repository.close(); } - private void 
archiveRepositoryStats(Repository repository, long clusterStateVersion) { - if (repository instanceof MeteredBlobStoreRepository) { - RepositoryStatsSnapshot stats = ((MeteredBlobStoreRepository) repository).statsSnapshotForArchival(clusterStateVersion); - if (repositoriesStatsArchive.archive(stats) == false) { - logger.warn("Unable to archive the repository stats [{}] as the archive is full.", stats); - } - } - } - /** * Creates repository holder. This method starts the non-internal repository */ diff --git a/server/src/main/java/org/opensearch/repositories/RepositoriesStats.java b/server/src/main/java/org/opensearch/repositories/RepositoriesStats.java new file mode 100644 index 0000000000000..b24e0dddd852a --- /dev/null +++ b/server/src/main/java/org/opensearch/repositories/RepositoriesStats.java @@ -0,0 +1,52 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.repositories; + +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.common.util.CollectionUtils; +import org.opensearch.core.xcontent.ToXContentObject; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.List; + +/** + * Encapsulates stats for multiple repositories* + */ +public class RepositoriesStats implements Writeable, ToXContentObject { + + List repositoryStatsSnapshots; + + public RepositoriesStats(List repositoryStatsSnapshots) { + this.repositoryStatsSnapshots = repositoryStatsSnapshots; + } + + public RepositoriesStats(StreamInput in) throws IOException { + this.repositoryStatsSnapshots = in.readList(RepositoryStatsSnapshot::new); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeList(repositoryStatsSnapshots); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startArray("repositories"); + if (CollectionUtils.isEmpty(repositoryStatsSnapshots) == false) { + for (RepositoryStatsSnapshot repositoryStatsSnapshot : repositoryStatsSnapshots) { + repositoryStatsSnapshot.toXContent(builder, params); + } + } + builder.endArray(); + return builder; + } +} diff --git a/server/src/main/java/org/opensearch/repositories/RepositoriesStatsArchive.java b/server/src/main/java/org/opensearch/repositories/RepositoriesStatsArchive.java index b8f100706f81e..3d35f75176eaf 100644 --- a/server/src/main/java/org/opensearch/repositories/RepositoriesStatsArchive.java +++ b/server/src/main/java/org/opensearch/repositories/RepositoriesStatsArchive.java @@ -70,11 +70,6 @@ public RepositoriesStatsArchive(TimeValue retentionPeriod, int maxCapacity, Long * @return {@code true} if the repository stats were archived, {@code false} otherwise. 
*/ synchronized boolean archive(final RepositoryStatsSnapshot repositoryStats) { - assert containsRepositoryStats(repositoryStats) == false : "A repository with ephemeral id " - + repositoryStats.getRepositoryInfo().ephemeralId - + " is already archived"; - assert repositoryStats.isArchived(); - evict(); if (archive.size() >= maxCapacity) { @@ -116,15 +111,6 @@ private void evict() { } } - private boolean containsRepositoryStats(RepositoryStatsSnapshot repositoryStats) { - return archive.stream() - .anyMatch( - entry -> entry.repositoryStatsSnapshot.getRepositoryInfo().ephemeralId.equals( - repositoryStats.getRepositoryInfo().ephemeralId - ) - ); - } - private static class ArchiveEntry { private final RepositoryStatsSnapshot repositoryStatsSnapshot; private final long createdAtMillis; diff --git a/server/src/main/java/org/opensearch/repositories/RepositoryInfo.java b/server/src/main/java/org/opensearch/repositories/RepositoryInfo.java index 8aa86fc46d591..387a685bd6526 100644 --- a/server/src/main/java/org/opensearch/repositories/RepositoryInfo.java +++ b/server/src/main/java/org/opensearch/repositories/RepositoryInfo.java @@ -32,7 +32,6 @@ package org.opensearch.repositories; -import org.opensearch.common.Nullable; import org.opensearch.core.common.Strings; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; @@ -51,64 +50,27 @@ * @opensearch.internal */ public final class RepositoryInfo implements Writeable, ToXContentFragment { - public final String ephemeralId; public final String name; public final String type; public final Map location; - public final long startedAt; - @Nullable - public final Long stoppedAt; - public RepositoryInfo(String ephemeralId, String name, String type, Map location, long startedAt) { - this(ephemeralId, name, type, location, startedAt, null); - } - - public RepositoryInfo( - String ephemeralId, - String name, - String type, - Map location, - long startedAt, - @Nullable 
Long stoppedAt - ) { - this.ephemeralId = ephemeralId; + public RepositoryInfo(String name, String type, Map location) { this.name = name; this.type = type; this.location = location; - this.startedAt = startedAt; - if (stoppedAt != null && startedAt > stoppedAt) { - throw new IllegalArgumentException("createdAt must be before or equal to stoppedAt"); - } - this.stoppedAt = stoppedAt; } public RepositoryInfo(StreamInput in) throws IOException { - this.ephemeralId = in.readString(); this.name = in.readString(); this.type = in.readString(); this.location = in.readMap(StreamInput::readString, StreamInput::readString); - this.startedAt = in.readLong(); - this.stoppedAt = in.readOptionalLong(); - } - - public RepositoryInfo stopped(long stoppedAt) { - assert isStopped() == false : "The repository is already stopped"; - - return new RepositoryInfo(ephemeralId, name, type, location, startedAt, stoppedAt); - } - - public boolean isStopped() { - return stoppedAt != null; } @Override public void writeTo(StreamOutput out) throws IOException { - out.writeString(ephemeralId); out.writeString(name); out.writeString(type); out.writeMap(location, StreamOutput::writeString, StreamOutput::writeString); - out.writeLong(startedAt); - out.writeOptionalLong(stoppedAt); } @Override @@ -116,11 +78,6 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field("repository_name", name); builder.field("repository_type", type); builder.field("repository_location", location); - builder.field("repository_ephemeral_id", ephemeralId); - builder.field("repository_started_at", startedAt); - if (stoppedAt != null) { - builder.field("repository_stopped_at", stoppedAt); - } return builder; } @@ -129,17 +86,12 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; RepositoryInfo that = (RepositoryInfo) o; - return ephemeralId.equals(that.ephemeralId) - && name.equals(that.name) - && 
type.equals(that.type) - && location.equals(that.location) - && startedAt == that.startedAt - && Objects.equals(stoppedAt, that.stoppedAt); + return name.equals(that.name) && type.equals(that.type) && location.equals(that.location); } @Override public int hashCode() { - return Objects.hash(ephemeralId, name, type, location, startedAt, stoppedAt); + return Objects.hash(name, type, location); } @Override diff --git a/server/src/main/java/org/opensearch/repositories/RepositoryStats.java b/server/src/main/java/org/opensearch/repositories/RepositoryStats.java index efd5d6f8560b6..ab97c5eaa1f7a 100644 --- a/server/src/main/java/org/opensearch/repositories/RepositoryStats.java +++ b/server/src/main/java/org/opensearch/repositories/RepositoryStats.java @@ -32,9 +32,13 @@ package org.opensearch.repositories; +import org.opensearch.common.Nullable; +import org.opensearch.common.blobstore.BlobStore; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContentFragment; +import org.opensearch.core.xcontent.XContentBuilder; import java.io.IOException; import java.util.Collections; @@ -47,32 +51,63 @@ * * @opensearch.internal */ -public class RepositoryStats implements Writeable { +public class RepositoryStats implements Writeable, ToXContentFragment { public static final RepositoryStats EMPTY_STATS = new RepositoryStats(Collections.emptyMap()); + @Nullable public final Map requestCounts; + @Nullable + public final Map> extendedStats; + public final boolean detailed; public RepositoryStats(Map requestCounts) { this.requestCounts = Collections.unmodifiableMap(requestCounts); + this.extendedStats = Collections.emptyMap(); + this.detailed = false; + } + + public RepositoryStats(Map> extendedStats, boolean detailed) { + this.requestCounts = Collections.emptyMap(); + this.extendedStats = Collections.unmodifiableMap(extendedStats); 
+ this.detailed = detailed; } public RepositoryStats(StreamInput in) throws IOException { this.requestCounts = in.readMap(StreamInput::readString, StreamInput::readLong); + this.extendedStats = in.readMap( + e -> e.readEnum(BlobStore.Metric.class), + i -> i.readMap(StreamInput::readString, StreamInput::readLong) + ); + this.detailed = in.readBoolean(); } public RepositoryStats merge(RepositoryStats otherStats) { - final Map result = new HashMap<>(); - result.putAll(requestCounts); - for (Map.Entry entry : otherStats.requestCounts.entrySet()) { - result.merge(entry.getKey(), entry.getValue(), Math::addExact); + assert this.detailed == otherStats.detailed; + if (detailed) { + final Map> result = new HashMap<>(); + result.putAll(extendedStats); + for (Map.Entry> entry : otherStats.extendedStats.entrySet()) { + for (Map.Entry nested : entry.getValue().entrySet()) { + result.get(entry.getKey()).merge(nested.getKey(), nested.getValue(), Math::addExact); + } + } + return new RepositoryStats(result, true); + } else { + final Map result = new HashMap<>(); + result.putAll(requestCounts); + for (Map.Entry entry : otherStats.requestCounts.entrySet()) { + result.merge(entry.getKey(), entry.getValue(), Math::addExact); + } + return new RepositoryStats(result); } - return new RepositoryStats(result); } @Override public void writeTo(StreamOutput out) throws IOException { out.writeMap(requestCounts, StreamOutput::writeString, StreamOutput::writeLong); + out.writeMap(extendedStats, StreamOutput::writeEnum, (o, v) -> o.writeMap(v, StreamOutput::writeString, StreamOutput::writeLong)); + out.writeBoolean(detailed); } @Override @@ -80,16 +115,32 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; RepositoryStats that = (RepositoryStats) o; - return requestCounts.equals(that.requestCounts); + return requestCounts.equals(that.requestCounts) && extendedStats.equals(that.extendedStats) && detailed == that.detailed; } 
@Override public int hashCode() { - return Objects.hash(requestCounts); + return Objects.hash(requestCounts, detailed, extendedStats); } @Override public String toString() { - return "RepositoryStats{" + "requestCounts=" + requestCounts + '}'; + return "RepositoryStats{" + "requestCounts=" + requestCounts + "extendedStats=" + extendedStats + "detailed =" + detailed + "}"; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + if (detailed == false) { + builder.field("request_counts", requestCounts); + } else { + extendedStats.forEach((k, v) -> { + try { + builder.field(k.metricName(), v); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + return builder; } } diff --git a/server/src/main/java/org/opensearch/repositories/RepositoryStatsSnapshot.java b/server/src/main/java/org/opensearch/repositories/RepositoryStatsSnapshot.java index 2b061cd2c2cc9..0a727980fad0d 100644 --- a/server/src/main/java/org/opensearch/repositories/RepositoryStatsSnapshot.java +++ b/server/src/main/java/org/opensearch/repositories/RepositoryStatsSnapshot.java @@ -53,21 +53,17 @@ public final class RepositoryStatsSnapshot implements Writeable, ToXContentObjec private final RepositoryInfo repositoryInfo; private final RepositoryStats repositoryStats; private final long clusterVersion; - private final boolean archived; - public RepositoryStatsSnapshot(RepositoryInfo repositoryInfo, RepositoryStats repositoryStats, long clusterVersion, boolean archived) { - assert archived != (clusterVersion == UNKNOWN_CLUSTER_VERSION); + public RepositoryStatsSnapshot(RepositoryInfo repositoryInfo, RepositoryStats repositoryStats, long clusterVersion) { this.repositoryInfo = repositoryInfo; this.repositoryStats = repositoryStats; this.clusterVersion = clusterVersion; - this.archived = archived; } public RepositoryStatsSnapshot(StreamInput in) throws IOException { this.repositoryInfo = new RepositoryInfo(in); 
this.repositoryStats = new RepositoryStats(in); this.clusterVersion = in.readLong(); - this.archived = in.readBoolean(); } public RepositoryInfo getRepositoryInfo() { @@ -78,10 +74,6 @@ public RepositoryStats getRepositoryStats() { return repositoryStats; } - public boolean isArchived() { - return archived; - } - public long getClusterVersion() { return clusterVersion; } @@ -91,18 +83,13 @@ public void writeTo(StreamOutput out) throws IOException { repositoryInfo.writeTo(out); repositoryStats.writeTo(out); out.writeLong(clusterVersion); - out.writeBoolean(archived); } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); repositoryInfo.toXContent(builder, params); - builder.field("request_counts", repositoryStats.requestCounts); - builder.field("archived", archived); - if (archived) { - builder.field("cluster_version", clusterVersion); - } + repositoryStats.toXContent(builder, params); builder.endObject(); return builder; } @@ -114,13 +101,12 @@ public boolean equals(Object o) { RepositoryStatsSnapshot that = (RepositoryStatsSnapshot) o; return repositoryInfo.equals(that.repositoryInfo) && repositoryStats.equals(that.repositoryStats) - && clusterVersion == that.clusterVersion - && archived == that.archived; + && clusterVersion == that.clusterVersion; } @Override public int hashCode() { - return Objects.hash(repositoryInfo, repositoryStats, clusterVersion, archived); + return Objects.hash(repositoryInfo, repositoryStats, clusterVersion); } @Override diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java index 69883e0d19c8d..8a2260e1f6d90 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java @@ -173,6 +173,7 @@ import 
java.util.stream.Stream; import static org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot.FileInfo.canonicalName; +import static org.opensearch.repositories.blobstore.ChecksumBlobStoreFormat.SNAPSHOT_ONLY_FORMAT_PARAMS; /** * BlobStore - based implementation of Snapshot Repository @@ -850,6 +851,8 @@ public RepositoryStats stats() { final BlobStore store = blobStore.get(); if (store == null) { return RepositoryStats.EMPTY_STATS; + } else if (store.extendedStats() != null && store.extendedStats().isEmpty() == false) { + return new RepositoryStats(store.extendedStats(), true); } return new RepositoryStats(store.stats()); } @@ -3333,7 +3336,12 @@ private void writeShardIndexBlobAtomic( () -> new ParameterizedMessage("[{}] Writing shard index [{}] to [{}]", metadata.name(), indexGeneration, shardContainer.path()) ); final String blobName = INDEX_SHARD_SNAPSHOTS_FORMAT.blobName(String.valueOf(indexGeneration)); - writeAtomic(shardContainer, blobName, INDEX_SHARD_SNAPSHOTS_FORMAT.serialize(updatedSnapshots, blobName, compressor), true); + writeAtomic( + shardContainer, + blobName, + INDEX_SHARD_SNAPSHOTS_FORMAT.serialize(updatedSnapshots, blobName, compressor, SNAPSHOT_ONLY_FORMAT_PARAMS), + true + ); } // Unused blobs are all previous index-, data- and meta-blobs and that are not referenced by the new index- as well as all diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java index 846f506a65bea..d2c51ceb8ecd5 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java @@ -83,7 +83,7 @@ public final class ChecksumBlobStoreFormat { // Serialization parameters to specify correct context for metadata serialization - private static final ToXContent.Params SNAPSHOT_ONLY_FORMAT_PARAMS; + 
public static final ToXContent.Params SNAPSHOT_ONLY_FORMAT_PARAMS; static { Map snapshotOnlyParams = new HashMap<>(); @@ -171,7 +171,7 @@ public T deserialize(String blobName, NamedXContentRegistry namedXContentRegistr */ public void write(final T obj, final BlobContainer blobContainer, final String name, final Compressor compressor) throws IOException { final String blobName = blobName(name); - final BytesReference bytes = serialize(obj, blobName, compressor); + final BytesReference bytes = serialize(obj, blobName, compressor, SNAPSHOT_ONLY_FORMAT_PARAMS); blobContainer.writeBlob(blobName, bytes.streamInput(), bytes.length(), false); } @@ -184,13 +184,15 @@ public void write(final T obj, final BlobContainer blobContainer, final String n * @param name blob name * @param compressor whether to use compression * @param listener listener to listen to write result + * @param params ToXContent params */ public void writeAsync( final T obj, final BlobContainer blobContainer, final String name, final Compressor compressor, - ActionListener listener + ActionListener listener, + final ToXContent.Params params ) throws IOException { if (blobContainer instanceof AsyncMultiStreamBlobContainer == false) { write(obj, blobContainer, name, compressor); @@ -198,7 +200,7 @@ public void writeAsync( return; } final String blobName = blobName(name); - final BytesReference bytes = serialize(obj, blobName, compressor); + final BytesReference bytes = serialize(obj, blobName, compressor, params); final String resourceDescription = "ChecksumBlobStoreFormat.writeAsync(blob=\"" + blobName + "\")"; try (IndexInput input = new ByteArrayIndexInput(resourceDescription, BytesReference.toBytes(bytes))) { long expectedChecksum; @@ -230,7 +232,8 @@ public void writeAsync( } } - public BytesReference serialize(final T obj, final String blobName, final Compressor compressor) throws IOException { + public BytesReference serialize(final T obj, final String blobName, final Compressor compressor, final 
ToXContent.Params params) + throws IOException { try (BytesStreamOutput outputStream = new BytesStreamOutput()) { try ( OutputStreamIndexOutput indexOutput = new OutputStreamIndexOutput( @@ -254,7 +257,7 @@ public void close() throws IOException { ) ) { builder.startObject(); - obj.toXContent(builder, SNAPSHOT_ONLY_FORMAT_PARAMS); + obj.toXContent(builder, params); builder.endObject(); } CodecUtil.writeFooter(indexOutput); diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/MeteredBlobStoreRepository.java b/server/src/main/java/org/opensearch/repositories/blobstore/MeteredBlobStoreRepository.java index d4921f4e6d2e7..0651ff586d412 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/MeteredBlobStoreRepository.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/MeteredBlobStoreRepository.java @@ -34,12 +34,10 @@ import org.opensearch.cluster.metadata.RepositoryMetadata; import org.opensearch.cluster.service.ClusterService; -import org.opensearch.common.UUIDs; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.repositories.RepositoryInfo; import org.opensearch.repositories.RepositoryStatsSnapshot; -import org.opensearch.threadpool.ThreadPool; import java.util.Map; @@ -59,14 +57,7 @@ public MeteredBlobStoreRepository( Map location ) { super(metadata, namedXContentRegistry, clusterService, recoverySettings); - ThreadPool threadPool = clusterService.getClusterApplierService().threadPool(); - this.repositoryInfo = new RepositoryInfo( - UUIDs.randomBase64UUID(), - metadata.name(), - metadata.type(), - location, - threadPool.absoluteTimeInMillis() - ); + this.repositoryInfo = new RepositoryInfo(metadata.name(), metadata.type(), location); } @Override @@ -78,11 +69,6 @@ public void reload(RepositoryMetadata repositoryMetadata) { } public RepositoryStatsSnapshot statsSnapshot() { - return new RepositoryStatsSnapshot(repositoryInfo, 
stats(), RepositoryStatsSnapshot.UNKNOWN_CLUSTER_VERSION, false); - } - - public RepositoryStatsSnapshot statsSnapshotForArchival(long clusterVersion) { - RepositoryInfo stoppedRepoInfo = repositoryInfo.stopped(threadPool.absoluteTimeInMillis()); - return new RepositoryStatsSnapshot(stoppedRepoInfo, stats(), clusterVersion, true); + return new RepositoryStatsSnapshot(repositoryInfo, stats(), RepositoryStatsSnapshot.UNKNOWN_CLUSTER_VERSION); } } diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java index 4b865383ee007..3491f18da9550 100644 --- a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -852,6 +852,7 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { weightedRoutingStats, null, null, + null, null ); } diff --git a/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java b/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java index 64949cf861f70..6f03e87bf5824 100644 --- a/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java +++ b/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java @@ -191,6 +191,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ), new NodeStats( @@ -218,6 +219,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ), new NodeStats( @@ -245,6 +247,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ) ); @@ -303,6 +306,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ), new NodeStats( @@ -330,6 +334,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ), new NodeStats( @@ -357,6 +362,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ) ); diff --git 
a/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java b/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java index f37823d2c0c7d..1c0dc7fc1ca2d 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java @@ -938,6 +938,8 @@ public void testHandlePrePublishAndCommitWhenRemoteStateEnabled() throws IOExcep Version.CURRENT, randomAlphaOfLength(10), false, + 1, + randomAlphaOfLength(10), Collections.emptyList(), randomAlphaOfLength(10), true diff --git a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java index c7ed1cb732154..1d5c2a0f01b5c 100644 --- a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java +++ b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java @@ -840,7 +840,8 @@ public void testGatewayForRemoteStateForInitialBootstrap() throws IOException { remoteClusterStateService, remoteStoreRestoreService, persistedStateRegistry, - ClusterState.EMPTY_STATE + ClusterState.EMPTY_STATE, + false ); final CoordinationState.PersistedState lucenePersistedState = gateway.getPersistedState(); PersistedState remotePersistedState = persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE); @@ -886,7 +887,8 @@ public void testGatewayForRemoteStateForNodeReplacement() throws IOException { remoteClusterStateService, remoteStoreRestoreService, persistedStateRegistry, - ClusterState.EMPTY_STATE + ClusterState.EMPTY_STATE, + false ); final CoordinationState.PersistedState lucenePersistedState = gateway.getPersistedState(); PersistedState remotePersistedState = persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE); @@ -918,7 +920,13 @@ public void 
testGatewayForRemoteStateForNodeReboot() throws IOException { .clusterUUID(randomAlphaOfLength(10)) .build() ); - gateway = newGatewayForRemoteState(remoteClusterStateService, remoteStoreRestoreService, persistedStateRegistry, clusterState); + gateway = newGatewayForRemoteState( + remoteClusterStateService, + remoteStoreRestoreService, + persistedStateRegistry, + clusterState, + false + ); final CoordinationState.PersistedState lucenePersistedState = gateway.getPersistedState(); PersistedState remotePersistedState = persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE); verifyNoInteractions(remoteClusterStateService); @@ -933,13 +941,77 @@ public void testGatewayForRemoteStateForNodeReboot() throws IOException { } } + public void testGatewayForRemoteStateForInitialBootstrapBlocksApplied() throws IOException { + MockGatewayMetaState gateway = null; + try { + final RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); + when(remoteClusterStateService.getLastKnownUUIDFromRemote(clusterName.value())).thenReturn("test-cluster-uuid"); + + final IndexMetadata indexMetadata = IndexMetadata.builder("test-index1") + .settings( + settings(Version.CURRENT).put(SETTING_INDEX_UUID, randomAlphaOfLength(10)) + .put(IndexMetadata.INDEX_READ_ONLY_SETTING.getKey(), true) + ) + .numberOfShards(5) + .numberOfReplicas(1) + .build(); + + final ClusterState clusterState = ClusterState.builder( + createClusterState( + randomNonNegativeLong(), + Metadata.builder() + .coordinationMetadata(CoordinationMetadata.builder().term(randomLong()).build()) + .put(indexMetadata, false) + .clusterUUID(ClusterState.UNKNOWN_UUID) + .persistentSettings(Settings.builder().put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), true).build()) + .build() + ) + ).nodes(DiscoveryNodes.EMPTY_NODES).build(); + + final RemoteStoreRestoreService remoteStoreRestoreService = mock(RemoteStoreRestoreService.class); + when(remoteStoreRestoreService.restore(any(), any(), 
anyBoolean(), any())).thenReturn( + RemoteRestoreResult.build("test-cluster-uuid", null, clusterState) + ); + final PersistedStateRegistry persistedStateRegistry = persistedStateRegistry(); + gateway = newGatewayForRemoteState( + remoteClusterStateService, + remoteStoreRestoreService, + persistedStateRegistry, + ClusterState.EMPTY_STATE, + true + ); + PersistedState remotePersistedState = persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE); + PersistedState lucenePersistedState = persistedStateRegistry.getPersistedState(PersistedStateType.LOCAL); + verify(remoteClusterStateService).getLastKnownUUIDFromRemote(clusterName.value()); // change this + verify(remoteStoreRestoreService).restore(any(ClusterState.class), any(String.class), anyBoolean(), any(String[].class)); + assertThat(remotePersistedState.getLastAcceptedState(), nullValue()); + assertThat( + Metadata.isGlobalStateEquals(lucenePersistedState.getLastAcceptedState().metadata(), clusterState.metadata()), + equalTo(true) + ); + assertThat( + lucenePersistedState.getLastAcceptedState().blocks().hasGlobalBlock(Metadata.CLUSTER_READ_ONLY_BLOCK), + equalTo(true) + ); + assertThat( + IndexMetadata.INDEX_READ_ONLY_SETTING.get( + lucenePersistedState.getLastAcceptedState().metadata().index("test-index1").getSettings() + ), + equalTo(true) + ); + } finally { + IOUtils.close(gateway); + } + } + private MockGatewayMetaState newGatewayForRemoteState( RemoteClusterStateService remoteClusterStateService, RemoteStoreRestoreService remoteStoreRestoreService, PersistedStateRegistry persistedStateRegistry, - ClusterState currentState + ClusterState currentState, + boolean prepareFullState ) throws IOException { - MockGatewayMetaState gateway = new MockGatewayMetaState(localNode, bigArrays); + MockGatewayMetaState gateway = new MockGatewayMetaState(localNode, bigArrays, prepareFullState); String randomRepoName = "randomRepoName"; String stateRepoTypeAttributeKey = String.format( Locale.getDefault(), @@ -963,6 
+1035,7 @@ private MockGatewayMetaState newGatewayForRemoteState( when(clusterService.getClusterSettings()).thenReturn( new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) ); + when(transportService.getLocalNode()).thenReturn(mock(DiscoveryNode.class)); final PersistedClusterStateService persistedClusterStateService = new PersistedClusterStateService( nodeEnvironment, xContentRegistry(), diff --git a/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java b/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java index 66426c2a880a3..6c9a3201656d7 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java @@ -26,6 +26,33 @@ public class ClusterMetadataManifestTests extends OpenSearchTestCase { + public void testClusterMetadataManifestXContentV0() throws IOException { + UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "test-uuid", "/test/upload/path"); + ClusterMetadataManifest originalManifest = new ClusterMetadataManifest( + 1L, + 1L, + "test-cluster-uuid", + "test-state-uuid", + Version.CURRENT, + "test-node-id", + false, + ClusterMetadataManifest.CODEC_V0, + null, + Collections.singletonList(uploadedIndexMetadata), + "prev-cluster-uuid", + true + ); + final XContentBuilder builder = JsonXContent.contentBuilder(); + builder.startObject(); + originalManifest.toXContent(builder, ToXContent.EMPTY_PARAMS); + builder.endObject(); + + try (XContentParser parser = createParser(JsonXContent.jsonXContent, BytesReference.bytes(builder))) { + final ClusterMetadataManifest fromXContentManifest = ClusterMetadataManifest.fromXContentV0(parser); + assertEquals(originalManifest, fromXContentManifest); + } + } + public void testClusterMetadataManifestXContent() throws IOException { UploadedIndexMetadata uploadedIndexMetadata = new 
UploadedIndexMetadata("test-index", "test-uuid", "/test/upload/path"); ClusterMetadataManifest originalManifest = new ClusterMetadataManifest( @@ -36,6 +63,8 @@ public void testClusterMetadataManifestXContent() throws IOException { Version.CURRENT, "test-node-id", false, + ClusterMetadataManifest.CODEC_V1, + "test-global-metadata-file", Collections.singletonList(uploadedIndexMetadata), "prev-cluster-uuid", true @@ -60,6 +89,8 @@ public void testClusterMetadataManifestSerializationEqualsHashCode() { Version.CURRENT, "B10RX1f5RJenMQvYccCgSQ", true, + 1, + "test-global-metadata-file", randomUploadedIndexMetadataList(), "yfObdx8KSMKKrXf8UyHhM", true diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 97825dcb58d33..7097603375ad8 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -12,6 +12,7 @@ import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.coordination.CoordinationMetadata; +import org.opensearch.cluster.metadata.IndexGraveyard; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNodes; @@ -28,6 +29,7 @@ import org.opensearch.common.lucene.store.ByteArrayIndexInput; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; +import org.opensearch.core.ParseField; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.common.bytes.BytesReference; @@ -66,10 +68,11 @@ import org.mockito.ArgumentMatchers; import static org.opensearch.gateway.remote.RemoteClusterStateService.DELIMITER; +import static 
org.opensearch.gateway.remote.RemoteClusterStateService.FORMAT_PARAMS; import static org.opensearch.gateway.remote.RemoteClusterStateService.INDEX_METADATA_CURRENT_CODEC_VERSION; -import static org.opensearch.gateway.remote.RemoteClusterStateService.INDEX_METADATA_FILE_PREFIX; import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_CURRENT_CODEC_VERSION; import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_FILE_PREFIX; +import static org.opensearch.gateway.remote.RemoteClusterStateService.METADATA_FILE_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT; @@ -232,14 +235,15 @@ public void testWriteFullMetadataInParallelSuccess() throws IOException { assertThat(manifest.getIndices().get(0).getIndexName(), is(uploadedIndexMetadata.getIndexName())); assertThat(manifest.getIndices().get(0).getIndexUUID(), is(uploadedIndexMetadata.getIndexUUID())); assertThat(manifest.getIndices().get(0).getUploadedFilename(), notNullValue()); + assertThat(manifest.getGlobalMetadataFileName(), notNullValue()); assertThat(manifest.getClusterTerm(), is(expectedManifest.getClusterTerm())); assertThat(manifest.getStateVersion(), is(expectedManifest.getStateVersion())); assertThat(manifest.getClusterUUID(), is(expectedManifest.getClusterUUID())); assertThat(manifest.getStateUUID(), is(expectedManifest.getStateUUID())); assertThat(manifest.getPreviousClusterUUID(), is(expectedManifest.getPreviousClusterUUID())); - assertEquals(actionListenerArgumentCaptor.getAllValues().size(), 1); - assertEquals(writeContextArgumentCaptor.getAllValues().size(), 1); + assertEquals(actionListenerArgumentCaptor.getAllValues().size(), 2); 
+ assertEquals(writeContextArgumentCaptor.getAllValues().size(), 2); WriteContext capturedWriteContext = writeContextArgumentCaptor.getValue(); byte[] writtenBytes = capturedWriteContext.getStreamProvider(Integer.MAX_VALUE).provideStream(0).getInputStream().readAllBytes(); @@ -263,7 +267,7 @@ public void testWriteFullMetadataInParallelSuccess() throws IOException { } - public void testWriteFullMetadataInParallelFailure() throws IOException { + public void testWriteFullMetadataFailureForGlobalMetadata() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); AsyncMultiStreamBlobContainer container = (AsyncMultiStreamBlobContainer) mockBlobStoreObjects(AsyncMultiStreamBlobContainer.class); @@ -274,6 +278,27 @@ public void testWriteFullMetadataInParallelFailure() throws IOException { return null; }).when(container).asyncBlobUpload(any(WriteContext.class), actionListenerArgumentCaptor.capture()); + remoteClusterStateService.start(); + assertThrows( + RemoteClusterStateService.GlobalMetadataTransferException.class, + () -> remoteClusterStateService.writeFullMetadata(clusterState, randomAlphaOfLength(10)) + ); + } + + public void testWriteFullMetadataInParallelFailureForIndexMetadata() throws IOException { + final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); + AsyncMultiStreamBlobContainer container = (AsyncMultiStreamBlobContainer) mockBlobStoreObjects(AsyncMultiStreamBlobContainer.class); + + ArgumentCaptor> actionListenerArgumentCaptor = ArgumentCaptor.forClass(ActionListener.class); + + doAnswer((i) -> { + actionListenerArgumentCaptor.getValue().onResponse(null); + return null; + }).doAnswer((i) -> { + actionListenerArgumentCaptor.getValue().onFailure(new RuntimeException("Cannot upload to remote")); + return null; + }).when(container).asyncBlobUpload(any(WriteContext.class), actionListenerArgumentCaptor.capture()); 
+ remoteClusterStateService.start(); assertThrows( RemoteClusterStateService.IndexMetadataTransferException.class, @@ -338,6 +363,207 @@ public void testWriteIncrementalMetadataSuccess() throws IOException { assertThat(manifest.getStateUUID(), is(expectedManifest.getStateUUID())); } + /* + * Here we will verify the migration of manifest file from codec V0 and V1. + * + * Initially codec version is 0 and global metadata is also null, we will perform index metadata update. + * In final manifest codec version should be 1 and + * global metadata should be updated, even if it was not changed in this cluster state update + */ + public void testMigrationFromCodecV0ManifestToCodecV1Manifest() throws IOException { + mockBlobStoreObjects(); + final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); + final ClusterState previousClusterState = ClusterState.builder(ClusterName.DEFAULT) + .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata)) + .nodes(nodesWithLocalNodeClusterManager()) + .build(); + + // Update only index metadata + final IndexMetadata indexMetadata = new IndexMetadata.Builder("test").settings( + Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.SETTING_INDEX_UUID, "uuid") + .build() + ).numberOfShards(1).numberOfReplicas(0).build(); + Metadata newMetadata = Metadata.builder(previousClusterState.metadata()).put(indexMetadata, true).build(); + ClusterState newClusterState = ClusterState.builder(previousClusterState).metadata(newMetadata).build(); + + // previous manifest with codec 0 and null global metadata + final ClusterMetadataManifest previousManifest = ClusterMetadataManifest.builder() + .codecVersion(ClusterMetadataManifest.CODEC_V0) + .globalMetadataFileName(null) + .indices(Collections.emptyList()) + .build(); + + remoteClusterStateService.start(); + final ClusterMetadataManifest manifestAfterUpdate = 
remoteClusterStateService.writeIncrementalMetadata( + previousClusterState, + newClusterState, + previousManifest + ); + + // global metadata is updated + assertThat(manifestAfterUpdate.getGlobalMetadataFileName(), notNullValue()); + // Manifest file with codec version with 1 is updated. + assertThat(manifestAfterUpdate.getCodecVersion(), is(ClusterMetadataManifest.CODEC_V1)); + } + + public void testWriteIncrementalGlobalMetadataSuccess() throws IOException { + final ClusterState clusterState = generateClusterStateWithGlobalMetadata().nodes(nodesWithLocalNodeClusterManager()).build(); + mockBlobStoreObjects(); + final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); + final ClusterState previousClusterState = ClusterState.builder(ClusterName.DEFAULT) + .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata)) + .build(); + + final ClusterMetadataManifest previousManifest = ClusterMetadataManifest.builder() + .codecVersion(2) + .globalMetadataFileName("global-metadata-file") + .indices(Collections.emptyList()) + .build(); + + remoteClusterStateService.start(); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeIncrementalMetadata( + previousClusterState, + clusterState, + previousManifest + ); + + final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() + .indices(Collections.emptyList()) + .globalMetadataFileName("mock-filename") + .clusterTerm(1L) + .stateVersion(1L) + .stateUUID("state-uuid") + .clusterUUID("cluster-uuid") + .previousClusterUUID("prev-cluster-uuid") + .build(); + + assertThat(manifest.getGlobalMetadataFileName(), notNullValue()); + assertThat(manifest.getClusterTerm(), is(expectedManifest.getClusterTerm())); + assertThat(manifest.getStateVersion(), is(expectedManifest.getStateVersion())); + assertThat(manifest.getClusterUUID(), is(expectedManifest.getClusterUUID())); + assertThat(manifest.getStateUUID(), 
is(expectedManifest.getStateUUID())); + } + + /* + * Here we will verify index metadata is not uploaded again if change is only in global metadata + */ + public void testGlobalMetadataOnlyUpdated() throws IOException { + // setup + mockBlobStoreObjects(); + final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); + final ClusterState initialClusterState = ClusterState.builder(ClusterName.DEFAULT) + .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata)) + .build(); + final ClusterMetadataManifest initialManifest = ClusterMetadataManifest.builder() + .codecVersion(2) + .globalMetadataFileName("global-metadata-file") + .indices(Collections.emptyList()) + .build(); + remoteClusterStateService.start(); + + // Initial cluster state with index. + final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); + // Updating remote cluster state with changing index metadata + final ClusterMetadataManifest manifestAfterIndexMetadataUpdate = remoteClusterStateService.writeIncrementalMetadata( + initialClusterState, + clusterState, + initialManifest + ); + + // new cluster state where only global metadata is different + Metadata newMetadata = Metadata.builder(clusterState.metadata()) + .persistentSettings(Settings.builder().put("cluster.blocks.read_only", true).build()) + .build(); + ClusterState newClusterState = ClusterState.builder(clusterState).metadata(newMetadata).build(); + + // updating remote cluster state with global metadata + final ClusterMetadataManifest manifestAfterGlobalMetadataUpdate = remoteClusterStateService.writeIncrementalMetadata( + clusterState, + newClusterState, + manifestAfterIndexMetadataUpdate + ); + + // Verify that index metadata information is same in manifest files + assertThat(manifestAfterIndexMetadataUpdate.getIndices().size(), is(manifestAfterGlobalMetadataUpdate.getIndices().size())); + assertThat( + 
manifestAfterIndexMetadataUpdate.getIndices().get(0).getIndexName(), + is(manifestAfterGlobalMetadataUpdate.getIndices().get(0).getIndexName()) + ); + assertThat( + manifestAfterIndexMetadataUpdate.getIndices().get(0).getIndexUUID(), + is(manifestAfterGlobalMetadataUpdate.getIndices().get(0).getIndexUUID()) + ); + + // since timestamp is part of file name, if file name is same we can confirm that file is not update in global metadata update + assertThat( + manifestAfterIndexMetadataUpdate.getIndices().get(0).getUploadedFilename(), + is(manifestAfterGlobalMetadataUpdate.getIndices().get(0).getUploadedFilename()) + ); + + // global metadata file would have changed + assertFalse( + manifestAfterIndexMetadataUpdate.getGlobalMetadataFileName() + .equalsIgnoreCase(manifestAfterGlobalMetadataUpdate.getGlobalMetadataFileName()) + ); + } + + /* + * Here we will verify global metadata is not uploaded again if change is only in index metadata + */ + public void testIndexMetadataOnlyUpdated() throws IOException { + // setup + mockBlobStoreObjects(); + final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); + final ClusterState initialClusterState = ClusterState.builder(ClusterName.DEFAULT) + .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata)) + .build(); + final ClusterMetadataManifest initialManifest = ClusterMetadataManifest.builder() + .codecVersion(2) + .indices(Collections.emptyList()) + .build(); + remoteClusterStateService.start(); + + // Initial cluster state with global metadata. 
+ final ClusterState clusterState = generateClusterStateWithGlobalMetadata().nodes(nodesWithLocalNodeClusterManager()).build(); + + // Updating remote cluster state with changing global metadata + final ClusterMetadataManifest manifestAfterGlobalMetadataUpdate = remoteClusterStateService.writeIncrementalMetadata( + initialClusterState, + clusterState, + initialManifest + ); + + // new cluster state where only Index metadata is different + final IndexMetadata indexMetadata = new IndexMetadata.Builder("test").settings( + Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.SETTING_INDEX_UUID, "uuid") + .build() + ).numberOfShards(1).numberOfReplicas(0).build(); + Metadata newMetadata = Metadata.builder(clusterState.metadata()).put(indexMetadata, true).build(); + ClusterState newClusterState = ClusterState.builder(clusterState).metadata(newMetadata).build(); + + // updating remote cluster state with index metadata + final ClusterMetadataManifest manifestAfterIndexMetadataUpdate = remoteClusterStateService.writeIncrementalMetadata( + clusterState, + newClusterState, + manifestAfterGlobalMetadataUpdate + ); + + // Verify that global metadata information is same in manifest files after updating index Metadata + // since timestamp is part of file name, if file name is same we can confirm that file is not update in index metadata update + assertThat( + manifestAfterIndexMetadataUpdate.getGlobalMetadataFileName(), + is(manifestAfterGlobalMetadataUpdate.getGlobalMetadataFileName()) + ); + + // Index metadata would have changed + assertThat(manifestAfterGlobalMetadataUpdate.getIndices().size(), is(0)); + assertThat(manifestAfterIndexMetadataUpdate.getIndices().size(), is(1)); + } + public void testReadLatestMetadataManifestFailedIOException() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); @@ -402,6 +628,7 @@ public void 
testReadLatestMetadataManifestSuccessButNoIndexMetadata() throws IOE .nodeId("nodeA") .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) .previousClusterUUID("prev-cluster-uuid") + .codecVersion(ClusterMetadataManifest.CODEC_V0) .build(); BlobContainer blobContainer = mockBlobStoreObjects(); @@ -409,7 +636,8 @@ public void testReadLatestMetadataManifestSuccessButNoIndexMetadata() throws IOE remoteClusterStateService.start(); assertEquals( - remoteClusterStateService.getLatestIndexMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID()) + remoteClusterStateService.getLatestMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID()) + .getIndices() .size(), 0 ); @@ -437,10 +665,8 @@ public void testReadLatestMetadataManifestSuccessButIndexMetadataFetchIOExceptio remoteClusterStateService.start(); Exception e = assertThrows( IllegalStateException.class, - () -> remoteClusterStateService.getLatestIndexMetadata( - clusterState.getClusterName().value(), - clusterState.metadata().clusterUUID() - ) + () -> remoteClusterStateService.getLatestMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID()) + .getIndices() ); assertEquals(e.getMessage(), "Error while downloading IndexMetadata - " + uploadedIndexMetadata.getUploadedFilename()); } @@ -458,6 +684,7 @@ public void testReadLatestMetadataManifestSuccess() throws IOException { .clusterUUID("cluster-uuid") .nodeId("nodeA") .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) + .codecVersion(ClusterMetadataManifest.CODEC_V0) .previousClusterUUID("prev-cluster-uuid") .build(); @@ -478,6 +705,70 @@ public void testReadLatestMetadataManifestSuccess() throws IOException { assertThat(manifest.getStateUUID(), is(expectedManifest.getStateUUID())); } + public void testReadGlobalMetadata() throws IOException { + when(blobStoreRepository.getNamedXContentRegistry()).thenReturn(new NamedXContentRegistry( + List.of(new 
NamedXContentRegistry.Entry(Metadata.Custom.class, new ParseField(IndexGraveyard.TYPE), IndexGraveyard::fromXContent)))); + final ClusterState clusterState = generateClusterStateWithGlobalMetadata().nodes(nodesWithLocalNodeClusterManager()).build(); + remoteClusterStateService.start(); + + final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() + .indices(List.of()) + .clusterTerm(1L) + .stateVersion(1L) + .stateUUID("state-uuid") + .clusterUUID("cluster-uuid") + .codecVersion(MANIFEST_CURRENT_CODEC_VERSION) + .globalMetadataFileName("global-metadata-file") + .nodeId("nodeA") + .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) + .previousClusterUUID("prev-cluster-uuid") + .build(); + + Metadata expactedMetadata = Metadata.builder().persistentSettings(Settings.builder().put("readonly", true).build()).build(); + mockBlobContainerForGlobalMetadata(mockBlobStoreObjects(), expectedManifest, expactedMetadata); + + Metadata metadata = remoteClusterStateService.getLatestMetadata( + clusterState.getClusterName().value(), + clusterState.metadata().clusterUUID() + ); + + assertTrue(Metadata.isGlobalStateEquals(metadata, expactedMetadata)); + } + + public void testReadGlobalMetadataIOException() throws IOException { + final ClusterState clusterState = generateClusterStateWithGlobalMetadata().nodes(nodesWithLocalNodeClusterManager()).build(); + remoteClusterStateService.start(); + String globalIndexMetadataName = "global-metadata-file"; + final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() + .indices(List.of()) + .clusterTerm(1L) + .stateVersion(1L) + .stateUUID("state-uuid") + .clusterUUID("cluster-uuid") + .codecVersion(MANIFEST_CURRENT_CODEC_VERSION) + .globalMetadataFileName(globalIndexMetadataName) + .nodeId("nodeA") + .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) + .previousClusterUUID("prev-cluster-uuid") + .build(); + + Metadata expactedMetadata = 
Metadata.builder().persistentSettings(Settings.builder().put("readonly", true).build()).build(); + + BlobContainer blobContainer = mockBlobStoreObjects(); + mockBlobContainerForGlobalMetadata(blobContainer, expectedManifest, expactedMetadata); + + when(blobContainer.readBlob(RemoteClusterStateService.GLOBAL_METADATA_FORMAT.blobName(globalIndexMetadataName))).thenThrow( + FileNotFoundException.class + ); + + remoteClusterStateService.start(); + Exception e = assertThrows( + IllegalStateException.class, + () -> remoteClusterStateService.getLatestMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID()) + ); + assertEquals(e.getMessage(), "Error while downloading Global Metadata - " + globalIndexMetadataName); + } + public void testReadLatestIndexMetadataSuccess() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); remoteClusterStateService.start(); @@ -504,14 +795,16 @@ public void testReadLatestIndexMetadataSuccess() throws IOException { .nodeId("nodeA") .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) .previousClusterUUID("prev-cluster-uuid") + .globalMetadataFileName("global-metadata-file") + .codecVersion(ClusterMetadataManifest.CODEC_V0) .build(); mockBlobContainer(mockBlobStoreObjects(), expectedManifest, Map.of(index.getUUID(), indexMetadata)); - Map indexMetadataMap = remoteClusterStateService.getLatestIndexMetadata( + Map indexMetadataMap = remoteClusterStateService.getLatestMetadata( clusterState.getClusterName().value(), clusterState.metadata().clusterUUID() - ); + ).getIndices(); assertEquals(indexMetadataMap.size(), 1); assertEquals(indexMetadataMap.get(index.getUUID()).getIndex().getName(), index.getName()); @@ -691,7 +984,7 @@ public void testFileNames() { String indexMetadataFileName = RemoteClusterStateService.indexMetadataFileName(indexMetadata); String[] splittedIndexMetadataFileName = 
indexMetadataFileName.split(DELIMITER); assertThat(indexMetadataFileName.split(DELIMITER).length, is(4)); - assertThat(splittedIndexMetadataFileName[0], is(INDEX_METADATA_FILE_PREFIX)); + assertThat(splittedIndexMetadataFileName[0], is(METADATA_FILE_PREFIX)); assertThat(splittedIndexMetadataFileName[1], is(RemoteStoreUtils.invertLong(indexMetadata.getVersion()))); assertThat(splittedIndexMetadataFileName[3], is(String.valueOf(INDEX_METADATA_CURRENT_CODEC_VERSION))); @@ -820,6 +1113,7 @@ private ClusterMetadataManifest generateClusterMetadataManifest( .previousClusterUUID(previousClusterUUID) .committed(true) .clusterUUIDCommitted(true) + .globalMetadataFileName("test-global-metadata") .build(); } @@ -859,7 +1153,8 @@ private void mockBlobContainer( BytesReference bytes = RemoteClusterStateService.CLUSTER_METADATA_MANIFEST_FORMAT.serialize( clusterMetadataManifest, "manifestFileName", - blobStoreRepository.getCompressor() + blobStoreRepository.getCompressor(), + FORMAT_PARAMS ); when(blobContainer.readBlob("manifestFileName")).thenReturn(new ByteArrayInputStream(bytes.streamInput().readAllBytes())); @@ -873,7 +1168,8 @@ private void mockBlobContainer( BytesReference bytesIndexMetadata = RemoteClusterStateService.INDEX_METADATA_FORMAT.serialize( indexMetadata, fileName, - blobStoreRepository.getCompressor() + blobStoreRepository.getCompressor(), + FORMAT_PARAMS ); when(blobContainer.readBlob(fileName + ".dat")).thenReturn( new ByteArrayInputStream(bytesIndexMetadata.streamInput().readAllBytes()) @@ -884,6 +1180,57 @@ private void mockBlobContainer( }); } + private void mockBlobContainerForGlobalMetadata( + BlobContainer blobContainer, + ClusterMetadataManifest clusterMetadataManifest, + Metadata metadata + ) throws IOException { + String mockManifestFileName = "manifest__1__2__C__456__1"; + BlobMetadata blobMetadata = new PlainBlobMetadata(mockManifestFileName, 1); + when( + blobContainer.listBlobsByPrefixInSortedOrder( + "manifest" + 
RemoteClusterStateService.DELIMITER, + 1, + BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC + ) + ).thenReturn(Arrays.asList(blobMetadata)); + + BytesReference bytes = RemoteClusterStateService.CLUSTER_METADATA_MANIFEST_FORMAT.serialize( + clusterMetadataManifest, + mockManifestFileName, + blobStoreRepository.getCompressor(), + FORMAT_PARAMS + ); + when(blobContainer.readBlob(mockManifestFileName)).thenReturn(new ByteArrayInputStream(bytes.streamInput().readAllBytes())); + + BytesReference bytesGlobalMetadata = RemoteClusterStateService.GLOBAL_METADATA_FORMAT.serialize( + metadata, + "global-metadata-file", + blobStoreRepository.getCompressor(), + FORMAT_PARAMS + ); + String[] splitPath = clusterMetadataManifest.getGlobalMetadataFileName().split("/"); + when(blobContainer.readBlob(RemoteClusterStateService.GLOBAL_METADATA_FORMAT.blobName(splitPath[splitPath.length - 1]))).thenReturn( + new ByteArrayInputStream(bytesGlobalMetadata.streamInput().readAllBytes()) + ); + } + + private static ClusterState.Builder generateClusterStateWithGlobalMetadata() { + final Settings clusterSettings = Settings.builder().put("cluster.blocks.read_only", true).build(); + final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); + + return ClusterState.builder(ClusterName.DEFAULT) + .version(1L) + .stateUUID("state-uuid") + .metadata( + Metadata.builder() + .persistentSettings(clusterSettings) + .clusterUUID("cluster-uuid") + .coordinationMetadata(coordinationMetadata) + .build() + ); + } + private static ClusterState.Builder generateClusterStateWithOneIndex() { final Index index = new Index("test-index", "index-uuid"); final Settings idxSettings = Settings.builder() diff --git a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java index 6a99063d11353..fe389e3b3fcb4 100644 --- 
a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java @@ -43,6 +43,7 @@ import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.BiConsumer; import java.util.stream.Collectors; import static org.opensearch.index.engine.EngineTestCase.assertAtMostOneLuceneDocumentPerSequenceNumber; @@ -388,9 +389,10 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { - super.getSegmentFiles(replicationId, checkpoint, filesToFetch, indexShard, listener); + super.getSegmentFiles(replicationId, checkpoint, filesToFetch, indexShard, (fileName, bytesRecovered) -> {}, listener); runAfterGetFiles[index.getAndIncrement()].run(); } diff --git a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java index 528402d48658a..52f28aead533d 100644 --- a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java @@ -91,6 +91,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; +import java.util.function.BiConsumer; import java.util.function.Function; import static org.opensearch.index.engine.EngineTestCase.assertAtMostOneLuceneDocumentPerSequenceNumber; @@ -725,6 +726,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { // set the listener, we will only fail it once we cancel the source. 
diff --git a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationWithNodeToNodeIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationWithNodeToNodeIndexShardTests.java index c394101697b47..f0950fe5392de 100644 --- a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationWithNodeToNodeIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationWithNodeToNodeIndexShardTests.java @@ -47,6 +47,7 @@ import java.util.Map; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import java.util.function.BiConsumer; import java.util.function.Consumer; import java.util.function.Function; import java.util.stream.Collectors; @@ -87,6 +88,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { // randomly resolve the listener, indicating the source has resolved. @@ -131,6 +133,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { Assert.fail("Should not be reached"); @@ -176,6 +179,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { Assert.fail("Unreachable"); @@ -223,6 +227,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) {} }; @@ -269,6 +274,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onResponse(new GetSegmentFilesResponse(Collections.emptyList())); diff --git a/server/src/test/java/org/opensearch/indices/replication/PrimaryShardReplicationSourceTests.java 
b/server/src/test/java/org/opensearch/indices/replication/PrimaryShardReplicationSourceTests.java index bcacef83d190a..e4dd32e5c6f70 100644 --- a/server/src/test/java/org/opensearch/indices/replication/PrimaryShardReplicationSourceTests.java +++ b/server/src/test/java/org/opensearch/indices/replication/PrimaryShardReplicationSourceTests.java @@ -125,6 +125,7 @@ public void testGetSegmentFiles() { checkpoint, Arrays.asList(testMetadata), mock(IndexShard.class), + (fileName, bytesRecovered) -> {}, mock(ActionListener.class) ); CapturingTransport.CapturedRequest[] requestList = transport.getCapturedRequestsAndClear(); @@ -153,6 +154,7 @@ public void testTransportTimeoutForGetSegmentFilesAction() { checkpoint, Arrays.asList(testMetadata), mock(IndexShard.class), + (fileName, bytesRecovered) -> {}, mock(ActionListener.class) ); CapturingTransport.CapturedRequest[] requestList = transport.getCapturedRequestsAndClear(); @@ -178,6 +180,7 @@ public void testGetSegmentFiles_CancelWhileRequestOpen() throws InterruptedExcep checkpoint, Arrays.asList(testMetadata), mock(IndexShard.class), + (fileName, bytesRecovered) -> {}, new ActionListener<>() { @Override public void onResponse(GetSegmentFilesResponse getSegmentFilesResponse) { diff --git a/server/src/test/java/org/opensearch/indices/replication/RemoteStoreReplicationSourceTests.java b/server/src/test/java/org/opensearch/indices/replication/RemoteStoreReplicationSourceTests.java index 9204f48ba5bdd..287962b158c79 100644 --- a/server/src/test/java/org/opensearch/indices/replication/RemoteStoreReplicationSourceTests.java +++ b/server/src/test/java/org/opensearch/indices/replication/RemoteStoreReplicationSourceTests.java @@ -90,7 +90,7 @@ public void testGetSegmentFiles() throws ExecutionException, InterruptedExceptio List filesToFetch = primaryShard.getSegmentMetadataMap().values().stream().collect(Collectors.toList()); final PlainActionFuture res = PlainActionFuture.newFuture(); replicationSource = new 
RemoteStoreReplicationSource(primaryShard); - replicationSource.getSegmentFiles(REPLICATION_ID, checkpoint, filesToFetch, replicaShard, res); + replicationSource.getSegmentFiles(REPLICATION_ID, checkpoint, filesToFetch, replicaShard, (fileName, bytesRecovered) -> {}, res); GetSegmentFilesResponse response = res.get(); assertEquals(response.files.size(), filesToFetch.size()); assertTrue(response.files.containsAll(filesToFetch)); @@ -104,7 +104,14 @@ public void testGetSegmentFilesAlreadyExists() throws IOException, InterruptedEx try { final PlainActionFuture res = PlainActionFuture.newFuture(); replicationSource = new RemoteStoreReplicationSource(primaryShard); - replicationSource.getSegmentFiles(REPLICATION_ID, checkpoint, filesToFetch, primaryShard, res); + replicationSource.getSegmentFiles( + REPLICATION_ID, + checkpoint, + filesToFetch, + primaryShard, + (fileName, bytesRecovered) -> {}, + res + ); res.get(); } catch (AssertionError | ExecutionException ex) { latch.countDown(); @@ -118,7 +125,14 @@ public void testGetSegmentFilesReturnEmptyResponse() throws ExecutionException, final ReplicationCheckpoint checkpoint = primaryShard.getLatestReplicationCheckpoint(); final PlainActionFuture res = PlainActionFuture.newFuture(); replicationSource = new RemoteStoreReplicationSource(primaryShard); - replicationSource.getSegmentFiles(REPLICATION_ID, checkpoint, Collections.emptyList(), primaryShard, res); + replicationSource.getSegmentFiles( + REPLICATION_ID, + checkpoint, + Collections.emptyList(), + primaryShard, + (fileName, bytesRecovered) -> {}, + res + ); GetSegmentFilesResponse response = res.get(); assert (response.files.isEmpty()); } diff --git a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java index c108de5ee5ea6..7b02635525264 100644 --- 
a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java +++ b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java @@ -39,6 +39,7 @@ import org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.telemetry.tracing.noop.NoopTracer; +import org.opensearch.test.junit.annotations.TestLogging; import org.opensearch.test.transport.CapturingTransport; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; @@ -52,6 +53,7 @@ import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import java.util.function.BiConsumer; import static org.junit.Assert.assertEquals; import static org.mockito.ArgumentMatchers.any; @@ -211,6 +213,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { Assert.fail("Should not be called"); @@ -246,7 +249,7 @@ public void testAlreadyOnNewCheckpoint() { verify(spy, times(0)).startReplication(any(), any(), any()); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/8928") + @TestLogging(reason = "Getting trace logs from replication package", value = "org.opensearch.indices.replication:TRACE") public void testShardAlreadyReplicating() { CountDownLatch blockGetCheckpointMetadata = new CountDownLatch(1); SegmentReplicationSource source = new TestReplicationSource() { @@ -276,6 +279,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onResponse(new GetSegmentFilesResponse(Collections.emptyList())); @@ -333,6 +337,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard 
indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { Assert.fail("Unreachable"); diff --git a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetTests.java b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetTests.java index a9d7d3cdd32fc..8b4b3aff701b4 100644 --- a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetTests.java +++ b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetTests.java @@ -40,6 +40,7 @@ import org.opensearch.index.store.StoreTests; import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; import org.opensearch.indices.replication.common.ReplicationFailedException; +import org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.test.DummyShardLock; import org.opensearch.test.IndexSettingsModule; @@ -53,6 +54,7 @@ import java.util.List; import java.util.Map; import java.util.Random; +import java.util.function.BiConsumer; import org.mockito.Mockito; @@ -131,10 +133,12 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { assertEquals(1, filesToFetch.size()); assert (filesToFetch.contains(SEGMENT_FILE)); + filesToFetch.forEach(storeFileMetadata -> fileProgressTracker.accept(storeFileMetadata.name(), storeFileMetadata.length())); listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); } }; @@ -149,6 +153,19 @@ public void getSegmentFiles( public void onResponse(Void replicationResponse) { try { verify(spyIndexShard, times(1)).finalizeReplication(any()); + assertEquals( + 1, + segrepTarget.state() + .getIndex() + .fileDetails() + .stream() + .filter(ReplicationLuceneIndex.FileMetadata::fullyRecovered) + .count() + ); + assertEquals( + 0, + 
segrepTarget.state().getIndex().fileDetails().stream().filter(file -> file.fullyRecovered() == false).count() + ); segrepTarget.markAsDone(); } catch (IOException ex) { Assert.fail(); @@ -182,6 +199,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); @@ -200,6 +218,15 @@ public void onResponse(Void replicationResponse) { @Override public void onFailure(Exception e) { + assertEquals( + 0, + segrepTarget.state() + .getIndex() + .fileDetails() + .stream() + .filter(ReplicationLuceneIndex.FileMetadata::fullyRecovered) + .count() + ); assertEquals(exception, e.getCause().getCause()); segrepTarget.fail(new ReplicationFailedException(e), false); } @@ -225,6 +252,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onFailure(exception); @@ -243,6 +271,15 @@ public void onResponse(Void replicationResponse) { @Override public void onFailure(Exception e) { + assertEquals( + 0, + segrepTarget.state() + .getIndex() + .fileDetails() + .stream() + .filter(ReplicationLuceneIndex.FileMetadata::fullyRecovered) + .count() + ); assertEquals(exception, e.getCause().getCause()); segrepTarget.fail(new ReplicationFailedException(e), false); } @@ -268,6 +305,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); @@ -314,6 +352,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); @@ -358,6 +397,7 @@ public void getSegmentFiles( 
ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); @@ -376,6 +416,15 @@ public void onResponse(Void replicationResponse) { @Override public void onFailure(Exception e) { + assertEquals( + 0, + segrepTarget.state() + .getIndex() + .fileDetails() + .stream() + .filter(ReplicationLuceneIndex.FileMetadata::fullyRecovered) + .count() + ); assertTrue(e instanceof OpenSearchCorruptionException); assertTrue(e.getMessage().contains("has local copies of segments that differ from the primary")); segrepTarget.fail(new ReplicationFailedException(e), false); @@ -410,6 +459,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); diff --git a/server/src/test/java/org/opensearch/repositories/RepositoriesServiceTests.java b/server/src/test/java/org/opensearch/repositories/RepositoriesServiceTests.java index c4599a6e7a00e..43ebb86fd5342 100644 --- a/server/src/test/java/org/opensearch/repositories/RepositoriesServiceTests.java +++ b/server/src/test/java/org/opensearch/repositories/RepositoriesServiceTests.java @@ -219,20 +219,17 @@ public void testRepositoriesStatsCanHaveTheSameNameAndDifferentTypeOverTime() { assertThat(repositoriesService.repositoriesStats().size(), equalTo(1)); repositoriesService.applyClusterState(new ClusterChangedEvent("new repo", emptyState(), clusterStateWithRepoTypeA)); - assertThat(repositoriesService.repositoriesStats().size(), equalTo(1)); + assertThat(repositoriesService.repositoriesStats().size(), equalTo(0)); ClusterState clusterStateWithRepoTypeB = createClusterStateWithRepo(repoName, MeteredRepositoryTypeB.TYPE); repositoriesService.applyClusterState(new ClusterChangedEvent("new repo", clusterStateWithRepoTypeB, 
emptyState())); List repositoriesStats = repositoriesService.repositoriesStats(); - assertThat(repositoriesStats.size(), equalTo(2)); + assertThat(repositoriesStats.size(), equalTo(1)); RepositoryStatsSnapshot repositoryStatsTypeA = repositoriesStats.get(0); - assertThat(repositoryStatsTypeA.getRepositoryInfo().type, equalTo(MeteredRepositoryTypeA.TYPE)); - assertThat(repositoryStatsTypeA.getRepositoryStats(), equalTo(MeteredRepositoryTypeA.STATS)); + assertThat(repositoryStatsTypeA.getRepositoryInfo().type, equalTo(MeteredRepositoryTypeB.TYPE)); + assertThat(repositoryStatsTypeA.getRepositoryStats(), equalTo(MeteredRepositoryTypeB.STATS)); - RepositoryStatsSnapshot repositoryStatsTypeB = repositoriesStats.get(1); - assertThat(repositoryStatsTypeB.getRepositoryInfo().type, equalTo(MeteredRepositoryTypeB.TYPE)); - assertThat(repositoryStatsTypeB.getRepositoryStats(), equalTo(MeteredRepositoryTypeB.STATS)); } public void testWithSameKeyProviderNames() { @@ -258,7 +255,7 @@ public void testWithSameKeyProviderNames() { kpTypeA ); repositoriesService.applyClusterState(new ClusterChangedEvent("new repo", clusterStateWithRepoTypeB, emptyState())); - assertThat(repositoriesService.repositoriesStats().size(), equalTo(2)); + assertThat(repositoriesService.repositoriesStats().size(), equalTo(1)); MeteredRepositoryTypeB repositoryB = (MeteredRepositoryTypeB) repositoriesService.repository("repoName"); assertNotNull(repositoryB); assertNotNull(repository.cryptoHandler); diff --git a/server/src/test/java/org/opensearch/repositories/RepositoriesStatsArchiveTests.java b/server/src/test/java/org/opensearch/repositories/RepositoriesStatsArchiveTests.java index cf0b06a3f7d16..da0cbcb1d4b17 100644 --- a/server/src/test/java/org/opensearch/repositories/RepositoriesStatsArchiveTests.java +++ b/server/src/test/java/org/opensearch/repositories/RepositoriesStatsArchiveTests.java @@ -32,7 +32,6 @@ package org.opensearch.repositories; -import org.opensearch.common.UUIDs; import 
org.opensearch.common.unit.TimeValue; import org.opensearch.test.OpenSearchTestCase; @@ -122,14 +121,11 @@ private RepositoryStatsSnapshot createRepositoryStats(RepositoryStats repository private RepositoryStatsSnapshot createRepositoryStats(RepositoryStats repositoryStats, long clusterVersion) { RepositoryInfo repositoryInfo = new RepositoryInfo( - UUIDs.randomBase64UUID(), randomAlphaOfLength(10), randomAlphaOfLength(10), - Map.of("bucket", randomAlphaOfLength(10)), - System.currentTimeMillis(), - null + Map.of("bucket", randomAlphaOfLength(10)) ); - return new RepositoryStatsSnapshot(repositoryInfo, repositoryStats, clusterVersion, true); + return new RepositoryStatsSnapshot(repositoryInfo, repositoryStats, clusterVersion); } } diff --git a/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java b/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java index 03f0d27188027..c114b56bd0b39 100644 --- a/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java +++ b/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java @@ -152,14 +152,16 @@ public void onFailure(Exception e) { mockBlobContainer, "check-smile", CompressorRegistry.none(), - actionListener + actionListener, + ChecksumBlobStoreFormat.SNAPSHOT_ONLY_FORMAT_PARAMS ); checksumSMILE.writeAsync( new BlobObj("checksum smile compressed"), mockBlobContainer, "check-smile-comp", CompressorRegistry.getCompressor(DeflateCompressor.NAME), - actionListener + actionListener, + ChecksumBlobStoreFormat.SNAPSHOT_ONLY_FORMAT_PARAMS ); latch.await(); diff --git a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java index a520b6278ea47..60a54110fd0b4 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java +++ 
b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java @@ -121,7 +121,8 @@ List adjustNodesStats(List nodesStats) { nodeStats.getWeightedRoutingStats(), nodeStats.getFileCacheStats(), nodeStats.getTaskCancellationStats(), - nodeStats.getSearchPipelineStats() + nodeStats.getSearchPipelineStats(), + nodeStats.getRepositoriesStats() ); }).collect(Collectors.toList()); } diff --git a/test/framework/src/main/java/org/opensearch/gateway/MockGatewayMetaState.java b/test/framework/src/main/java/org/opensearch/gateway/MockGatewayMetaState.java index d77596cf5cdd1..2f006a5519d69 100644 --- a/test/framework/src/main/java/org/opensearch/gateway/MockGatewayMetaState.java +++ b/test/framework/src/main/java/org/opensearch/gateway/MockGatewayMetaState.java @@ -67,6 +67,12 @@ public class MockGatewayMetaState extends GatewayMetaState { private final BigArrays bigArrays; private final RemoteClusterStateService remoteClusterStateService; private final RemoteStoreRestoreService remoteStoreRestoreService; + private boolean prepareFullState = false; + + public MockGatewayMetaState(DiscoveryNode localNode, BigArrays bigArrays, boolean prepareFullState) { + this(localNode, bigArrays); + this.prepareFullState = prepareFullState; + } public MockGatewayMetaState(DiscoveryNode localNode, BigArrays bigArrays) { this.localNode = localNode; @@ -99,8 +105,12 @@ Metadata upgradeMetadataForNode( @Override ClusterState prepareInitialClusterState(TransportService transportService, ClusterService clusterService, ClusterState clusterState) { - // Just set localNode here, not to mess with ClusterService and IndicesService mocking - return ClusterStateUpdaters.setLocalNode(clusterState, localNode); + if (prepareFullState) { + return super.prepareInitialClusterState(transportService, clusterService, clusterState); + } else { + // Just set localNode here, not to mess with ClusterService and IndicesService mocking + return ClusterStateUpdaters.setLocalNode(clusterState, 
localNode); + } } @Override @@ -113,6 +123,16 @@ public void start( NodeEnvironment nodeEnvironment, NamedXContentRegistry xContentRegistry, PersistedStateRegistry persistedStateRegistry + ) { + start(settings, nodeEnvironment, xContentRegistry, persistedStateRegistry, false); + } + + public void start( + Settings settings, + NodeEnvironment nodeEnvironment, + NamedXContentRegistry xContentRegistry, + PersistedStateRegistry persistedStateRegistry, + boolean prepareFullState ) { final TransportService transportService = mock(TransportService.class); when(transportService.getThreadPool()).thenReturn(mock(ThreadPool.class)); @@ -126,6 +146,7 @@ public void start( } catch (IOException e) { throw new AssertionError(e); } + this.prepareFullState = prepareFullState; start( settings, transportService, diff --git a/test/framework/src/main/java/org/opensearch/index/replication/TestReplicationSource.java b/test/framework/src/main/java/org/opensearch/index/replication/TestReplicationSource.java index b29e25a0bff2c..bcd47e3d578ee 100644 --- a/test/framework/src/main/java/org/opensearch/index/replication/TestReplicationSource.java +++ b/test/framework/src/main/java/org/opensearch/index/replication/TestReplicationSource.java @@ -17,6 +17,7 @@ import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; import java.util.List; +import java.util.function.BiConsumer; /** * This class is used by unit tests implementing SegmentReplicationSource @@ -36,6 +37,7 @@ public abstract void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ); diff --git a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java index 9dc230474482f..412d5235fe462 100644 --- a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java +++ 
b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java @@ -174,6 +174,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; +import java.util.function.BiConsumer; import java.util.function.BiFunction; import java.util.function.Consumer; import java.util.function.Function; @@ -1620,6 +1621,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { try ( diff --git a/test/framework/src/main/java/org/opensearch/repositories/blobstore/OpenSearchMockAPIBasedRepositoryIntegTestCase.java b/test/framework/src/main/java/org/opensearch/repositories/blobstore/OpenSearchMockAPIBasedRepositoryIntegTestCase.java index dff9b997d87db..faa9d52b105b2 100644 --- a/test/framework/src/main/java/org/opensearch/repositories/blobstore/OpenSearchMockAPIBasedRepositoryIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/repositories/blobstore/OpenSearchMockAPIBasedRepositoryIntegTestCase.java @@ -238,7 +238,7 @@ public void testRequestStats() throws Exception { assertEquals(assertionErrorMsg, mockCalls, sdkRequestCounts); } - private Map getMockRequestCounts() { + protected Map getMockRequestCounts() { for (HttpHandler h : handlers.values()) { while (h instanceof DelegatingHttpHandler) { if (h instanceof HttpStatsCollectorHandler) { diff --git a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java index 3c7423f73685f..898e125b94954 100644 --- a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java @@ -1853,10 +1853,12 @@ public synchronized void stopRandomNodeNotCurrentMaster() throws IOException { */ public void stopAllNodes() { try { - int totalDataNodes = 
numDataNodes(); - while (totalDataNodes > 0) { - stopRandomDataNode(); - totalDataNodes -= 1; + if (numDataAndClusterManagerNodes() != numClusterManagerNodes()) { + int totalDataNodes = numDataNodes(); + while (totalDataNodes > 0) { + stopRandomDataNode(); + totalDataNodes -= 1; + } } int totalClusterManagerNodes = numClusterManagerNodes(); while (totalClusterManagerNodes > 1) { @@ -2719,6 +2721,7 @@ public void ensureEstimatedStats() { false, false, false, + false, false ); assertThat(