diff --git a/.ci/bwcVersions b/.ci/bwcVersions index cfaadc5ed1e5e..144a8b71fca39 100644 --- a/.ci/bwcVersions +++ b/.ci/bwcVersions @@ -25,4 +25,5 @@ BWC_VERSION: - "2.10.0" - "2.10.1" - "2.11.0" + - "2.11.1" - "2.12.0" diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 4fa118e8486f1..8076adcf00ca9 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1 @@ -* @reta @anasalkouz @andrross @Bukhtawar @CEHENKLE @dblock @gbbafna @setiah @kartg @kotwanikunal @mch2 @nknize @owaiskazi19 @peternied @Rishikesh1159 @ryanbogan @saratvemulapalli @shwetathareja @dreamer-89 @tlfeng @VachaShah @dbwiddis @sachinpkale @sohami @msfroh +* @abbashus @adnapibar @anasalkouz @andrross @Bukhtawar @CEHENKLE @dblock @dbwiddis @dreamer-89 @gbbafna @kartg @kotwanikunal @mch2 @msfroh @nknize @owaiskazi19 @peternied @reta @Rishikesh1159 @ryanbogan @sachinpkale @saratvemulapalli @setiah @shwetathareja @sohami @tlfeng @VachaShah diff --git a/CHANGELOG.md b/CHANGELOG.md index 638b64de9ab37..e4aa8d5d60208 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,11 +10,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Allow mmap to use new JDK-19 preview APIs in Apache Lucene 9.4+ ([#5151](https://github.com/opensearch-project/OpenSearch/pull/5151)) - Add events correlation engine plugin ([#6854](https://github.com/opensearch-project/OpenSearch/issues/6854)) - Introduce new dynamic cluster setting to control slice computation for concurrent segment search ([#9107](https://github.com/opensearch-project/OpenSearch/pull/9107)) -- Implement on behalf of token passing for extensions ([#8679](https://github.com/opensearch-project/OpenSearch/pull/8679)) +- Implement on behalf of token passing for extensions ([#8679](https://github.com/opensearch-project/OpenSearch/pull/8679), [#10664](https://github.com/opensearch-project/OpenSearch/pull/10664)) - Provide service accounts tokens to extensions ([#9618](https://github.com/opensearch-project/OpenSearch/pull/9618)) - [Admission control] Add enhancements to FS stats to include read/write time, queue size and IO time ([#10541](https://github.com/opensearch-project/OpenSearch/pull/10541)) - [Admission control] Add Resource usage collector service and resource usage tracker ([#9890](https://github.com/opensearch-project/OpenSearch/pull/9890)) -- Change file names for remote cluster state ([#10557](https://github.com/opensearch-project/OpenSearch/pull/10557)) +- [Remote cluster state] Change file names for remote cluster state ([#10557](https://github.com/opensearch-project/OpenSearch/pull/10557)) +- [Remote cluster state] Upload global metadata in cluster state to remote store([#10404](https://github.com/opensearch-project/OpenSearch/pull/10404)) +- [Remote cluster state] Download functionality of global metadata from remote store ([#10535](https://github.com/opensearch-project/OpenSearch/pull/10535)) +- [Remote cluster state] Restore global metadata from remote store when local state is lost after quorum loss ([#10404](https://github.com/opensearch-project/OpenSearch/pull/10404)) ### Dependencies - Bump `log4j-core` from 2.18.0 to 2.19.0 @@ -86,17 +89,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased 2.x] ### Added -- Add coordinator level stats for search latency ([#8386](https://github.com/opensearch-project/OpenSearch/issues/8386)) -- Add metrics for thread_pool task wait time ([#9681](https://github.com/opensearch-project/OpenSearch/pull/9681)) -- Async blob read support for S3 plugin ([#9694](https://github.com/opensearch-project/OpenSearch/pull/9694)) -- [Telemetry-Otel] Added support for OtlpGrpcSpanExporter exporter ([#9666](https://github.com/opensearch-project/OpenSearch/pull/9666)) -- Async blob read support for encrypted containers ([#10131](https://github.com/opensearch-project/OpenSearch/pull/10131)) -- Add capability to restrict async durability mode for remote indexes ([#10189](https://github.com/opensearch-project/OpenSearch/pull/10189)) -- Add Doc Status Counter for Indexing Engine ([#4562](https://github.com/opensearch-project/OpenSearch/issues/4562)) -- Add unreferenced file cleanup count to merge stats ([#10204](https://github.com/opensearch-project/OpenSearch/pull/10204)) -- [Remote Store] Add support to restrict creation & deletion if system repository and mutation of immutable settings of system repository ([#9839](https://github.com/opensearch-project/OpenSearch/pull/9839)) -- Improve compressed request handling ([#10261](https://github.com/opensearch-project/OpenSearch/pull/10261)) - Per request phase latency ([#10351](https://github.com/opensearch-project/OpenSearch/issues/10351)) +- [Remote Store] Add repository stats for remote store([#10567](https://github.com/opensearch-project/OpenSearch/pull/10567)) +- Add search query categorizer ([#10255](https://github.com/opensearch-project/OpenSearch/pull/10255)) +- Introduce ConcurrentQueryProfiler to profile query using concurrent segment search path and support concurrency during rewrite and create weight ([10352](https://github.com/opensearch-project/OpenSearch/pull/10352)) - Add cluster state stats ([#10670](https://github.com/opensearch-project/OpenSearch/pull/10670)) ### Dependencies diff --git a/libs/core/src/main/java/org/opensearch/Version.java b/libs/core/src/main/java/org/opensearch/Version.java index eef4da719994c..8d9ee73a02c1d 100644 --- a/libs/core/src/main/java/org/opensearch/Version.java +++ b/libs/core/src/main/java/org/opensearch/Version.java @@ -96,6 +96,7 @@ public class Version implements Comparable, ToXContentFragment { public static final Version V_2_10_0 = new Version(2100099, org.apache.lucene.util.Version.LUCENE_9_7_0); public static final Version V_2_10_1 = new Version(2100199, org.apache.lucene.util.Version.LUCENE_9_7_0); public static final Version V_2_11_0 = new Version(2110099, org.apache.lucene.util.Version.LUCENE_9_7_0); + public static final Version V_2_11_1 = new Version(2110199, org.apache.lucene.util.Version.LUCENE_9_7_0); public static final Version V_2_12_0 = new Version(2120099, org.apache.lucene.util.Version.LUCENE_9_8_0); public static final Version V_3_0_0 = new Version(3000099, org.apache.lucene.util.Version.LUCENE_9_8_0); public static final Version CURRENT = V_3_0_0; diff --git a/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java b/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java index 5f88ad7867513..4df30bfd2169e 100644 --- a/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java +++ b/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java @@ -37,6 +37,8 @@ import software.amazon.awssdk.core.internal.http.pipeline.stages.ApplyTransactionIdStage; +import org.opensearch.action.admin.indices.forcemerge.ForceMergeResponse; +import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.RepositoryMetadata; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.SuppressForbidden; @@ -51,10 +53,15 @@ import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.plugins.Plugin; +import org.opensearch.repositories.RepositoriesService; +import org.opensearch.repositories.Repository; +import org.opensearch.repositories.RepositoryMissingException; +import org.opensearch.repositories.RepositoryStats; import org.opensearch.repositories.blobstore.BlobStoreRepository; import org.opensearch.repositories.blobstore.OpenSearchMockAPIBasedRepositoryIntegTestCase; import org.opensearch.repositories.s3.utils.AwsRequestSigner; import org.opensearch.snapshots.mockstore.BlobStoreWrapper; +import org.opensearch.test.BackgroundIndexer; import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.threadpool.ThreadPool; @@ -63,12 +70,18 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; +import java.util.stream.StreamSupport; import fixture.s3.S3HttpHandler; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount; import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; @SuppressForbidden(reason = "this test uses a HttpServer to emulate an S3 endpoint") // Need to set up a new cluster for each test because cluster settings use randomized authentication settings @@ -152,6 +165,67 @@ protected Settings nodeSettings(int nodeOrdinal) { return builder.build(); } + @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/10735") + @Override + public void testRequestStats() throws Exception { + final String repository = createRepository(randomName()); + final String index = "index-no-merges"; + createIndex( + index, + Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build() + ); + + final long nbDocs = randomLongBetween(10_000L, 20_000L); + try (BackgroundIndexer indexer = new BackgroundIndexer(index, "_doc", client(), (int) nbDocs)) { + waitForDocs(nbDocs, indexer); + } + + flushAndRefresh(index); + ForceMergeResponse forceMerge = client().admin().indices().prepareForceMerge(index).setFlush(true).setMaxNumSegments(1).get(); + assertThat(forceMerge.getSuccessfulShards(), equalTo(1)); + assertHitCount(client().prepareSearch(index).setSize(0).setTrackTotalHits(true).get(), nbDocs); + + final String snapshot = "snapshot"; + assertSuccessfulSnapshot( + client().admin().cluster().prepareCreateSnapshot(repository, snapshot).setWaitForCompletion(true).setIndices(index) + ); + + assertAcked(client().admin().indices().prepareDelete(index)); + + assertSuccessfulRestore(client().admin().cluster().prepareRestoreSnapshot(repository, snapshot).setWaitForCompletion(true)); + ensureGreen(index); + assertHitCount(client().prepareSearch(index).setSize(0).setTrackTotalHits(true).get(), nbDocs); + + assertAcked(client().admin().cluster().prepareDeleteSnapshot(repository, snapshot).get()); + + final RepositoryStats repositoryStats = StreamSupport.stream( + internalCluster().getInstances(RepositoriesService.class).spliterator(), + false + ).map(repositoriesService -> { + try { + return repositoriesService.repository(repository); + } catch (RepositoryMissingException e) { + return null; + } + }).filter(Objects::nonNull).map(Repository::stats).reduce(RepositoryStats::merge).get(); + + Map> extendedStats = repositoryStats.extendedStats; + Map aggregatedStats = new HashMap<>(); + extendedStats.forEach((k, v) -> { + if (k == BlobStore.Metric.RETRY_COUNT || k == BlobStore.Metric.REQUEST_SUCCESS || k == BlobStore.Metric.REQUEST_FAILURE) { + for (Map.Entry entry : v.entrySet()) { + aggregatedStats.merge(entry.getKey(), entry.getValue(), Math::addExact); + } + } + + }); + final Map mockCalls = getMockRequestCounts(); + + String assertionErrorMsg = String.format("SDK sent [%s] calls and handler measured [%s] calls", aggregatedStats, mockCalls); + + assertEquals(assertionErrorMsg, mockCalls, aggregatedStats); + } + /** * S3RepositoryPlugin that allows to disable chunked encoding and to set a low threshold between single upload and multipart upload. */ @@ -263,6 +337,8 @@ public void maybeTrack(final String request, Headers requestHeaders) { trackRequest("PutMultipartObject"); } else if (Regex.simpleMatch("PUT /*/*", request)) { trackRequest("PutObject"); + } else if (Regex.simpleMatch("POST /*?delete*", request)) { + trackRequest("DeleteObjects"); } } diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java index 9777bd974d56c..24aee99242957 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java @@ -199,7 +199,7 @@ public void asyncBlobUpload(WriteContext writeContext, ActionListener comp ? amazonS3Reference.get().priorityClient() : amazonS3Reference.get().client(); CompletableFuture completableFuture = blobStore.getAsyncTransferManager() - .uploadObject(s3AsyncClient, uploadRequest, streamContext); + .uploadObject(s3AsyncClient, uploadRequest, streamContext, blobStore.getStatsMetricPublisher()); completableFuture.whenComplete((response, throwable) -> { if (throwable == null) { completionListener.onResponse(response); @@ -384,7 +384,7 @@ private void doDeleteBlobs(List blobNames, boolean relative) throws IOEx assert outstanding.isEmpty(); } - private static DeleteObjectsRequest bulkDelete(String bucket, List blobs) { + private DeleteObjectsRequest bulkDelete(String bucket, List blobs) { return DeleteObjectsRequest.builder() .bucket(bucket) .delete( @@ -393,6 +393,7 @@ private static DeleteObjectsRequest bulkDelete(String bucket, List blobs .quiet(true) .build() ) + .overrideConfiguration(o -> o.addMetricPublisher(blobStore.getStatsMetricPublisher().deleteObjectsMetricPublisher)) .build(); } diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java index 80005d92344a4..f568d871dd31a 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java @@ -47,6 +47,8 @@ import org.opensearch.repositories.s3.async.AsyncTransferManager; import java.io.IOException; +import java.util.Collections; +import java.util.HashMap; import java.util.Locale; import java.util.Map; @@ -180,6 +182,16 @@ public Map stats() { return statsMetricPublisher.getStats().toMap(); } + @Override + public Map> extendedStats() { + if (statsMetricPublisher.getExtendedStats() == null || statsMetricPublisher.getExtendedStats().isEmpty()) { + return Collections.emptyMap(); + } + Map> extendedStats = new HashMap<>(); + statsMetricPublisher.getExtendedStats().forEach((k, v) -> extendedStats.put(k, v.toMap())); + return extendedStats; + } + public ObjectCannedACL getCannedACL() { return cannedACL; } diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java index a80ee0ca35fae..c6450e49d08e2 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java @@ -38,6 +38,7 @@ import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.concurrent.OpenSearchExecutors; import org.opensearch.core.common.io.stream.NamedWriteableRegistry; import org.opensearch.core.xcontent.NamedXContentRegistry; @@ -55,6 +56,7 @@ import org.opensearch.script.ScriptService; import org.opensearch.threadpool.ExecutorBuilder; import org.opensearch.threadpool.FixedExecutorBuilder; +import org.opensearch.threadpool.ScalingExecutorBuilder; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -93,17 +95,21 @@ public S3RepositoryPlugin(final Settings settings, final Path configPath) { @Override public List> getExecutorBuilders(Settings settings) { List> executorBuilders = new ArrayList<>(); + int halfProcMaxAt5 = halfAllocatedProcessorsMaxFive(allocatedProcessors(settings)); executorBuilders.add( new FixedExecutorBuilder(settings, PRIORITY_FUTURE_COMPLETION, priorityPoolCount(settings), 10_000, PRIORITY_FUTURE_COMPLETION) ); - executorBuilders.add( - new FixedExecutorBuilder(settings, PRIORITY_STREAM_READER, priorityPoolCount(settings), 10_000, PRIORITY_STREAM_READER) - ); + executorBuilders.add(new ScalingExecutorBuilder(PRIORITY_STREAM_READER, 1, halfProcMaxAt5, TimeValue.timeValueMinutes(5))); + executorBuilders.add(new FixedExecutorBuilder(settings, FUTURE_COMPLETION, normalPoolCount(settings), 10_000, FUTURE_COMPLETION)); - executorBuilders.add(new FixedExecutorBuilder(settings, STREAM_READER, normalPoolCount(settings), 10_000, STREAM_READER)); + executorBuilders.add(new ScalingExecutorBuilder(STREAM_READER, 1, halfProcMaxAt5, TimeValue.timeValueMinutes(5))); return executorBuilders; } + static int halfAllocatedProcessorsMaxFive(final int allocatedProcessors) { + return boundedBy((allocatedProcessors + 1) / 2, 1, 5); + } + S3RepositoryPlugin(final Settings settings, final Path configPath, final S3Service service, final S3AsyncService s3AsyncService) { this.service = Objects.requireNonNull(service, "S3 service must not be null"); this.configPath = configPath; diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/StatsMetricPublisher.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/StatsMetricPublisher.java index cad0037f99249..0c63bfdb1ff97 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/StatsMetricPublisher.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/StatsMetricPublisher.java @@ -8,10 +8,13 @@ package org.opensearch.repositories.s3; -import software.amazon.awssdk.http.HttpMetric; import software.amazon.awssdk.metrics.MetricCollection; import software.amazon.awssdk.metrics.MetricPublisher; +import software.amazon.awssdk.metrics.MetricRecord; +import org.opensearch.common.blobstore.BlobStore; + +import java.time.Duration; import java.util.HashMap; import java.util.Map; import java.util.concurrent.atomic.AtomicLong; @@ -20,18 +23,67 @@ public class StatsMetricPublisher { private final Stats stats = new Stats(); + private final Map extendedStats = new HashMap<>() { + { + put(BlobStore.Metric.REQUEST_LATENCY, new Stats()); + put(BlobStore.Metric.REQUEST_SUCCESS, new Stats()); + put(BlobStore.Metric.REQUEST_FAILURE, new Stats()); + put(BlobStore.Metric.RETRY_COUNT, new Stats()); + } + }; + public MetricPublisher listObjectsMetricPublisher = new MetricPublisher() { @Override public void publish(MetricCollection metricCollection) { - stats.listCount.addAndGet( - metricCollection.children() - .stream() - .filter( - metricRecords -> metricRecords.name().equals("ApiCallAttempt") - && !metricRecords.metricValues(HttpMetric.HTTP_STATUS_CODE).isEmpty() - ) - .count() - ); + for (MetricRecord metricRecord : metricCollection) { + switch (metricRecord.metric().name()) { + case "ApiCallDuration": + extendedStats.get(BlobStore.Metric.REQUEST_LATENCY).listMetrics.addAndGet( + ((Duration) metricRecord.value()).toMillis() + ); + break; + case "RetryCount": + extendedStats.get(BlobStore.Metric.RETRY_COUNT).listMetrics.addAndGet(((Integer) metricRecord.value())); + break; + case "ApiCallSuccessful": + if ((Boolean) metricRecord.value()) { + extendedStats.get(BlobStore.Metric.REQUEST_SUCCESS).listMetrics.addAndGet(1); + } else { + extendedStats.get(BlobStore.Metric.REQUEST_FAILURE).listMetrics.addAndGet(1); + } + stats.listMetrics.addAndGet(1); + break; + } + } + } + + @Override + public void close() {} + }; + + public MetricPublisher deleteObjectsMetricPublisher = new MetricPublisher() { + @Override + public void publish(MetricCollection metricCollection) { + for (MetricRecord metricRecord : metricCollection) { + switch (metricRecord.metric().name()) { + case "ApiCallDuration": + extendedStats.get(BlobStore.Metric.REQUEST_LATENCY).deleteMetrics.addAndGet( + ((Duration) metricRecord.value()).toMillis() + ); + break; + case "RetryCount": + extendedStats.get(BlobStore.Metric.RETRY_COUNT).deleteMetrics.addAndGet(((Integer) metricRecord.value())); + break; + case "ApiCallSuccessful": + if ((Boolean) metricRecord.value()) { + extendedStats.get(BlobStore.Metric.REQUEST_SUCCESS).deleteMetrics.addAndGet(1); + } else { + extendedStats.get(BlobStore.Metric.REQUEST_FAILURE).deleteMetrics.addAndGet(1); + } + stats.deleteMetrics.addAndGet(1); + break; + } + } } @Override @@ -41,15 +93,26 @@ public void close() {} public MetricPublisher getObjectMetricPublisher = new MetricPublisher() { @Override public void publish(MetricCollection metricCollection) { - stats.getCount.addAndGet( - metricCollection.children() - .stream() - .filter( - metricRecords -> metricRecords.name().equals("ApiCallAttempt") - && !metricRecords.metricValues(HttpMetric.HTTP_STATUS_CODE).isEmpty() - ) - .count() - ); + for (MetricRecord metricRecord : metricCollection) { + switch (metricRecord.metric().name()) { + case "ApiCallDuration": + extendedStats.get(BlobStore.Metric.REQUEST_LATENCY).getMetrics.addAndGet( + ((Duration) metricRecord.value()).toMillis() + ); + break; + case "RetryCount": + extendedStats.get(BlobStore.Metric.RETRY_COUNT).getMetrics.addAndGet(((Integer) metricRecord.value())); + break; + case "ApiCallSuccessful": + if ((Boolean) metricRecord.value()) { + extendedStats.get(BlobStore.Metric.REQUEST_SUCCESS).getMetrics.addAndGet(1); + } else { + extendedStats.get(BlobStore.Metric.REQUEST_FAILURE).getMetrics.addAndGet(1); + } + stats.getMetrics.addAndGet(1); + break; + } + } } @Override @@ -59,15 +122,26 @@ public void close() {} public MetricPublisher putObjectMetricPublisher = new MetricPublisher() { @Override public void publish(MetricCollection metricCollection) { - stats.putCount.addAndGet( - metricCollection.children() - .stream() - .filter( - metricRecords -> metricRecords.name().equals("ApiCallAttempt") - && !metricRecords.metricValues(HttpMetric.HTTP_STATUS_CODE).isEmpty() - ) - .count() - ); + for (MetricRecord metricRecord : metricCollection) { + switch (metricRecord.metric().name()) { + case "ApiCallDuration": + extendedStats.get(BlobStore.Metric.REQUEST_LATENCY).putMetrics.addAndGet( + ((Duration) metricRecord.value()).toMillis() + ); + break; + case "RetryCount": + extendedStats.get(BlobStore.Metric.RETRY_COUNT).putMetrics.addAndGet(((Integer) metricRecord.value())); + break; + case "ApiCallSuccessful": + if ((Boolean) metricRecord.value()) { + extendedStats.get(BlobStore.Metric.REQUEST_SUCCESS).putMetrics.addAndGet(1); + } else { + extendedStats.get(BlobStore.Metric.REQUEST_FAILURE).putMetrics.addAndGet(1); + } + stats.putMetrics.addAndGet(1); + break; + } + } } @Override @@ -77,15 +151,26 @@ public void close() {} public MetricPublisher multipartUploadMetricCollector = new MetricPublisher() { @Override public void publish(MetricCollection metricCollection) { - stats.postCount.addAndGet( - metricCollection.children() - .stream() - .filter( - metricRecords -> metricRecords.name().equals("ApiCallAttempt") - && !metricRecords.metricValues(HttpMetric.HTTP_STATUS_CODE).isEmpty() - ) - .count() - ); + for (MetricRecord metricRecord : metricCollection) { + switch (metricRecord.metric().name()) { + case "ApiCallDuration": + extendedStats.get(BlobStore.Metric.REQUEST_LATENCY).multiPartPutMetrics.addAndGet( + ((Duration) metricRecord.value()).toMillis() + ); + break; + case "RetryCount": + extendedStats.get(BlobStore.Metric.RETRY_COUNT).multiPartPutMetrics.addAndGet(((Integer) metricRecord.value())); + break; + case "ApiCallSuccessful": + if ((Boolean) metricRecord.value()) { + extendedStats.get(BlobStore.Metric.REQUEST_SUCCESS).multiPartPutMetrics.addAndGet(1); + } else { + extendedStats.get(BlobStore.Metric.REQUEST_FAILURE).multiPartPutMetrics.addAndGet(1); + } + stats.multiPartPutMetrics.addAndGet(1); + break; + } + } } @Override @@ -96,22 +181,29 @@ public Stats getStats() { return stats; } + public Map getExtendedStats() { + return extendedStats; + } + static class Stats { - final AtomicLong listCount = new AtomicLong(); + final AtomicLong listMetrics = new AtomicLong(); + + final AtomicLong getMetrics = new AtomicLong(); - final AtomicLong getCount = new AtomicLong(); + final AtomicLong putMetrics = new AtomicLong(); - final AtomicLong putCount = new AtomicLong(); + final AtomicLong deleteMetrics = new AtomicLong(); - final AtomicLong postCount = new AtomicLong(); + final AtomicLong multiPartPutMetrics = new AtomicLong(); Map toMap() { final Map results = new HashMap<>(); - results.put("GetObject", getCount.get()); - results.put("ListObjects", listCount.get()); - results.put("PutObject", putCount.get()); - results.put("PutMultipartObject", postCount.get()); + results.put("GetObject", getMetrics.get()); + results.put("ListObjects", listMetrics.get()); + results.put("PutObject", putMetrics.get()); + results.put("DeleteObjects", deleteMetrics.get()); + results.put("PutMultipartObject", multiPartPutMetrics.get()); return results; } } diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java index ad6939ce299d6..6007d9f9c8a1c 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java @@ -23,10 +23,13 @@ import org.opensearch.common.StreamContext; import org.opensearch.common.blobstore.stream.write.WritePriority; import org.opensearch.common.io.InputStreamContainer; +import org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.repositories.s3.SocketAccess; import org.opensearch.repositories.s3.io.CheckedContainer; +import java.io.BufferedInputStream; import java.io.IOException; +import java.io.InputStream; import java.util.ArrayList; import java.util.List; import java.util.concurrent.CompletableFuture; @@ -138,26 +141,39 @@ private static void uploadPart( ExecutorService streamReadExecutor = uploadRequest.getWritePriority() == WritePriority.HIGH ? priorityExecutorService : executorService; + // Buffered stream is needed to allow mark and reset ops during IO errors so that only buffered + // data can be retried instead of retrying whole file by the application. + InputStream inputStream = new BufferedInputStream(inputStreamContainer.getInputStream(), (int) (ByteSizeUnit.MB.toBytes(1) + 1)); CompletableFuture uploadPartResponseFuture = SocketAccess.doPrivileged( () -> s3AsyncClient.uploadPart( uploadPartRequest, - AsyncRequestBody.fromInputStream( - inputStreamContainer.getInputStream(), - inputStreamContainer.getContentLength(), - streamReadExecutor - ) + AsyncRequestBody.fromInputStream(inputStream, inputStreamContainer.getContentLength(), streamReadExecutor) ) ); - CompletableFuture convertFuture = uploadPartResponseFuture.thenApply( - uploadPartResponse -> convertUploadPartResponse( - completedParts, - inputStreamContainers, - uploadPartResponse, - partNumber, - uploadRequest.doRemoteDataIntegrityCheck() - ) - ); + CompletableFuture convertFuture = uploadPartResponseFuture.whenComplete((resp, throwable) -> { + try { + inputStream.close(); + } catch (IOException ex) { + log.error( + () -> new ParameterizedMessage( + "Failed to close stream while uploading a part of idx {} and file {}.", + uploadPartRequest.partNumber(), + uploadPartRequest.key() + ), + ex + ); + } + }) + .thenApply( + uploadPartResponse -> convertUploadPartResponse( + completedParts, + inputStreamContainers, + uploadPartResponse, + partNumber, + uploadRequest.doRemoteDataIntegrityCheck() + ) + ); futures.add(convertFuture); CompletableFutureUtils.forwardExceptionTo(convertFuture, uploadPartResponseFuture); diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java index 8d45c2167a3d1..a52745e33073e 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java @@ -35,9 +35,12 @@ import org.opensearch.common.util.ByteUtils; import org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.repositories.s3.SocketAccess; +import org.opensearch.repositories.s3.StatsMetricPublisher; import org.opensearch.repositories.s3.io.CheckedContainer; +import java.io.BufferedInputStream; import java.io.IOException; +import java.io.InputStream; import java.util.Arrays; import java.util.Base64; import java.util.List; @@ -86,16 +89,21 @@ public AsyncTransferManager(long minimumPartSize, ExecutorService executorServic * @param streamContext The {@link StreamContext} to supply streams during upload * @return A {@link CompletableFuture} to listen for upload completion */ - public CompletableFuture uploadObject(S3AsyncClient s3AsyncClient, UploadRequest uploadRequest, StreamContext streamContext) { + public CompletableFuture uploadObject( + S3AsyncClient s3AsyncClient, + UploadRequest uploadRequest, + StreamContext streamContext, + StatsMetricPublisher statsMetricPublisher + ) { CompletableFuture returnFuture = new CompletableFuture<>(); try { if (streamContext.getNumberOfParts() == 1) { log.debug(() -> "Starting the upload as a single upload part request"); - uploadInOneChunk(s3AsyncClient, uploadRequest, streamContext.provideStream(0), returnFuture); + uploadInOneChunk(s3AsyncClient, uploadRequest, streamContext.provideStream(0), returnFuture, statsMetricPublisher); } else { log.debug(() -> "Starting the upload as multipart upload request"); - uploadInParts(s3AsyncClient, uploadRequest, streamContext, returnFuture); + uploadInParts(s3AsyncClient, uploadRequest, streamContext, returnFuture, statsMetricPublisher); } } catch (Throwable throwable) { returnFuture.completeExceptionally(throwable); @@ -108,12 +116,14 @@ private void uploadInParts( S3AsyncClient s3AsyncClient, UploadRequest uploadRequest, StreamContext streamContext, - CompletableFuture returnFuture + CompletableFuture returnFuture, + StatsMetricPublisher statsMetricPublisher ) { CreateMultipartUploadRequest.Builder createMultipartUploadRequestBuilder = CreateMultipartUploadRequest.builder() .bucket(uploadRequest.getBucket()) - .key(uploadRequest.getKey()); + .key(uploadRequest.getKey()) + .overrideConfiguration(o -> o.addMetricPublisher(statsMetricPublisher.multipartUploadMetricCollector)); if (uploadRequest.doRemoteDataIntegrityCheck()) { createMultipartUploadRequestBuilder.checksumAlgorithm(ChecksumAlgorithm.CRC32); } @@ -286,12 +296,14 @@ private void uploadInOneChunk( S3AsyncClient s3AsyncClient, UploadRequest uploadRequest, InputStreamContainer inputStreamContainer, - CompletableFuture returnFuture + CompletableFuture returnFuture, + StatsMetricPublisher statsMetricPublisher ) { PutObjectRequest.Builder putObjectRequestBuilder = PutObjectRequest.builder() .bucket(uploadRequest.getBucket()) .key(uploadRequest.getKey()) - .contentLength(uploadRequest.getContentLength()); + .contentLength(uploadRequest.getContentLength()) + .overrideConfiguration(o -> o.addMetricPublisher(statsMetricPublisher.putObjectMetricPublisher)); if (uploadRequest.doRemoteDataIntegrityCheck()) { putObjectRequestBuilder.checksumAlgorithm(ChecksumAlgorithm.CRC32); putObjectRequestBuilder.checksumCRC32(base64StringFromLong(uploadRequest.getExpectedChecksum())); @@ -299,15 +311,22 @@ private void uploadInOneChunk( ExecutorService streamReadExecutor = uploadRequest.getWritePriority() == WritePriority.HIGH ? priorityExecutorService : executorService; + // Buffered stream is needed to allow mark and reset ops during IO errors so that only buffered + // data can be retried instead of retrying whole file by the application. + InputStream inputStream = new BufferedInputStream(inputStreamContainer.getInputStream(), (int) (ByteSizeUnit.MB.toBytes(1) + 1)); CompletableFuture putObjectFuture = SocketAccess.doPrivileged( () -> s3AsyncClient.putObject( putObjectRequestBuilder.build(), - AsyncRequestBody.fromInputStream( - inputStreamContainer.getInputStream(), - inputStreamContainer.getContentLength(), - streamReadExecutor - ) + AsyncRequestBody.fromInputStream(inputStream, inputStreamContainer.getContentLength(), streamReadExecutor) ).handle((resp, throwable) -> { + try { + inputStream.close(); + } catch (IOException e) { + log.error( + () -> new ParameterizedMessage("Failed to close stream while uploading single file {}.", uploadRequest.getKey()), + e + ); + } if (throwable != null) { Throwable unwrappedThrowable = ExceptionsHelper.unwrap(throwable, S3Exception.class); if (unwrappedThrowable != null) { diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java index 9c07b929052bc..97a746cdeed93 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java @@ -33,12 +33,18 @@ import org.opensearch.common.io.InputStreamContainer; import org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.repositories.blobstore.ZeroInputStream; +import org.opensearch.repositories.s3.StatsMetricPublisher; import org.opensearch.test.OpenSearchTestCase; import org.junit.Before; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.Executors; +import java.util.concurrent.atomic.AtomicReference; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; @@ -70,17 +76,17 @@ public void testOneChunkUpload() { putObjectResponseCompletableFuture ); + AtomicReference streamRef = new AtomicReference<>(); CompletableFuture resultFuture = asyncTransferManager.uploadObject( s3AsyncClient, new UploadRequest("bucket", "key", ByteSizeUnit.MB.toBytes(1), WritePriority.HIGH, uploadSuccess -> { // do nothing }, false, null), - new StreamContext( - (partIdx, partSize, position) -> new InputStreamContainer(new ZeroInputStream(partSize), partSize, position), - ByteSizeUnit.MB.toBytes(1), - ByteSizeUnit.MB.toBytes(1), - 1 - ) + new StreamContext((partIdx, partSize, position) -> { + streamRef.set(new ZeroInputStream(partSize)); + return new InputStreamContainer(streamRef.get(), partSize, position); + }, ByteSizeUnit.MB.toBytes(1), ByteSizeUnit.MB.toBytes(1), 1), + new StatsMetricPublisher() ); try { @@ -90,6 +96,14 @@ public void testOneChunkUpload() { } verify(s3AsyncClient, times(1)).putObject(any(PutObjectRequest.class), any(AsyncRequestBody.class)); + + boolean closeError = false; + try { + streamRef.get().available(); + } catch (IOException e) { + closeError = e.getMessage().equals("Stream closed"); + } + assertTrue("InputStream was still open after upload", closeError); } public void testOneChunkUploadCorruption() { @@ -118,7 +132,8 @@ public void testOneChunkUploadCorruption() { ByteSizeUnit.MB.toBytes(1), ByteSizeUnit.MB.toBytes(1), 1 - ) + ), + new StatsMetricPublisher() ); try { @@ -159,17 +174,18 @@ public void testMultipartUpload() { abortMultipartUploadResponseCompletableFuture ); + List streams = new ArrayList<>(); CompletableFuture resultFuture = asyncTransferManager.uploadObject( s3AsyncClient, new UploadRequest("bucket", "key", ByteSizeUnit.MB.toBytes(5), WritePriority.HIGH, uploadSuccess -> { // do nothing }, true, 3376132981L), - new StreamContext( - (partIdx, partSize, position) -> new InputStreamContainer(new ZeroInputStream(partSize), partSize, position), - ByteSizeUnit.MB.toBytes(1), - ByteSizeUnit.MB.toBytes(1), - 5 - ) + new StreamContext((partIdx, partSize, position) -> { + InputStream stream = new ZeroInputStream(partSize); + streams.add(stream); + return new InputStreamContainer(stream, partSize, position); + }, ByteSizeUnit.MB.toBytes(1), ByteSizeUnit.MB.toBytes(1), 5), + new StatsMetricPublisher() ); try { @@ -178,6 +194,16 @@ public void testMultipartUpload() { fail("did not expect resultFuture to fail"); } + streams.forEach(stream -> { + boolean closeError = false; + try { + stream.available(); + } catch (IOException e) { + closeError = e.getMessage().equals("Stream closed"); + } + assertTrue("InputStream was still open after upload", closeError); + }); + verify(s3AsyncClient, times(1)).createMultipartUpload(any(CreateMultipartUploadRequest.class)); verify(s3AsyncClient, times(5)).uploadPart(any(UploadPartRequest.class), any(AsyncRequestBody.class)); verify(s3AsyncClient, times(1)).completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); @@ -219,7 +245,8 @@ public void testMultipartUploadCorruption() { ByteSizeUnit.MB.toBytes(1), ByteSizeUnit.MB.toBytes(1), 5 - ) + ), + new StatsMetricPublisher() ); try { diff --git a/release-notes/opensearch.release-notes-2.11.0.md b/release-notes/opensearch.release-notes-2.11.0.md index 7ebf1b433c7c6..040cc053469ed 100644 --- a/release-notes/opensearch.release-notes-2.11.0.md +++ b/release-notes/opensearch.release-notes-2.11.0.md @@ -5,6 +5,7 @@ ### Added - Add coordinator level stats for search latency ([#8386](https://github.com/opensearch-project/OpenSearch/issues/8386)) - Add metrics for thread_pool task wait time ([#9681](https://github.com/opensearch-project/OpenSearch/pull/9681)) +- Add parallel file download support for remote store based replication ([#8596](https://github.com/opensearch-project/OpenSearch/pull/8596)) - Async blob read support for S3 plugin ([#9694](https://github.com/opensearch-project/OpenSearch/pull/9694)) - [Telemetry-Otel] Added support for OtlpGrpcSpanExporter exporter ([#9666](https://github.com/opensearch-project/OpenSearch/pull/9666)) - Async blob read support for encrypted containers ([#10131](https://github.com/opensearch-project/OpenSearch/pull/10131)) diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java index 6fcc89cfe9e9a..7304304e522f8 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java @@ -86,10 +86,10 @@ public void testFullClusterRestoreStaleDelete() throws Exception { assertEquals(10, repository.blobStore().blobContainer(baseMetadataPath.add("manifest")).listBlobsByPrefix("manifest").size()); - Map indexMetadataMap = remoteClusterStateService.getLatestIndexMetadata( + Map indexMetadataMap = remoteClusterStateService.getLatestMetadata( cluster().getClusterName(), getClusterState().metadata().clusterUUID() - ); + ).getIndices(); assertEquals(0, indexMetadataMap.values().stream().findFirst().get().getNumberOfReplicas()); assertEquals(shardCount, indexMetadataMap.values().stream().findFirst().get().getNumberOfShards()); } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/AbstractRemoteStoreMockRepositoryIntegTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/AbstractRemoteStoreMockRepositoryIntegTestCase.java index 2053800504c89..8166c0008ed83 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/AbstractRemoteStoreMockRepositoryIntegTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/AbstractRemoteStoreMockRepositoryIntegTestCase.java @@ -114,6 +114,10 @@ protected void cleanupRepo() { } protected String setup(Path repoLocation, double ioFailureRate, String skipExceptionBlobList, long maxFailure) { + return setup(repoLocation, ioFailureRate, skipExceptionBlobList, maxFailure, 0); + } + + protected String setup(Path repoLocation, double ioFailureRate, String skipExceptionBlobList, long maxFailure, int replicaCount) { // The random_control_io_exception_rate setting ensures that 10-25% of all operations to remote store results in /// IOException. skip_exception_on_verification_file & skip_exception_on_list_blobs settings ensures that the // repository creation can happen without failure. @@ -128,6 +132,7 @@ protected String setup(Path repoLocation, double ioFailureRate, String skipExcep internalCluster().startClusterManagerOnlyNode(settings.build()); String dataNodeName = internalCluster().startDataOnlyNode(settings.build()); + internalCluster().startDataOnlyNodes(replicaCount, settings.build()); createIndex(INDEX_NAME); logger.info("--> Created index={}", INDEX_NAME); ensureYellowAndNoInitializingShards(INDEX_NAME); diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java index ad3e99dd274ce..b8481610869e6 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java @@ -58,7 +58,7 @@ protected void restore(String... indices) { ); } - protected void verifyRestoredData(Map indexStats, String indexName) throws Exception { + protected void verifyRestoredData(Map indexStats, String indexName, boolean indexMoreData) throws Exception { ensureYellowAndNoInitializingShards(indexName); ensureGreen(indexName); // This is to ensure that shards that were already assigned will get latest count @@ -68,6 +68,8 @@ protected void verifyRestoredData(Map indexStats, String indexName 30, TimeUnit.SECONDS ); + if (indexMoreData == false) return; + IndexResponse response = indexSingleDoc(indexName); if (indexStats.containsKey(MAX_SEQ_NO_TOTAL + "-shard-" + response.getShardId().id())) { assertEquals(indexStats.get(MAX_SEQ_NO_TOTAL + "-shard-" + response.getShardId().id()) + 1, response.getSeqNo()); @@ -80,6 +82,10 @@ protected void verifyRestoredData(Map indexStats, String indexName ); } + protected void verifyRestoredData(Map indexStats, String indexName) throws Exception { + verifyRestoredData(indexStats, indexName, true); + } + public void prepareCluster(int numClusterManagerNodes, int numDataOnlyNodes, String indices, int replicaCount, int shardCount) { prepareCluster(numClusterManagerNodes, numDataOnlyNodes, indices, replicaCount, shardCount, Settings.EMPTY); } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java index e2ef5f85abc74..bccca283ba772 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java @@ -353,7 +353,13 @@ public void assertRemoteStoreRepositoryOnAllNodes(String repositoryName) { // Validated that all the restricted settings are entact on all the nodes. repository.getRestrictedSystemRepositorySettings() .stream() - .forEach(setting -> assertEquals(setting.get(actualRepository.settings()), setting.get(expectedRepository.settings()))); + .forEach( + setting -> assertEquals( + String.format(Locale.ROOT, "Restricted Settings mismatch [%s]", setting.getKey()), + setting.get(actualRepository.settings()), + setting.get(expectedRepository.settings()) + ) + ); } } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java index 5e92bb195680b..3a3e293de9b13 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java @@ -8,23 +8,36 @@ package org.opensearch.remotestore; -import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreResponse; import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; -import org.opensearch.action.support.PlainActionFuture; +import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest; +import org.opensearch.action.admin.indices.template.put.PutIndexTemplateRequest; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.block.ClusterBlockException; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.IndexTemplateMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.metadata.RepositoriesMetadata; import org.opensearch.common.settings.Settings; +import org.opensearch.gateway.remote.ClusterMetadataManifest; +import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.test.OpenSearchIntegTestCase; import java.io.IOException; import java.nio.file.Files; -import java.util.Locale; +import java.util.Arrays; +import java.util.List; import java.util.Map; import java.util.Objects; import java.util.concurrent.ExecutionException; +import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_READ_ONLY_SETTING; +import static org.opensearch.cluster.metadata.Metadata.CLUSTER_READ_ONLY_BLOCK; +import static org.opensearch.cluster.metadata.Metadata.SETTING_READ_ONLY_SETTING; import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING; import static org.opensearch.indices.ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE; -import static org.opensearch.indices.ShardLimitValidator.SETTING_MAX_SHARDS_PER_CLUSTER_KEY; +import static org.opensearch.repositories.blobstore.BlobStoreRepository.SYSTEM_REPOSITORY_SETTING; +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) public class RemoteStoreClusterStateRestoreIT extends BaseRemoteStoreRestoreIT { @@ -48,47 +61,10 @@ private Map initialTestSetup(int shardCount, int replicaCount, int private void resetCluster(int dataNodeCount, int clusterManagerNodeCount) { internalCluster().stopAllNodes(); - addNewNodes(dataNodeCount, clusterManagerNodeCount); + internalCluster().startClusterManagerOnlyNodes(clusterManagerNodeCount); + internalCluster().startDataOnlyNodes(dataNodeCount); } - private void restoreAndValidate(String clusterUUID, Map indexStats) throws Exception { - restoreAndValidate(clusterUUID, indexStats, true); - } - - private void restoreAndValidate(String clusterUUID, Map indexStats, boolean validate) throws Exception { - // TODO once auto restore is merged, the remote cluster state will be restored - - if (validate) { - // Step - 4 validation restore is successful. - ensureGreen(INDEX_NAME); - verifyRestoredData(indexStats, INDEX_NAME); - } - } - - private void restoreAndValidateFails( - String clusterUUID, - PlainActionFuture actionListener, - Class clazz, - String errorSubString - ) { - - try { - restoreAndValidate(clusterUUID, null, false); - } catch (Exception e) { - assertTrue( - String.format(Locale.ROOT, "%s %s", clazz, e), - clazz.isAssignableFrom(e.getClass()) - || clazz.isAssignableFrom(e.getCause().getClass()) - || (e.getCause().getCause() != null && clazz.isAssignableFrom(e.getCause().getCause().getClass())) - ); - assertTrue( - String.format(Locale.ROOT, "Error message mismatch. Expected: [%s]. Actual: [%s]", errorSubString, e.getMessage()), - e.getMessage().contains(errorSubString) - ); - } - } - - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834") public void testFullClusterRestore() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; @@ -106,10 +82,10 @@ public void testFullClusterRestore() throws Exception { assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; // Step - 3 Trigger full cluster restore and validate - restoreAndValidate(prevClusterUUID, indexStats); + validateMetadata(List.of(INDEX_NAME)); + verifyRestoredData(indexStats, INDEX_NAME); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834") public void testFullClusterRestoreMultipleIndices() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; @@ -124,6 +100,7 @@ public void testFullClusterRestoreMultipleIndices() throws Exception { Map indexStats2 = indexData(1, false, secondIndexName); assertEquals((shardCount + 1) * (replicaCount + 1), getNumShards(secondIndexName).totalNumShards); ensureGreen(secondIndexName); + updateIndexBlock(true, secondIndexName); String prevClusterUUID = clusterService().state().metadata().clusterUUID(); @@ -134,155 +111,222 @@ public void testFullClusterRestoreMultipleIndices() throws Exception { assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; // Step - 3 Trigger full cluster restore - restoreAndValidate(prevClusterUUID, indexStats); - ensureGreen(secondIndexName); - verifyRestoredData(indexStats2, secondIndexName); + validateMetadata(List.of(INDEX_NAME, secondIndexName)); + verifyRestoredData(indexStats, INDEX_NAME); + verifyRestoredData(indexStats2, secondIndexName, false); + assertTrue(INDEX_READ_ONLY_SETTING.get(clusterService().state().metadata().index(secondIndexName).getSettings())); + assertThrows(ClusterBlockException.class, () -> indexSingleDoc(secondIndexName)); + // Test is complete + + // Remove the block to ensure proper cleanup + updateIndexBlock(false, secondIndexName); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834") - public void testFullClusterRestoreFailureValidationFailures() throws Exception { + public void testFullClusterRestoreManifestFilePointsToInvalidIndexMetadataPathThrowsException() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; int dataNodeCount = shardCount * (replicaCount + 1); int clusterManagerNodeCount = 1; - // index some data to generate files in remote directory - Map indexStats = initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount); - String prevClusterUUID = clusterService().state().metadata().clusterUUID(); + // Step - 1 index some data to generate files in remote directory + initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount); - // Start of Test - 1 - // Test - 1 Trigger full cluster restore and validate it fails due to incorrect cluster UUID - PlainActionFuture future = PlainActionFuture.newFuture(); - restoreAndValidateFails("randomUUID", future, IllegalStateException.class, "Remote Cluster State not found - randomUUID"); - // End of Test - 1 - - // Start of Test - 3 - // Test - 2 Trigger full cluster restore and validate it fails due to cluster UUID same as current cluster UUID - future = PlainActionFuture.newFuture(); - restoreAndValidateFails( - clusterService().state().metadata().clusterUUID(), - future, - IllegalArgumentException.class, - "clusterUUID to restore from should be different from current cluster UUID" - ); - // End of Test - 2 + String prevClusterUUID = clusterService().state().metadata().clusterUUID(); + String clusterName = clusterService().state().getClusterName().value(); - // Start of Test - 3 // Step - 2 Replace all nodes in the cluster with new nodes. This ensures new cluster state doesn't have previous index metadata - // Restarting cluster with just 1 data node helps with applying cluster settings - resetCluster(1, clusterManagerNodeCount); - String newClusterUUID = clusterService().state().metadata().clusterUUID(); - assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; - - reduceShardLimits(1, 1); - - // Step - 4 Trigger full cluster restore and validate it fails - future = PlainActionFuture.newFuture(); - restoreAndValidateFails( - prevClusterUUID, - future, - IllegalArgumentException.class, - "this action would add [2] total shards, but this cluster currently has [0]/[1] maximum shards open" - ); - resetShardLimits(); - // End of Test - 3 + internalCluster().stopAllNodes(); + // Step - 3 Delete index metadata file in remote + try { + Files.move( + segmentRepoPath.resolve( + RemoteClusterStateService.encodeString(clusterName) + "/cluster-state/" + prevClusterUUID + "/index" + ), + segmentRepoPath.resolve("cluster-state/") + ); + } catch (IOException e) { + throw new RuntimeException(e); + } + assertThrows(IllegalStateException.class, () -> addNewNodes(dataNodeCount, clusterManagerNodeCount)); + // Test is complete - // Start of Test - 4 - // Test -4 Reset cluster and trigger full restore with same name index in the cluster - // Test -4 Add required nodes for this test after last reset. - addNewNodes(dataNodeCount - 1, 0); + // Starting a node without remote state to ensure test cleanup + internalCluster().startNode(Settings.builder().put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), false).build()); + } - newClusterUUID = clusterService().state().metadata().clusterUUID(); - assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; + public void testRemoteStateFullRestart() throws Exception { + int shardCount = randomIntBetween(1, 2); + int replicaCount = 1; + int dataNodeCount = shardCount * (replicaCount + 1); + int clusterManagerNodeCount = 3; - // Test -4 Step - 2 Create a new index with same name - createIndex(INDEX_NAME, remoteStoreIndexSettings(0, 1)); - ensureYellowAndNoInitializingShards(INDEX_NAME); + Map indexStats = initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount); + String prevClusterUUID = clusterService().state().metadata().clusterUUID(); + // Delete index metadata file in remote + try { + Files.move( + segmentRepoPath.resolve( + RemoteClusterStateService.encodeString(clusterService().state().getClusterName().value()) + + "/cluster-state/" + + prevClusterUUID + + "/manifest" + ), + segmentRepoPath.resolve("cluster-state/") + ); + } catch (IOException e) { + throw new RuntimeException(e); + } + internalCluster().fullRestart(); ensureGreen(INDEX_NAME); + String newClusterUUID = clusterService().state().metadata().clusterUUID(); + assert Objects.equals(newClusterUUID, prevClusterUUID) : "Full restart not successful. cluster uuid has changed"; + validateCurrentMetadata(); + verifyRestoredData(indexStats, INDEX_NAME); + } - future = PlainActionFuture.newFuture(); + private void validateMetadata(List indexNames) { + assertEquals(clusterService().state().metadata().indices().size(), indexNames.size()); + for (String indexName : indexNames) { + assertTrue(clusterService().state().metadata().hasIndex(indexName)); + } + } - // Test -4 Step - 3 Trigger full cluster restore and validate fails - restoreAndValidateFails( - prevClusterUUID, - future, - IllegalStateException.class, - "cannot restore index [remote-store-test-idx-1] because an open index with same name/uuid already exists in the cluster" + private void validateCurrentMetadata() throws Exception { + RemoteClusterStateService remoteClusterStateService = internalCluster().getInstance( + RemoteClusterStateService.class, + internalCluster().getClusterManagerName() ); - - // Test -4 Step - 4 validation restore is successful. - ensureGreen(INDEX_NAME); - // End of Test - 4 + assertBusy(() -> { + ClusterMetadataManifest manifest = remoteClusterStateService.getLatestClusterMetadataManifest( + getClusterState().getClusterName().value(), + getClusterState().metadata().clusterUUID() + ).get(); + ClusterState clusterState = getClusterState(); + Metadata currentMetadata = clusterState.metadata(); + assertEquals(currentMetadata.indices().size(), manifest.getIndices().size()); + assertEquals(currentMetadata.coordinationMetadata().term(), manifest.getClusterTerm()); + assertEquals(clusterState.version(), manifest.getStateVersion()); + assertEquals(clusterState.stateUUID(), manifest.getStateUUID()); + assertEquals(currentMetadata.clusterUUIDCommitted(), manifest.isClusterUUIDCommitted()); + for (UploadedIndexMetadata uploadedIndexMetadata : manifest.getIndices()) { + IndexMetadata currentIndexMetadata = currentMetadata.index(uploadedIndexMetadata.getIndexName()); + assertEquals(currentIndexMetadata.getIndex().getUUID(), uploadedIndexMetadata.getIndexUUID()); + } + }); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834") - public void testFullClusterRestoreManifestFilePointsToInvalidIndexMetadataPathThrowsException() throws Exception { + public void testFullClusterRestoreGlobalMetadata() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; int dataNodeCount = shardCount * (replicaCount + 1); int clusterManagerNodeCount = 1; // Step - 1 index some data to generate files in remote directory - initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount); - + Map indexStats = initialTestSetup(shardCount, replicaCount, dataNodeCount, 1); String prevClusterUUID = clusterService().state().metadata().clusterUUID(); + // Create global metadata - register a custom repo + // TODO - uncomment after all customs is also uploaded for all repos - https://github.com/opensearch-project/OpenSearch/issues/10691 + // registerCustomRepository(); + + // Create global metadata - persistent settings + updatePersistentSettings(Settings.builder().put(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), 34).build()); + + // Create global metadata - index template + putIndexTemplate(); + + // Create global metadata - Put cluster block + addClusterLevelReadOnlyBlock(); + // Step - 2 Replace all nodes in the cluster with new nodes. This ensures new cluster state doesn't have previous index metadata resetCluster(dataNodeCount, clusterManagerNodeCount); String newClusterUUID = clusterService().state().metadata().clusterUUID(); assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; - // Step - 4 Delete index metadata file in remote - try { - Files.move( - segmentRepoPath.resolve( - RemoteClusterStateService.encodeString(clusterService().state().getClusterName().value()) - + "/cluster-state/" - + prevClusterUUID - + "/index" - ), - segmentRepoPath.resolve("cluster-state/") - ); - } catch (IOException e) { - throw new RuntimeException(e); - } - - // Step - 5 Trigger full cluster restore and validate fails - PlainActionFuture future = PlainActionFuture.newFuture(); - restoreAndValidateFails(prevClusterUUID, future, IllegalStateException.class, "asdsa"); + // Step - 3 Trigger full cluster restore and validate + // validateCurrentMetadata(); + verifyRestoredData(indexStats, INDEX_NAME, false); + + // validate global metadata restored + verifyRestoredRepositories(); + verifyRestoredIndexTemplate(); + assertEquals(Integer.valueOf(34), SETTING_CLUSTER_MAX_SHARDS_PER_NODE.get(clusterService().state().metadata().settings())); + assertEquals(true, SETTING_READ_ONLY_SETTING.get(clusterService().state().metadata().settings())); + assertTrue(clusterService().state().blocks().hasGlobalBlock(CLUSTER_READ_ONLY_BLOCK)); + // Test is complete + + // Remote the cluster read only block to ensure proper cleanup + updatePersistentSettings(Settings.builder().put(SETTING_READ_ONLY_SETTING.getKey(), false).build()); + assertFalse(clusterService().state().blocks().hasGlobalBlock(CLUSTER_READ_ONLY_BLOCK)); } - private void reduceShardLimits(int maxShardsPerNode, int maxShardsPerCluster) { - // Step 3 - Reduce shard limits to hit shard limit with less no of shards - try { + private void registerCustomRepository() { + assertAcked( client().admin() .cluster() - .updateSettings( - new ClusterUpdateSettingsRequest().transientSettings( - Settings.builder() - .put(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), maxShardsPerNode) - .put(SETTING_MAX_SHARDS_PER_CLUSTER_KEY, maxShardsPerCluster) - ) - ) - .get(); - } catch (InterruptedException | ExecutionException e) { - throw new RuntimeException(e); - } + .preparePutRepository("custom-repo") + .setType("fs") + .setSettings(Settings.builder().put("location", randomRepoPath()).put("compress", false)) + .get() + ); + } + + private void verifyRestoredRepositories() { + RepositoriesMetadata repositoriesMetadata = clusterService().state().metadata().custom(RepositoriesMetadata.TYPE); + assertEquals(2, repositoriesMetadata.repositories().size()); // includes remote store repo as well + assertTrue(SYSTEM_REPOSITORY_SETTING.get(repositoriesMetadata.repository(REPOSITORY_NAME).settings())); + assertTrue(SYSTEM_REPOSITORY_SETTING.get(repositoriesMetadata.repository(REPOSITORY_2_NAME).settings())); + // TODO - uncomment after all customs is also uploaded for all repos - https://github.com/opensearch-project/OpenSearch/issues/10691 + // assertEquals("fs", repositoriesMetadata.repository("custom-repo").type()); + // assertEquals(Settings.builder().put("location", randomRepoPath()).put("compress", false).build(), + // repositoriesMetadata.repository("custom-repo").settings()); + } + + private void addClusterLevelReadOnlyBlock() throws InterruptedException, ExecutionException { + updatePersistentSettings(Settings.builder().put(SETTING_READ_ONLY_SETTING.getKey(), true).build()); + assertTrue(clusterService().state().blocks().hasGlobalBlock(CLUSTER_READ_ONLY_BLOCK)); } - private void resetShardLimits() { - // Step - 5 Reset the cluster settings + private void updatePersistentSettings(Settings settings) throws ExecutionException, InterruptedException { ClusterUpdateSettingsRequest resetRequest = new ClusterUpdateSettingsRequest(); - resetRequest.transientSettings( - Settings.builder().putNull(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey()).putNull(SETTING_MAX_SHARDS_PER_CLUSTER_KEY) + resetRequest.persistentSettings(settings); + assertAcked(client().admin().cluster().updateSettings(resetRequest).get()); + } + + private void verifyRestoredIndexTemplate() { + Map indexTemplateMetadataMap = clusterService().state().metadata().templates(); + assertEquals(1, indexTemplateMetadataMap.size()); + assertEquals(Arrays.asList("pattern-1", "log-*"), indexTemplateMetadataMap.get("my-template").patterns()); + assertEquals( + Settings.builder() // <1> + .put("index.number_of_shards", 3) + .put("index.number_of_replicas", 1) + .build(), + indexTemplateMetadataMap.get("my-template").settings() ); + } - try { - client().admin().cluster().updateSettings(resetRequest).get(); - } catch (InterruptedException | ExecutionException e) { - throw new RuntimeException(e); - } + private static void putIndexTemplate() { + PutIndexTemplateRequest request = new PutIndexTemplateRequest("my-template"); // <1> + request.patterns(Arrays.asList("pattern-1", "log-*")); // <2> + + request.settings( + Settings.builder() // <1> + .put("index.number_of_shards", 3) + .put("index.number_of_replicas", 1) + ); + assertTrue(client().admin().indices().putTemplate(request).actionGet().isAcknowledged()); } + private static void updateIndexBlock(boolean value, String secondIndexName) throws InterruptedException, ExecutionException { + assertAcked( + client().admin() + .indices() + .updateSettings( + new UpdateSettingsRequest(Settings.builder().put(INDEX_READ_ONLY_SETTING.getKey(), value).build(), secondIndexName) + ) + .get() + ); + } } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/SegmentReplicationUsingRemoteStoreDisruptionIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/SegmentReplicationUsingRemoteStoreDisruptionIT.java new file mode 100644 index 0000000000000..b7b3f1d14f422 --- /dev/null +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/SegmentReplicationUsingRemoteStoreDisruptionIT.java @@ -0,0 +1,133 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.remotestore; + +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.index.Index; +import org.opensearch.index.IndexService; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.indices.IndicesService; +import org.opensearch.indices.replication.SegmentReplicationState; +import org.opensearch.indices.replication.SegmentReplicationTarget; +import org.opensearch.indices.replication.SegmentReplicationTargetService; +import org.opensearch.indices.replication.common.ReplicationCollection; +import org.opensearch.test.InternalTestCluster; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.nio.file.Path; +import java.util.Optional; +import java.util.Set; + +/** + * This class runs tests with remote store + segRep while blocking file downloads + */ +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +public class SegmentReplicationUsingRemoteStoreDisruptionIT extends AbstractRemoteStoreMockRepositoryIntegTestCase { + + @Override + public Settings indexSettings() { + return remoteStoreIndexSettings(1); + } + + @Override + protected boolean addMockInternalEngine() { + return false; + } + + public void testCancelReplicationWhileSyncingSegments() throws Exception { + Path location = randomRepoPath().toAbsolutePath(); + setup(location, 0d, "metadata", Long.MAX_VALUE, 1); + + final Set dataNodeNames = internalCluster().getDataNodeNames(); + final String replicaNode = getNode(dataNodeNames, false); + final String primaryNode = getNode(dataNodeNames, true); + + SegmentReplicationTargetService targetService = internalCluster().getInstance(SegmentReplicationTargetService.class, replicaNode); + ensureGreen(INDEX_NAME); + blockNodeOnAnySegmentFile(REPOSITORY_NAME, replicaNode); + final IndexShard indexShard = getIndexShard(replicaNode, INDEX_NAME); + indexSingleDoc(); + refresh(INDEX_NAME); + waitForBlock(replicaNode, REPOSITORY_NAME, TimeValue.timeValueSeconds(10)); + final SegmentReplicationState state = targetService.getOngoingEventSegmentReplicationState(indexShard.shardId()); + assertEquals(SegmentReplicationState.Stage.GET_FILES, state.getStage()); + ReplicationCollection.ReplicationRef segmentReplicationTargetReplicationRef = targetService.get( + state.getReplicationId() + ); + final SegmentReplicationTarget segmentReplicationTarget = segmentReplicationTargetReplicationRef.get(); + // close the target ref here otherwise it will hold a refcount + segmentReplicationTargetReplicationRef.close(); + assertNotNull(segmentReplicationTarget); + assertTrue(segmentReplicationTarget.refCount() > 0); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primaryNode)); + assertBusy(() -> { + assertTrue(indexShard.routingEntry().primary()); + assertNull(targetService.getOngoingEventSegmentReplicationState(indexShard.shardId())); + assertEquals("Target should be closed", 0, segmentReplicationTarget.refCount()); + }); + unblockNode(REPOSITORY_NAME, replicaNode); + cleanupRepo(); + } + + public void testCancelReplicationWhileFetchingMetadata() throws Exception { + Path location = randomRepoPath().toAbsolutePath(); + setup(location, 0d, "metadata", Long.MAX_VALUE, 1); + + final Set dataNodeNames = internalCluster().getDataNodeNames(); + final String replicaNode = getNode(dataNodeNames, false); + final String primaryNode = getNode(dataNodeNames, true); + + SegmentReplicationTargetService targetService = internalCluster().getInstance(SegmentReplicationTargetService.class, replicaNode); + ensureGreen(INDEX_NAME); + blockNodeOnAnyFiles(REPOSITORY_NAME, replicaNode); + final IndexShard indexShard = getIndexShard(replicaNode, INDEX_NAME); + indexSingleDoc(); + refresh(INDEX_NAME); + waitForBlock(replicaNode, REPOSITORY_NAME, TimeValue.timeValueSeconds(10)); + final SegmentReplicationState state = targetService.getOngoingEventSegmentReplicationState(indexShard.shardId()); + assertEquals(SegmentReplicationState.Stage.GET_CHECKPOINT_INFO, state.getStage()); + ReplicationCollection.ReplicationRef segmentReplicationTargetReplicationRef = targetService.get( + state.getReplicationId() + ); + final SegmentReplicationTarget segmentReplicationTarget = segmentReplicationTargetReplicationRef.get(); + // close the target ref here otherwise it will hold a refcount + segmentReplicationTargetReplicationRef.close(); + assertNotNull(segmentReplicationTarget); + assertTrue(segmentReplicationTarget.refCount() > 0); + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primaryNode)); + assertBusy(() -> { + assertTrue(indexShard.routingEntry().primary()); + assertNull(targetService.getOngoingEventSegmentReplicationState(indexShard.shardId())); + assertEquals("Target should be closed", 0, segmentReplicationTarget.refCount()); + }); + unblockNode(REPOSITORY_NAME, replicaNode); + cleanupRepo(); + } + + private String getNode(Set dataNodeNames, boolean primary) { + assertEquals(2, dataNodeNames.size()); + for (String name : dataNodeNames) { + final IndexShard indexShard = getIndexShard(name, INDEX_NAME); + if (indexShard.routingEntry().primary() == primary) { + return name; + } + } + return null; + } + + private IndexShard getIndexShard(String node, String indexName) { + final Index index = resolveIndex(indexName); + IndicesService indicesService = internalCluster().getInstance(IndicesService.class, node); + IndexService indexService = indicesService.indexService(index); + assertNotNull(indexService); + final Optional shardId = indexService.shardIds().stream().findFirst(); + return shardId.map(indexService::getShard).orElse(null); + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/search/profile/query/QueryProfilerIT.java b/server/src/internalClusterTest/java/org/opensearch/search/profile/query/QueryProfilerIT.java index 5f794d2abf878..ef73438114079 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/profile/query/QueryProfilerIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/profile/query/QueryProfilerIT.java @@ -32,6 +32,8 @@ package org.opensearch.search.profile.query; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + import org.apache.lucene.tests.util.English; import org.opensearch.action.index.IndexRequestBuilder; import org.opensearch.action.search.MultiSearchResponse; @@ -40,20 +42,23 @@ import org.opensearch.action.search.SearchType; import org.opensearch.action.search.ShardSearchFailure; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; import org.opensearch.search.SearchHit; import org.opensearch.search.profile.ProfileResult; import org.opensearch.search.profile.ProfileShardResult; import org.opensearch.search.sort.SortOrder; -import org.opensearch.test.OpenSearchIntegTestCase; +import org.opensearch.test.ParameterizedOpenSearchIntegTestCase; import java.util.Arrays; +import java.util.Collection; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; +import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING; import static org.opensearch.search.profile.query.RandomQueryGenerator.randomQueryBuilder; import static org.hamcrest.Matchers.emptyOrNullString; import static org.hamcrest.Matchers.equalTo; @@ -61,8 +66,32 @@ import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.nullValue; + +public class QueryProfilerIT extends ParameterizedOpenSearchIntegTestCase { + private final boolean concurrentSearchEnabled; + private static final String MAX_PREFIX = "max_"; + private static final String MIN_PREFIX = "min_"; + private static final String AVG_PREFIX = "avg_"; + private static final String TIMING_TYPE_COUNT_SUFFIX = "_count"; + + public QueryProfilerIT(Settings settings, boolean concurrentSearchEnabled) { + super(settings); + this.concurrentSearchEnabled = concurrentSearchEnabled; + } -public class QueryProfilerIT extends OpenSearchIntegTestCase { + @ParametersFactory + public static Collection parameters() { + return Arrays.asList( + new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), false).build(), false }, + new Object[] { Settings.builder().put(CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING.getKey(), true).build(), true } + ); + } + + @Override + protected Settings featureFlagSettings() { + return Settings.builder().put(super.featureFlagSettings()).put(FeatureFlags.CONCURRENT_SEGMENT_SEARCH, "true").build(); + } /** * This test simply checks to make sure nothing crashes. Test indexes 100-150 documents, @@ -229,6 +258,7 @@ public void testSimpleMatch() throws Exception { assertEquals(result.getLuceneDescription(), "field1:one"); assertThat(result.getTime(), greaterThan(0L)); assertNotNull(result.getTimeBreakdown()); + assertQueryProfileResult(result); } CollectorResult result = searchProfiles.getCollectorResult(); @@ -271,6 +301,7 @@ public void testBool() throws Exception { assertThat(result.getTime(), greaterThan(0L)); assertNotNull(result.getTimeBreakdown()); assertEquals(result.getProfiledChildren().size(), 2); + assertQueryProfileResult(result); // Check the children List children = result.getProfiledChildren(); @@ -282,12 +313,14 @@ public void testBool() throws Exception { assertThat(childProfile.getTime(), greaterThan(0L)); assertNotNull(childProfile.getTimeBreakdown()); assertEquals(childProfile.getProfiledChildren().size(), 0); + assertQueryProfileResult(childProfile); childProfile = children.get(1); assertEquals(childProfile.getQueryName(), "TermQuery"); assertEquals(childProfile.getLuceneDescription(), "field1:two"); assertThat(childProfile.getTime(), greaterThan(0L)); assertNotNull(childProfile.getTimeBreakdown()); + assertQueryProfileResult(childProfile); } CollectorResult result = searchProfiles.getCollectorResult(); @@ -330,6 +363,7 @@ public void testEmptyBool() throws Exception { assertNotNull(result.getLuceneDescription()); assertThat(result.getTime(), greaterThan(0L)); assertNotNull(result.getTimeBreakdown()); + assertQueryProfileResult(result); } CollectorResult result = searchProfiles.getCollectorResult(); @@ -375,6 +409,7 @@ public void testCollapsingBool() throws Exception { assertNotNull(result.getLuceneDescription()); assertThat(result.getTime(), greaterThan(0L)); assertNotNull(result.getTimeBreakdown()); + assertQueryProfileResult(result); } CollectorResult result = searchProfiles.getCollectorResult(); @@ -415,6 +450,90 @@ public void testBoosting() throws Exception { assertNotNull(result.getLuceneDescription()); assertThat(result.getTime(), greaterThan(0L)); assertNotNull(result.getTimeBreakdown()); + assertQueryProfileResult(result); + } + + CollectorResult result = searchProfiles.getCollectorResult(); + assertThat(result.getName(), is(not(emptyOrNullString()))); + assertThat(result.getTime(), greaterThan(0L)); + } + } + } + + public void testSearchLeafForItsLeavesAndRewriteQuery() throws Exception { + createIndex("test"); + ensureGreen(); + + int numDocs = 122; + IndexRequestBuilder[] docs = new IndexRequestBuilder[numDocs]; + for (int i = 0; i < numDocs; i++) { + docs[i] = client().prepareIndex("test").setId(String.valueOf(i)).setSource("field1", English.intToEnglish(i), "field2", i); + } + + List terms = Arrays.asList("zero", "zero", "one"); + + indexRandom(true, docs); + + refresh(); + + QueryBuilder q = QueryBuilders.boostingQuery( + QueryBuilders.idsQuery().addIds(String.valueOf(randomInt()), String.valueOf(randomInt())), + QueryBuilders.termsQuery("field1", terms) + ).boost(randomFloat()).negativeBoost(randomFloat()); + logger.info("Query: {}", q); + + SearchResponse resp = client().prepareSearch() + .setQuery(q) + .setTrackTotalHits(true) + .setProfile(true) + .setSearchType(SearchType.QUERY_THEN_FETCH) + .get(); + + assertNotNull("Profile response element should not be null", resp.getProfileResults()); + assertThat("Profile response should not be an empty array", resp.getProfileResults().size(), not(0)); + + for (Map.Entry shardResult : resp.getProfileResults().entrySet()) { + assertThat(shardResult.getValue().getNetworkTime().getInboundNetworkTime(), greaterThanOrEqualTo(0L)); + assertThat(shardResult.getValue().getNetworkTime().getOutboundNetworkTime(), greaterThanOrEqualTo(0L)); + for (QueryProfileShardResult searchProfiles : shardResult.getValue().getQueryProfileResults()) { + List results = searchProfiles.getQueryResults(); + for (ProfileResult result : results) { + assertNotNull(result.getQueryName()); + assertNotNull(result.getLuceneDescription()); + assertThat(result.getTime(), greaterThan(0L)); + Map breakdown = result.getTimeBreakdown(); + Long maxSliceTime = result.getMaxSliceTime(); + Long minSliceTime = result.getMinSliceTime(); + Long avgSliceTime = result.getAvgSliceTime(); + if (concurrentSearchEnabled && results.get(0).equals(result)) { + assertNotNull(maxSliceTime); + assertNotNull(minSliceTime); + assertNotNull(avgSliceTime); + assertThat(breakdown.size(), equalTo(66)); + for (QueryTimingType queryTimingType : QueryTimingType.values()) { + if (queryTimingType != QueryTimingType.CREATE_WEIGHT) { + String maxTimingType = MAX_PREFIX + queryTimingType; + String minTimingType = MIN_PREFIX + queryTimingType; + String avgTimingType = AVG_PREFIX + queryTimingType; + assertNotNull(breakdown.get(maxTimingType)); + assertNotNull(breakdown.get(minTimingType)); + assertNotNull(breakdown.get(avgTimingType)); + assertNotNull(breakdown.get(maxTimingType + TIMING_TYPE_COUNT_SUFFIX)); + assertNotNull(breakdown.get(minTimingType + TIMING_TYPE_COUNT_SUFFIX)); + assertNotNull(breakdown.get(avgTimingType + TIMING_TYPE_COUNT_SUFFIX)); + } + } + } else if (concurrentSearchEnabled) { + assertThat(maxSliceTime, equalTo(0L)); + assertThat(minSliceTime, equalTo(0L)); + assertThat(avgSliceTime, equalTo(0L)); + assertThat(breakdown.size(), equalTo(27)); + } else { + assertThat(maxSliceTime, is(nullValue())); + assertThat(minSliceTime, is(nullValue())); + assertThat(avgSliceTime, is(nullValue())); + assertThat(breakdown.size(), equalTo(27)); + } } CollectorResult result = searchProfiles.getCollectorResult(); @@ -455,6 +574,7 @@ public void testDisMaxRange() throws Exception { assertNotNull(result.getLuceneDescription()); assertThat(result.getTime(), greaterThan(0L)); assertNotNull(result.getTimeBreakdown()); + assertQueryProfileResult(result); } CollectorResult result = searchProfiles.getCollectorResult(); @@ -494,6 +614,7 @@ public void testRange() throws Exception { assertNotNull(result.getLuceneDescription()); assertThat(result.getTime(), greaterThan(0L)); assertNotNull(result.getTimeBreakdown()); + assertQueryProfileResult(result); } CollectorResult result = searchProfiles.getCollectorResult(); @@ -547,6 +668,7 @@ public void testPhrase() throws Exception { assertNotNull(result.getLuceneDescription()); assertThat(result.getTime(), greaterThan(0L)); assertNotNull(result.getTimeBreakdown()); + assertQueryProfileResult(result); } CollectorResult result = searchProfiles.getCollectorResult(); @@ -579,4 +701,35 @@ public void testNoProfile() throws Exception { assertThat("Profile response element should be an empty map", resp.getProfileResults().size(), equalTo(0)); } + private void assertQueryProfileResult(ProfileResult result) { + Map breakdown = result.getTimeBreakdown(); + Long maxSliceTime = result.getMaxSliceTime(); + Long minSliceTime = result.getMinSliceTime(); + Long avgSliceTime = result.getAvgSliceTime(); + if (concurrentSearchEnabled) { + assertNotNull(maxSliceTime); + assertNotNull(minSliceTime); + assertNotNull(avgSliceTime); + assertThat(breakdown.size(), equalTo(66)); + for (QueryTimingType queryTimingType : QueryTimingType.values()) { + if (queryTimingType != QueryTimingType.CREATE_WEIGHT) { + String maxTimingType = MAX_PREFIX + queryTimingType; + String minTimingType = MIN_PREFIX + queryTimingType; + String avgTimingType = AVG_PREFIX + queryTimingType; + assertNotNull(breakdown.get(maxTimingType)); + assertNotNull(breakdown.get(minTimingType)); + assertNotNull(breakdown.get(avgTimingType)); + assertNotNull(breakdown.get(maxTimingType + TIMING_TYPE_COUNT_SUFFIX)); + assertNotNull(breakdown.get(minTimingType + TIMING_TYPE_COUNT_SUFFIX)); + assertNotNull(breakdown.get(avgTimingType + TIMING_TYPE_COUNT_SUFFIX)); + } + } + } else { + assertThat(maxSliceTime, is(nullValue())); + assertThat(minSliceTime, is(nullValue())); + assertThat(avgSliceTime, is(nullValue())); + assertThat(breakdown.size(), equalTo(27)); + } + } + } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java index 69efea186d927..e9bfa358103c8 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java @@ -57,6 +57,7 @@ import org.opensearch.monitor.process.ProcessStats; import org.opensearch.node.AdaptiveSelectionStats; import org.opensearch.node.NodesResourceUsageStats; +import org.opensearch.repositories.RepositoriesStats; import org.opensearch.script.ScriptCacheStats; import org.opensearch.script.ScriptStats; import org.opensearch.search.backpressure.stats.SearchBackpressureStats; @@ -146,6 +147,9 @@ public class NodeStats extends BaseNodeResponse implements ToXContentFragment { @Nullable private NodesResourceUsageStats resourceUsageStats; + @Nullable + private RepositoriesStats repositoriesStats; + public NodeStats(StreamInput in) throws IOException { super(in); timestamp = in.readVLong(); @@ -202,11 +206,16 @@ public NodeStats(StreamInput in) throws IOException { } else { searchPipelineStats = null; } - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { // make it 2.12 when we backport + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { resourceUsageStats = in.readOptionalWriteable(NodesResourceUsageStats::new); } else { resourceUsageStats = null; } + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { + repositoriesStats = in.readOptionalWriteable(RepositoriesStats::new); + } else { + repositoriesStats = null; + } } public NodeStats( @@ -234,7 +243,8 @@ public NodeStats( @Nullable WeightedRoutingStats weightedRoutingStats, @Nullable FileCacheStats fileCacheStats, @Nullable TaskCancellationStats taskCancellationStats, - @Nullable SearchPipelineStats searchPipelineStats + @Nullable SearchPipelineStats searchPipelineStats, + @Nullable RepositoriesStats repositoriesStats ) { super(node); this.timestamp = timestamp; @@ -261,6 +271,7 @@ public NodeStats( this.fileCacheStats = fileCacheStats; this.taskCancellationStats = taskCancellationStats; this.searchPipelineStats = searchPipelineStats; + this.repositoriesStats = repositoriesStats; } public long getTimestamp() { @@ -403,6 +414,11 @@ public SearchPipelineStats getSearchPipelineStats() { return searchPipelineStats; } + @Nullable + public RepositoriesStats getRepositoriesStats() { + return repositoriesStats; + } + @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); @@ -446,9 +462,12 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_9_0)) { out.writeOptionalWriteable(searchPipelineStats); } - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { // make it 2.12 when we backport + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeOptionalWriteable(resourceUsageStats); } + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { + out.writeOptionalWriteable(repositoriesStats); + } } @Override @@ -542,6 +561,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (getResourceUsageStats() != null) { getResourceUsageStats().toXContent(builder, params); } + if (getRepositoriesStats() != null) { + getRepositoriesStats().toXContent(builder, params); + } return builder; } } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java index 99c9fb2d1e26a..88dff20354aa2 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java @@ -214,7 +214,8 @@ public enum Metric { FILE_CACHE_STATS("file_cache"), TASK_CANCELLATION("task_cancellation"), SEARCH_PIPELINE("search_pipeline"), - RESOURCE_USAGE_STATS("resource_usage_stats"); + RESOURCE_USAGE_STATS("resource_usage_stats"), + REPOSITORIES("repositories"); private String metricName; diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java index 204157236a282..aa02f8e580f4a 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java @@ -125,7 +125,8 @@ protected NodeStats nodeOperation(NodeStatsRequest nodeStatsRequest) { NodesStatsRequest.Metric.FILE_CACHE_STATS.containedIn(metrics), NodesStatsRequest.Metric.TASK_CANCELLATION.containedIn(metrics), NodesStatsRequest.Metric.SEARCH_PIPELINE.containedIn(metrics), - NodesStatsRequest.Metric.RESOURCE_USAGE_STATS.containedIn(metrics) + NodesStatsRequest.Metric.RESOURCE_USAGE_STATS.containedIn(metrics), + NodesStatsRequest.Metric.REPOSITORIES.containedIn(metrics) ); } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java index d8323e209be23..f51fabbfb2388 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -169,6 +169,7 @@ protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeReq false, false, false, + false, false ); List shardsStats = new ArrayList<>(); diff --git a/server/src/main/java/org/opensearch/action/search/SearchQueryCategorizer.java b/server/src/main/java/org/opensearch/action/search/SearchQueryCategorizer.java new file mode 100644 index 0000000000000..9cbe2d2ffcb7d --- /dev/null +++ b/server/src/main/java/org/opensearch/action/search/SearchQueryCategorizer.java @@ -0,0 +1,81 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.search; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilderVisitor; +import org.opensearch.index.query.QueryShapeVisitor; +import org.opensearch.search.aggregations.AggregatorFactories; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.sort.SortBuilder; +import org.opensearch.telemetry.metrics.MetricsRegistry; +import org.opensearch.telemetry.metrics.tags.Tags; + +import java.util.List; +import java.util.ListIterator; + +/** + * Class to categorize the search queries based on the type and increment the relevant counters. + * Class also logs the query shape. + */ +final class SearchQueryCategorizer { + + private static final Logger log = LogManager.getLogger(SearchQueryCategorizer.class); + + final SearchQueryCounters searchQueryCounters; + + public SearchQueryCategorizer(MetricsRegistry metricsRegistry) { + searchQueryCounters = new SearchQueryCounters(metricsRegistry); + } + + public void categorize(SearchSourceBuilder source) { + QueryBuilder topLevelQueryBuilder = source.query(); + + logQueryShape(topLevelQueryBuilder); + incrementQueryTypeCounters(topLevelQueryBuilder); + incrementQueryAggregationCounters(source.aggregations()); + incrementQuerySortCounters(source.sorts()); + } + + private void incrementQuerySortCounters(List> sorts) { + if (sorts != null && sorts.size() > 0) { + for (ListIterator> it = sorts.listIterator(); it.hasNext();) { + SortBuilder sortBuilder = it.next(); + String sortOrder = sortBuilder.order().toString(); + searchQueryCounters.sortCounter.add(1, Tags.create().addTag("sort_order", sortOrder)); + } + } + } + + private void incrementQueryAggregationCounters(AggregatorFactories.Builder aggregations) { + if (aggregations != null) { + searchQueryCounters.aggCounter.add(1); + } + } + + private void incrementQueryTypeCounters(QueryBuilder topLevelQueryBuilder) { + if (topLevelQueryBuilder == null) { + return; + } + QueryBuilderVisitor searchQueryVisitor = new SearchQueryCategorizingVisitor(searchQueryCounters); + topLevelQueryBuilder.visit(searchQueryVisitor); + } + + private void logQueryShape(QueryBuilder topLevelQueryBuilder) { + if (topLevelQueryBuilder == null) { + return; + } + QueryShapeVisitor shapeVisitor = new QueryShapeVisitor(); + topLevelQueryBuilder.visit(shapeVisitor); + log.debug("Query shape : {}", shapeVisitor.prettyPrintTree(" ")); + } + +} diff --git a/server/src/main/java/org/opensearch/action/search/SearchQueryCategorizingVisitor.java b/server/src/main/java/org/opensearch/action/search/SearchQueryCategorizingVisitor.java new file mode 100644 index 0000000000000..98f0169e69a5c --- /dev/null +++ b/server/src/main/java/org/opensearch/action/search/SearchQueryCategorizingVisitor.java @@ -0,0 +1,73 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.search; + +import org.apache.lucene.search.BooleanClause; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.MatchPhraseQueryBuilder; +import org.opensearch.index.query.MatchQueryBuilder; +import org.opensearch.index.query.MultiMatchQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.QueryBuilderVisitor; +import org.opensearch.index.query.QueryStringQueryBuilder; +import org.opensearch.index.query.RangeQueryBuilder; +import org.opensearch.index.query.RegexpQueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.index.query.WildcardQueryBuilder; +import org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder; +import org.opensearch.telemetry.metrics.tags.Tags; + +/** + * Class to visit the querybuilder tree and also track the level information. + * Increments the counters related to Search Query type. + */ +final class SearchQueryCategorizingVisitor implements QueryBuilderVisitor { + private static final String LEVEL_TAG = "level"; + private final int level; + private final SearchQueryCounters searchQueryCounters; + + public SearchQueryCategorizingVisitor(SearchQueryCounters searchQueryCounters) { + this(searchQueryCounters, 0); + } + + private SearchQueryCategorizingVisitor(SearchQueryCounters counters, int level) { + this.searchQueryCounters = counters; + this.level = level; + } + + public void accept(QueryBuilder qb) { + if (qb instanceof BoolQueryBuilder) { + searchQueryCounters.boolCounter.add(1, Tags.create().addTag(LEVEL_TAG, level)); + } else if (qb instanceof FunctionScoreQueryBuilder) { + searchQueryCounters.functionScoreCounter.add(1, Tags.create().addTag(LEVEL_TAG, level)); + } else if (qb instanceof MatchQueryBuilder) { + searchQueryCounters.matchCounter.add(1, Tags.create().addTag(LEVEL_TAG, level)); + } else if (qb instanceof MatchPhraseQueryBuilder) { + searchQueryCounters.matchPhrasePrefixCounter.add(1, Tags.create().addTag(LEVEL_TAG, level)); + } else if (qb instanceof MultiMatchQueryBuilder) { + searchQueryCounters.multiMatchCounter.add(1, Tags.create().addTag(LEVEL_TAG, level)); + } else if (qb instanceof QueryStringQueryBuilder) { + searchQueryCounters.queryStringQueryCounter.add(1, Tags.create().addTag(LEVEL_TAG, level)); + } else if (qb instanceof RangeQueryBuilder) { + searchQueryCounters.rangeCounter.add(1, Tags.create().addTag(LEVEL_TAG, level)); + } else if (qb instanceof RegexpQueryBuilder) { + searchQueryCounters.regexCounter.add(1, Tags.create().addTag(LEVEL_TAG, level)); + } else if (qb instanceof TermQueryBuilder) { + searchQueryCounters.termCounter.add(1, Tags.create().addTag(LEVEL_TAG, level)); + } else if (qb instanceof WildcardQueryBuilder) { + searchQueryCounters.wildcardCounter.add(1, Tags.create().addTag(LEVEL_TAG, level)); + } else { + searchQueryCounters.otherQueryCounter.add(1, Tags.create().addTag(LEVEL_TAG, level)); + } + } + + public QueryBuilderVisitor getChildVisitor(BooleanClause.Occur occur) { + return new SearchQueryCategorizingVisitor(searchQueryCounters, level + 1); + } +} diff --git a/server/src/main/java/org/opensearch/action/search/SearchQueryCounters.java b/server/src/main/java/org/opensearch/action/search/SearchQueryCounters.java new file mode 100644 index 0000000000000..7e0259af07701 --- /dev/null +++ b/server/src/main/java/org/opensearch/action/search/SearchQueryCounters.java @@ -0,0 +1,117 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.search; + +import org.opensearch.telemetry.metrics.Counter; +import org.opensearch.telemetry.metrics.MetricsRegistry; + +/** + * Class contains all the Counters related to search query types. + */ +final class SearchQueryCounters { + private static final String UNIT = "1"; + private final MetricsRegistry metricsRegistry; + + // Counters related to Query types + public final Counter aggCounter; + public final Counter boolCounter; + public final Counter functionScoreCounter; + public final Counter matchCounter; + public final Counter matchPhrasePrefixCounter; + public final Counter multiMatchCounter; + public final Counter otherQueryCounter; + public final Counter queryStringQueryCounter; + public final Counter rangeCounter; + public final Counter regexCounter; + + public final Counter sortCounter; + public final Counter skippedCounter; + public final Counter termCounter; + public final Counter totalCounter; + public final Counter wildcardCounter; + + public SearchQueryCounters(MetricsRegistry metricsRegistry) { + this.metricsRegistry = metricsRegistry; + this.aggCounter = metricsRegistry.createCounter( + "search.query.type.agg.count", + "Counter for the number of top level agg search queries", + UNIT + ); + this.boolCounter = metricsRegistry.createCounter( + "search.query.type.bool.count", + "Counter for the number of top level and nested bool search queries", + UNIT + ); + this.functionScoreCounter = metricsRegistry.createCounter( + "search.query.type.functionscore.count", + "Counter for the number of top level and nested function score search queries", + UNIT + ); + this.matchCounter = metricsRegistry.createCounter( + "search.query.type.match.count", + "Counter for the number of top level and nested match search queries", + UNIT + ); + this.matchPhrasePrefixCounter = metricsRegistry.createCounter( + "search.query.type.matchphrase.count", + "Counter for the number of top level and nested match phrase prefix search queries", + UNIT + ); + this.multiMatchCounter = metricsRegistry.createCounter( + "search.query.type.multimatch.count", + "Counter for the number of top level and nested multi match search queries", + UNIT + ); + this.otherQueryCounter = metricsRegistry.createCounter( + "search.query.type.other.count", + "Counter for the number of top level and nested search queries that do not match any other categories", + UNIT + ); + this.queryStringQueryCounter = metricsRegistry.createCounter( + "search.query.type.querystringquery.count", + "Counter for the number of top level and nested queryStringQuery search queries", + UNIT + ); + this.rangeCounter = metricsRegistry.createCounter( + "search.query.type.range.count", + "Counter for the number of top level and nested range search queries", + UNIT + ); + this.regexCounter = metricsRegistry.createCounter( + "search.query.type.regex.count", + "Counter for the number of top level and nested regex search queries", + UNIT + ); + this.skippedCounter = metricsRegistry.createCounter( + "search.query.type.skipped.count", + "Counter for the number queries skipped due to error", + UNIT + ); + this.sortCounter = metricsRegistry.createCounter( + "search.query.type.sort.count", + "Counter for the number of top level sort search queries", + UNIT + ); + this.termCounter = metricsRegistry.createCounter( + "search.query.type.term.count", + "Counter for the number of top level and nested term search queries", + UNIT + ); + this.totalCounter = metricsRegistry.createCounter( + "search.query.type.total.count", + "Counter for the number of top level and nested search queries", + UNIT + ); + this.wildcardCounter = metricsRegistry.createCounter( + "search.query.type.wildcard.count", + "Counter for the number of top level and nested wildcard search queries", + UNIT + ); + } +} diff --git a/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java b/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java index 284f71bd9da62..a6fb8453af4ff 100644 --- a/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java +++ b/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java @@ -88,6 +88,7 @@ import org.opensearch.search.profile.SearchProfileShardResults; import org.opensearch.tasks.CancellableTask; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.metrics.MetricsRegistry; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.RemoteClusterAware; import org.opensearch.transport.RemoteClusterService; @@ -137,6 +138,13 @@ public class TransportSearchAction extends HandledTransportAction SEARCH_QUERY_METRICS_ENABLED_SETTING = Setting.boolSetting( + "search.query.metrics.enabled", + false, + Setting.Property.NodeScope, + Setting.Property.Dynamic + ); + // cluster level setting for timeout based search cancellation. If search request level parameter is present then that will take // precedence over the cluster setting value public static final String SEARCH_CANCEL_AFTER_TIME_INTERVAL_SETTING_KEY = "search.cancel_after_time_interval"; @@ -177,8 +185,14 @@ public class TransportSearchAction extends HandledTransportAction) SearchRequest::new); this.client = client; @@ -211,6 +226,17 @@ public TransportSearchAction( this.isRequestStatsEnabled = clusterService.getClusterSettings().get(SEARCH_REQUEST_STATS_ENABLED); clusterService.getClusterSettings().addSettingsUpdateConsumer(SEARCH_REQUEST_STATS_ENABLED, this::setIsRequestStatsEnabled); this.searchRequestStats = searchRequestStats; + this.metricsRegistry = metricsRegistry; + this.searchQueryMetricsEnabled = clusterService.getClusterSettings().get(SEARCH_QUERY_METRICS_ENABLED_SETTING); + clusterService.getClusterSettings() + .addSettingsUpdateConsumer(SEARCH_QUERY_METRICS_ENABLED_SETTING, this::setSearchQueryMetricsEnabled); + } + + private void setSearchQueryMetricsEnabled(boolean searchQueryMetricsEnabled) { + this.searchQueryMetricsEnabled = searchQueryMetricsEnabled; + if ((this.searchQueryMetricsEnabled == true) && this.searchQueryCategorizer == null) { + this.searchQueryCategorizer = new SearchQueryCategorizer(metricsRegistry); + } } private void setIsRequestStatsEnabled(boolean isRequestStatsEnabled) { @@ -489,6 +515,14 @@ private void executeRequest( return; } + if (searchQueryMetricsEnabled) { + try { + searchQueryCategorizer.categorize(searchRequest.source()); + } catch (Exception e) { + logger.error("Error while trying to categorize the query.", e); + } + } + ActionListener rewriteListener = ActionListener.wrap(source -> { if (source != searchRequest.source()) { // only set it if it changed - we don't allow null values to be set but it might be already null. this way we catch diff --git a/server/src/main/java/org/opensearch/common/blobstore/BlobStore.java b/server/src/main/java/org/opensearch/common/blobstore/BlobStore.java index 2ee3e9557b354..0f6646d37f950 100644 --- a/server/src/main/java/org/opensearch/common/blobstore/BlobStore.java +++ b/server/src/main/java/org/opensearch/common/blobstore/BlobStore.java @@ -49,6 +49,9 @@ public interface BlobStore extends Closeable { */ BlobContainer blobContainer(BlobPath path); + /** + * Returns statistics on the count of operations that have been performed on this blob store + */ /** * Returns statistics on the count of operations that have been performed on this blob store */ @@ -56,8 +59,36 @@ default Map stats() { return Collections.emptyMap(); } + /** + * Returns details statistics of operations that have been performed on this blob store + */ + default Map> extendedStats() { + return Collections.emptyMap(); + } + /** * Reload the blob store inplace */ default void reload(RepositoryMetadata repositoryMetadata) {} + + /** + * Metrics for BlobStore interactions + */ + enum Metric { + REQUEST_SUCCESS("request_success_total"), + REQUEST_FAILURE("request_failures_total"), + REQUEST_LATENCY("request_time_in_millis"), + RETRY_COUNT("request_retry_count_total"); + + private String metricName; + + Metric(String name) { + this.metricName = name; + } + + public String metricName() { + return this.metricName; + } + } + } diff --git a/server/src/main/java/org/opensearch/common/blobstore/EncryptedBlobStore.java b/server/src/main/java/org/opensearch/common/blobstore/EncryptedBlobStore.java index 4d2d69e473438..a18ca8b9d5c39 100644 --- a/server/src/main/java/org/opensearch/common/blobstore/EncryptedBlobStore.java +++ b/server/src/main/java/org/opensearch/common/blobstore/EncryptedBlobStore.java @@ -75,6 +75,16 @@ public Map stats() { return blobStore.stats(); } + /** + * Retrieves extended statistics about the BlobStore. Delegates the call to the underlying BlobStore's extendedStats() method. + * + * @return A map containing extended statistics about the BlobStore. + */ + @Override + public Map> extendedStats() { + return blobStore.extendedStats(); + } + /** * Closes the EncryptedBlobStore by decrementing the reference count of the CryptoManager and closing the * underlying BlobStore. This ensures proper cleanup of resources. diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 90f91dcb7c553..76883c200542e 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -377,6 +377,7 @@ public void apply(Settings value, Settings current, Settings previous) { TransportSearchAction.SEARCH_CANCEL_AFTER_TIME_INTERVAL_SETTING, TransportSearchAction.SEARCH_REQUEST_STATS_ENABLED, TransportSearchAction.SEARCH_PHASE_TOOK_ENABLED, + TransportSearchAction.SEARCH_QUERY_METRICS_ENABLED_SETTING, RemoteClusterService.REMOTE_CLUSTER_SKIP_UNAVAILABLE, SniffConnectionStrategy.REMOTE_CONNECTIONS_PER_CLUSTER, RemoteClusterService.REMOTE_INITIAL_CONNECTION_TIMEOUT_SETTING, diff --git a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java index bba9abbd67ed6..13d2f43d6b7f7 100644 --- a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java +++ b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java @@ -159,38 +159,44 @@ public void start( PersistedState remotePersistedState = null; boolean success = false; try { - ClusterState clusterState = prepareInitialClusterState( + ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(settings)) + .version(lastAcceptedVersion) + .metadata(metadata) + .build(); + + if (DiscoveryNode.isClusterManagerNode(settings) && isRemoteStoreClusterStateEnabled(settings)) { + // If the cluster UUID loaded from local is unknown (_na_) then fetch the best state from remote + // If there is no valid state on remote, continue with initial empty state + // If there is a valid state, then restore index metadata using this state + String lastKnownClusterUUID = ClusterState.UNKNOWN_UUID; + if (ClusterState.UNKNOWN_UUID.equals(clusterState.metadata().clusterUUID())) { + lastKnownClusterUUID = remoteClusterStateService.getLastKnownUUIDFromRemote( + clusterState.getClusterName().value() + ); + if (ClusterState.UNKNOWN_UUID.equals(lastKnownClusterUUID) == false) { + // Load state from remote + final RemoteRestoreResult remoteRestoreResult = remoteStoreRestoreService.restore( + clusterState, + lastKnownClusterUUID, + false, + new String[] {} + ); + clusterState = remoteRestoreResult.getClusterState(); + } + } + remotePersistedState = new RemotePersistedState(remoteClusterStateService, lastKnownClusterUUID); + } + + // Recovers Cluster and Index level blocks + clusterState = prepareInitialClusterState( transportService, clusterService, - ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(settings)) - .version(lastAcceptedVersion) - .metadata(upgradeMetadataForNode(metadata, metadataIndexUpgradeService, metadataUpgrader)) + ClusterState.builder(clusterState) + .metadata(upgradeMetadataForNode(clusterState.metadata(), metadataIndexUpgradeService, metadataUpgrader)) .build() ); if (DiscoveryNode.isClusterManagerNode(settings)) { - if (isRemoteStoreClusterStateEnabled(settings)) { - // If the cluster UUID loaded from local is unknown (_na_) then fetch the best state from remote - // If there is no valid state on remote, continue with initial empty state - // If there is a valid state, then restore index metadata using this state - String lastKnownClusterUUID = ClusterState.UNKNOWN_UUID; - if (ClusterState.UNKNOWN_UUID.equals(clusterState.metadata().clusterUUID())) { - lastKnownClusterUUID = remoteClusterStateService.getLastKnownUUIDFromRemote( - clusterState.getClusterName().value() - ); - if (ClusterState.UNKNOWN_UUID.equals(lastKnownClusterUUID) == false) { - // Load state from remote - final RemoteRestoreResult remoteRestoreResult = remoteStoreRestoreService.restore( - clusterState, - lastKnownClusterUUID, - false, - new String[] {} - ); - clusterState = remoteRestoreResult.getClusterState(); - } - } - remotePersistedState = new RemotePersistedState(remoteClusterStateService, lastKnownClusterUUID); - } persistedState = new LucenePersistedState(persistedClusterStateService, currentTerm, clusterState); } else { persistedState = new AsyncLucenePersistedState( diff --git a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java index 40b16f3d6323b..97b37d9532f85 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java +++ b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java @@ -33,6 +33,9 @@ */ public class ClusterMetadataManifest implements Writeable, ToXContentFragment { + public static final int CODEC_V0 = 0; // Older codec version, where we haven't introduced codec versions for manifest. + public static final int CODEC_V1 = 1; // In Codec V1 we have introduced global-metadata and codec version in Manifest file. + private static final ParseField CLUSTER_TERM_FIELD = new ParseField("cluster_term"); private static final ParseField STATE_VERSION_FIELD = new ParseField("state_version"); private static final ParseField CLUSTER_UUID_FIELD = new ParseField("cluster_uuid"); @@ -40,6 +43,8 @@ public class ClusterMetadataManifest implements Writeable, ToXContentFragment { private static final ParseField OPENSEARCH_VERSION_FIELD = new ParseField("opensearch_version"); private static final ParseField NODE_ID_FIELD = new ParseField("node_id"); private static final ParseField COMMITTED_FIELD = new ParseField("committed"); + private static final ParseField CODEC_VERSION_FIELD = new ParseField("codec_version"); + private static final ParseField GLOBAL_METADATA_FIELD = new ParseField("global_metadata"); private static final ParseField INDICES_FIELD = new ParseField("indices"); private static final ParseField PREVIOUS_CLUSTER_UUID = new ParseField("previous_cluster_uuid"); private static final ParseField CLUSTER_UUID_COMMITTED = new ParseField("cluster_uuid_committed"); @@ -84,7 +89,33 @@ private static boolean clusterUUIDCommitted(Object[] fields) { return (boolean) fields[9]; } - private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + private static int codecVersion(Object[] fields) { + return (int) fields[10]; + } + + private static String globalMetadataFileName(Object[] fields) { + return (String) fields[11]; + } + + private static final ConstructingObjectParser PARSER_V0 = new ConstructingObjectParser<>( + "cluster_metadata_manifest", + fields -> new ClusterMetadataManifest( + term(fields), + version(fields), + clusterUUID(fields), + stateUUID(fields), + opensearchVersion(fields), + nodeId(fields), + committed(fields), + CODEC_V0, + null, + indices(fields), + previousClusterUUID(fields), + clusterUUIDCommitted(fields) + ) + ); + + private static final ConstructingObjectParser PARSER_V1 = new ConstructingObjectParser<>( "cluster_metadata_manifest", fields -> new ClusterMetadataManifest( term(fields), @@ -94,29 +125,45 @@ private static boolean clusterUUIDCommitted(Object[] fields) { opensearchVersion(fields), nodeId(fields), committed(fields), + codecVersion(fields), + globalMetadataFileName(fields), indices(fields), previousClusterUUID(fields), clusterUUIDCommitted(fields) ) ); + private static final ConstructingObjectParser CURRENT_PARSER = PARSER_V1; + static { - PARSER.declareLong(ConstructingObjectParser.constructorArg(), CLUSTER_TERM_FIELD); - PARSER.declareLong(ConstructingObjectParser.constructorArg(), STATE_VERSION_FIELD); - PARSER.declareString(ConstructingObjectParser.constructorArg(), CLUSTER_UUID_FIELD); - PARSER.declareString(ConstructingObjectParser.constructorArg(), STATE_UUID_FIELD); - PARSER.declareInt(ConstructingObjectParser.constructorArg(), OPENSEARCH_VERSION_FIELD); - PARSER.declareString(ConstructingObjectParser.constructorArg(), NODE_ID_FIELD); - PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), COMMITTED_FIELD); - PARSER.declareObjectArray( + declareParser(PARSER_V0, CODEC_V0); + declareParser(PARSER_V1, CODEC_V1); + } + + private static void declareParser(ConstructingObjectParser parser, long codec_version) { + parser.declareLong(ConstructingObjectParser.constructorArg(), CLUSTER_TERM_FIELD); + parser.declareLong(ConstructingObjectParser.constructorArg(), STATE_VERSION_FIELD); + parser.declareString(ConstructingObjectParser.constructorArg(), CLUSTER_UUID_FIELD); + parser.declareString(ConstructingObjectParser.constructorArg(), STATE_UUID_FIELD); + parser.declareInt(ConstructingObjectParser.constructorArg(), OPENSEARCH_VERSION_FIELD); + parser.declareString(ConstructingObjectParser.constructorArg(), NODE_ID_FIELD); + parser.declareBoolean(ConstructingObjectParser.constructorArg(), COMMITTED_FIELD); + parser.declareObjectArray( ConstructingObjectParser.constructorArg(), (p, c) -> UploadedIndexMetadata.fromXContent(p), INDICES_FIELD ); - PARSER.declareString(ConstructingObjectParser.constructorArg(), PREVIOUS_CLUSTER_UUID); - PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), CLUSTER_UUID_COMMITTED); + parser.declareString(ConstructingObjectParser.constructorArg(), PREVIOUS_CLUSTER_UUID); + parser.declareBoolean(ConstructingObjectParser.constructorArg(), CLUSTER_UUID_COMMITTED); + + if (codec_version >= CODEC_V1) { + parser.declareInt(ConstructingObjectParser.constructorArg(), CODEC_VERSION_FIELD); + parser.declareString(ConstructingObjectParser.constructorArg(), GLOBAL_METADATA_FIELD); + } } + private final int codecVersion; + private final String globalMetadataFileName; private final List indices; private final long clusterTerm; private final long stateVersion; @@ -168,6 +215,14 @@ public boolean isClusterUUIDCommitted() { return clusterUUIDCommitted; } + public int getCodecVersion() { + return codecVersion; + } + + public String getGlobalMetadataFileName() { + return globalMetadataFileName; + } + public ClusterMetadataManifest( long clusterTerm, long version, @@ -176,6 +231,8 @@ public ClusterMetadataManifest( Version opensearchVersion, String nodeId, boolean committed, + int codecVersion, + String globalMetadataFileName, List indices, String previousClusterUUID, boolean clusterUUIDCommitted @@ -187,6 +244,8 @@ public ClusterMetadataManifest( this.opensearchVersion = opensearchVersion; this.nodeId = nodeId; this.committed = committed; + this.codecVersion = codecVersion; + this.globalMetadataFileName = globalMetadataFileName; this.indices = Collections.unmodifiableList(indices); this.previousClusterUUID = previousClusterUUID; this.clusterUUIDCommitted = clusterUUIDCommitted; @@ -203,6 +262,13 @@ public ClusterMetadataManifest(StreamInput in) throws IOException { this.indices = Collections.unmodifiableList(in.readList(UploadedIndexMetadata::new)); this.previousClusterUUID = in.readString(); this.clusterUUIDCommitted = in.readBoolean(); + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + this.codecVersion = in.readInt(); + this.globalMetadataFileName = in.readString(); + } else { + this.codecVersion = CODEC_V0; // Default codec + this.globalMetadataFileName = null; + } } public static Builder builder() { @@ -231,6 +297,10 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.endArray(); builder.field(PREVIOUS_CLUSTER_UUID.getPreferredName(), getPreviousClusterUUID()); builder.field(CLUSTER_UUID_COMMITTED.getPreferredName(), isClusterUUIDCommitted()); + if (onOrAfterCodecVersion(CODEC_V1)) { + builder.field(CODEC_VERSION_FIELD.getPreferredName(), getCodecVersion()); + builder.field(GLOBAL_METADATA_FIELD.getPreferredName(), getGlobalMetadataFileName()); + } return builder; } @@ -246,6 +316,10 @@ public void writeTo(StreamOutput out) throws IOException { out.writeCollection(indices); out.writeString(previousClusterUUID); out.writeBoolean(clusterUUIDCommitted); + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeInt(codecVersion); + out.writeString(globalMetadataFileName); + } } @Override @@ -266,12 +340,16 @@ public boolean equals(Object o) { && Objects.equals(nodeId, that.nodeId) && Objects.equals(committed, that.committed) && Objects.equals(previousClusterUUID, that.previousClusterUUID) - && Objects.equals(clusterUUIDCommitted, that.clusterUUIDCommitted); + && Objects.equals(clusterUUIDCommitted, that.clusterUUIDCommitted) + && Objects.equals(globalMetadataFileName, that.globalMetadataFileName) + && Objects.equals(codecVersion, that.codecVersion); } @Override public int hashCode() { return Objects.hash( + codecVersion, + globalMetadataFileName, indices, clusterTerm, stateVersion, @@ -290,8 +368,16 @@ public String toString() { return Strings.toString(MediaTypeRegistry.JSON, this); } + public boolean onOrAfterCodecVersion(int codecVersion) { + return this.codecVersion >= codecVersion; + } + + public static ClusterMetadataManifest fromXContentV0(XContentParser parser) throws IOException { + return PARSER_V0.parse(parser, null); + } + public static ClusterMetadataManifest fromXContent(XContentParser parser) throws IOException { - return PARSER.parse(parser, null); + return CURRENT_PARSER.parse(parser, null); } /** @@ -301,6 +387,8 @@ public static ClusterMetadataManifest fromXContent(XContentParser parser) throws */ public static class Builder { + private String globalMetadataFileName; + private int codecVersion; private List indices; private long clusterTerm; private long stateVersion; @@ -317,6 +405,16 @@ public Builder indices(List indices) { return this; } + public Builder codecVersion(int codecVersion) { + this.codecVersion = codecVersion; + return this; + } + + public Builder globalMetadataFileName(String globalMetadataFileName) { + this.globalMetadataFileName = globalMetadataFileName; + return this; + } + public Builder clusterTerm(long clusterTerm) { this.clusterTerm = clusterTerm; return this; @@ -378,6 +476,8 @@ public Builder(ClusterMetadataManifest manifest) { this.opensearchVersion = manifest.opensearchVersion; this.nodeId = manifest.nodeId; this.committed = manifest.committed; + this.globalMetadataFileName = manifest.globalMetadataFileName; + this.codecVersion = manifest.codecVersion; this.indices = new ArrayList<>(manifest.indices); this.previousClusterUUID = manifest.previousClusterUUID; this.clusterUUIDCommitted = manifest.clusterUUIDCommitted; @@ -392,6 +492,8 @@ public ClusterMetadataManifest build() { opensearchVersion, nodeId, committed, + codecVersion, + globalMetadataFileName, indices, previousClusterUUID, clusterUUIDCommitted diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index 251b55a31e16c..4702c7dbfe818 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -15,6 +15,7 @@ import org.opensearch.action.LatchedActionListener; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; import org.opensearch.common.Nullable; import org.opensearch.common.blobstore.BlobContainer; import org.opensearch.common.blobstore.BlobMetadata; @@ -27,6 +28,7 @@ import org.opensearch.common.util.io.IOUtils; import org.opensearch.core.action.ActionListener; import org.opensearch.core.index.Index; +import org.opensearch.core.xcontent.ToXContent; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.index.translog.transfer.BlobStoreTransferService; @@ -55,6 +57,7 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; import java.util.function.LongSupplier; import java.util.function.Supplier; @@ -80,7 +83,9 @@ public class RemoteClusterStateService implements Closeable { private static final Logger logger = LogManager.getLogger(RemoteClusterStateService.class); + // TODO make this two variable as dynamic setting [issue: #10688] public static final int INDEX_METADATA_UPLOAD_WAIT_MILLIS = 20000; + public static final int GLOBAL_METADATA_UPLOAD_WAIT_MILLIS = 20000; public static final ChecksumBlobStoreFormat INDEX_METADATA_FORMAT = new ChecksumBlobStoreFormat<>( "index-metadata", @@ -88,11 +93,27 @@ public class RemoteClusterStateService implements Closeable { IndexMetadata::fromXContent ); + public static final ChecksumBlobStoreFormat GLOBAL_METADATA_FORMAT = new ChecksumBlobStoreFormat<>( + "metadata", + METADATA_NAME_FORMAT, + Metadata::fromXContent + ); + + /** + * Manifest format compatible with older codec v0, where codec version was missing. + */ + public static final ChecksumBlobStoreFormat CLUSTER_METADATA_MANIFEST_FORMAT_V0 = + new ChecksumBlobStoreFormat<>("cluster-metadata-manifest", METADATA_MANIFEST_NAME_FORMAT, ClusterMetadataManifest::fromXContentV0); + + /** + * Manifest format compatible with codec v1, where we introduced codec versions/global metadata. + */ public static final ChecksumBlobStoreFormat CLUSTER_METADATA_MANIFEST_FORMAT = new ChecksumBlobStoreFormat<>( "cluster-metadata-manifest", METADATA_MANIFEST_NAME_FORMAT, ClusterMetadataManifest::fromXContent ); + /** * Used to specify if cluster state metadata should be published to remote store */ @@ -105,9 +126,11 @@ public class RemoteClusterStateService implements Closeable { public static final String CLUSTER_STATE_PATH_TOKEN = "cluster-state"; public static final String INDEX_PATH_TOKEN = "index"; + public static final String GLOBAL_METADATA_PATH_TOKEN = "global-metadata"; public static final String MANIFEST_PATH_TOKEN = "manifest"; public static final String MANIFEST_FILE_PREFIX = "manifest"; - public static final String INDEX_METADATA_FILE_PREFIX = "metadata"; + public static final String METADATA_FILE_PREFIX = "metadata"; + public static final int SPLITED_MANIFEST_FILE_LENGTH = 6; // file name manifest__term__version__C/P__timestamp__codecversion private final String nodeId; private final Supplier repositoriesService; @@ -121,7 +144,17 @@ public class RemoteClusterStateService implements Closeable { private final AtomicBoolean deleteStaleMetadataRunning = new AtomicBoolean(false); private final RemoteStateStats remoteStateStats; public static final int INDEX_METADATA_CURRENT_CODEC_VERSION = 1; - public static final int MANIFEST_CURRENT_CODEC_VERSION = 1; + public static final int MANIFEST_CURRENT_CODEC_VERSION = ClusterMetadataManifest.CODEC_V1; + public static final int GLOBAL_METADATA_CURRENT_CODEC_VERSION = 1; + + // ToXContent Params with gateway mode. + // We are using gateway context mode to persist all custom metadata. + public static final ToXContent.Params FORMAT_PARAMS; + static { + Map params = new HashMap<>(1); + params.put(Metadata.CONTEXT_MODE_PARAM, Metadata.CONTEXT_MODE_GATEWAY); + FORMAT_PARAMS = new ToXContent.MapParams(params); + } public RemoteClusterStateService( String nodeId, @@ -163,12 +196,22 @@ public ClusterMetadataManifest writeFullMetadata(ClusterState clusterState, Stri return null; } + // TODO: we can upload global metadata and index metadata in parallel. [issue: #10645] + // Write globalMetadata + String globalMetadataFile = writeGlobalMetadata(clusterState); + // any validations before/after upload ? final List allUploadedIndexMetadata = writeIndexMetadataParallel( clusterState, new ArrayList<>(clusterState.metadata().indices().values()) ); - final ClusterMetadataManifest manifest = uploadManifest(clusterState, allUploadedIndexMetadata, previousClusterUUID, false); + final ClusterMetadataManifest manifest = uploadManifest( + clusterState, + allUploadedIndexMetadata, + previousClusterUUID, + globalMetadataFile, + false + ); final long durationMillis = TimeValue.nsecToMSec(relativeTimeNanosSupplier.getAsLong() - startTimeNanos); remoteStateStats.stateUploaded(); remoteStateStats.stateUploadTook(durationMillis); @@ -209,6 +252,22 @@ public ClusterMetadataManifest writeIncrementalMetadata( return null; } assert previousClusterState.metadata().coordinationMetadata().term() == clusterState.metadata().coordinationMetadata().term(); + + // Write Global Metadata + final boolean updateGlobalMetadata = Metadata.isGlobalStateEquals( + previousClusterState.metadata(), + clusterState.metadata() + ) == false; + String globalMetadataFile; + // For migration case from codec V0 to V1, we have added null check on global metadata file, + // If file is empty and codec is 1 then write global metadata. + if (updateGlobalMetadata || previousManifest.getGlobalMetadataFileName() == null) { + globalMetadataFile = writeGlobalMetadata(clusterState); + } else { + globalMetadataFile = previousManifest.getGlobalMetadataFileName(); + } + + // Write Index Metadata final Map previousStateIndexMetadataVersionByName = new HashMap<>(); for (final IndexMetadata indexMetadata : previousClusterState.metadata().indices().values()) { previousStateIndexMetadataVersionByName.put(indexMetadata.getIndex().getName(), indexMetadata.getVersion()); @@ -251,6 +310,7 @@ public ClusterMetadataManifest writeIncrementalMetadata( clusterState, new ArrayList<>(allUploadedIndexMetadata.values()), previousManifest.getPreviousClusterUUID(), + globalMetadataFile, false ); deleteStaleClusterMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID(), RETAINED_MANIFESTS); @@ -278,6 +338,59 @@ public ClusterMetadataManifest writeIncrementalMetadata( return manifest; } + /** + * Uploads provided ClusterState's global Metadata to remote store in parallel. + * The call is blocking so the method waits for upload to finish and then return. + * + * @param clusterState current ClusterState + * @return String file name where globalMetadata file is stored. + */ + private String writeGlobalMetadata(ClusterState clusterState) throws IOException { + + AtomicReference result = new AtomicReference(); + final BlobContainer globalMetadataContainer = globalMetadataContainer( + clusterState.getClusterName().value(), + clusterState.metadata().clusterUUID() + ); + final String globalMetadataFilename = globalMetadataFileName(clusterState.metadata()); + + // latch to wait until upload is not finished + CountDownLatch latch = new CountDownLatch(1); + + LatchedActionListener completionListener = new LatchedActionListener<>(ActionListener.wrap(resp -> { + logger.trace(String.format(Locale.ROOT, "GlobalMetadata uploaded successfully.")); + result.set(globalMetadataContainer.path().buildAsString() + globalMetadataFilename); + }, ex -> { throw new GlobalMetadataTransferException(ex.getMessage(), ex); }), latch); + + GLOBAL_METADATA_FORMAT.writeAsync( + clusterState.metadata(), + globalMetadataContainer, + globalMetadataFilename, + blobStoreRepository.getCompressor(), + completionListener, + FORMAT_PARAMS + ); + + try { + if (latch.await(GLOBAL_METADATA_UPLOAD_WAIT_MILLIS, TimeUnit.MILLISECONDS) == false) { + // TODO: We should add metrics where transfer is timing out. [Issue: #10687] + GlobalMetadataTransferException ex = new GlobalMetadataTransferException( + String.format(Locale.ROOT, "Timed out waiting for transfer of global metadata to complete") + ); + throw ex; + } + } catch (InterruptedException ex) { + GlobalMetadataTransferException exception = new GlobalMetadataTransferException( + String.format(Locale.ROOT, "Timed out waiting for transfer of global metadata to complete - %s"), + ex + ); + Thread.currentThread().interrupt(); + throw exception; + } + + return result.get(); + } + /** * Uploads provided IndexMetadata's to remote store in parallel. The call is blocking so the method waits for upload to finish and then return. * @@ -386,7 +499,8 @@ private void writeIndexMetadataAsync( indexMetadataContainer, indexMetadataFilename, blobStoreRepository.getCompressor(), - completionListener + completionListener, + FORMAT_PARAMS ); } @@ -403,6 +517,7 @@ public ClusterMetadataManifest markLastStateAsCommitted(ClusterState clusterStat clusterState, previousManifest.getIndices(), previousManifest.getPreviousClusterUUID(), + previousManifest.getGlobalMetadataFileName(), true ); deleteStaleClusterUUIDs(clusterState, committedManifest); @@ -431,6 +546,7 @@ private ClusterMetadataManifest uploadManifest( ClusterState clusterState, List uploadedIndexMetadata, String previousClusterUUID, + String globalClusterMetadataFileName, boolean committed ) throws IOException { synchronized (this) { @@ -443,6 +559,8 @@ private ClusterMetadataManifest uploadManifest( Version.CURRENT, nodeId, committed, + MANIFEST_CURRENT_CODEC_VERSION, + globalClusterMetadataFileName, uploadedIndexMetadata, previousClusterUUID, clusterState.metadata().clusterUUIDCommitted() @@ -474,6 +592,12 @@ private BlobContainer indexMetadataContainer(String clusterName, String clusterU .blobContainer(getCusterMetadataBasePath(clusterName, clusterUUID).add(INDEX_PATH_TOKEN).add(indexUUID)); } + private BlobContainer globalMetadataContainer(String clusterName, String clusterUUID) { + // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/global-metadata/ + return blobStoreRepository.blobStore() + .blobContainer(getCusterMetadataBasePath(clusterName, clusterUUID).add(GLOBAL_METADATA_PATH_TOKEN)); + } + private BlobContainer manifestContainer(String clusterName, String clusterUUID) { // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/manifest return blobStoreRepository.blobStore().blobContainer(getManifestFolderPath(clusterName, clusterUUID)); @@ -515,7 +639,7 @@ static String indexMetadataFileName(IndexMetadata indexMetadata) { // version> return String.join( DELIMITER, - INDEX_METADATA_FILE_PREFIX, + METADATA_FILE_PREFIX, RemoteStoreUtils.invertLong(indexMetadata.getVersion()), RemoteStoreUtils.invertLong(System.currentTimeMillis()), String.valueOf(INDEX_METADATA_CURRENT_CODEC_VERSION) // Keep the codec version at last place only, during read we reads last @@ -523,6 +647,17 @@ static String indexMetadataFileName(IndexMetadata indexMetadata) { ); } + private static String globalMetadataFileName(Metadata metadata) { + // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/global-metadata/metadata______ + return String.join( + DELIMITER, + METADATA_FILE_PREFIX, + RemoteStoreUtils.invertLong(metadata.version()), + RemoteStoreUtils.invertLong(System.currentTimeMillis()), + String.valueOf(GLOBAL_METADATA_CURRENT_CODEC_VERSION) + ); + } + private BlobPath getManifestFolderPath(String clusterName, String clusterUUID) { return getCusterMetadataBasePath(clusterName, clusterUUID).add(MANIFEST_PATH_TOKEN); } @@ -532,18 +667,18 @@ private BlobPath getManifestFolderPath(String clusterName, String clusterUUID) { * * @param clusterUUID uuid of cluster state to refer to in remote * @param clusterName name of the cluster + * @param clusterMetadataManifest manifest file of cluster * @return {@code Map} latest IndexUUID to IndexMetadata map */ - public Map getLatestIndexMetadata(String clusterName, String clusterUUID) throws IOException { - start(); - Map remoteIndexMetadata = new HashMap<>(); - Optional clusterMetadataManifest = getLatestClusterMetadataManifest(clusterName, clusterUUID); - if (!clusterMetadataManifest.isPresent()) { - throw new IllegalStateException("Latest index metadata is not present for the provided clusterUUID"); - } - assert Objects.equals(clusterUUID, clusterMetadataManifest.get().getClusterUUID()) + private Map getIndexMetadataMap( + String clusterName, + String clusterUUID, + ClusterMetadataManifest clusterMetadataManifest + ) { + assert Objects.equals(clusterUUID, clusterMetadataManifest.getClusterUUID()) : "Corrupt ClusterMetadataManifest found. Cluster UUID mismatch."; - for (UploadedIndexMetadata uploadedIndexMetadata : clusterMetadataManifest.get().getIndices()) { + Map remoteIndexMetadata = new HashMap<>(); + for (UploadedIndexMetadata uploadedIndexMetadata : clusterMetadataManifest.getIndices()) { IndexMetadata indexMetadata = getIndexMetadata(clusterName, clusterUUID, uploadedIndexMetadata); remoteIndexMetadata.put(uploadedIndexMetadata.getIndexUUID(), indexMetadata); } @@ -574,6 +709,52 @@ private IndexMetadata getIndexMetadata(String clusterName, String clusterUUID, U } } + /** + * Fetch latest metadata from remote cluster state including global metadata and index metadata + * + * @param clusterUUID uuid of cluster state to refer to in remote + * @param clusterName name of the cluster + * @return {@link IndexMetadata} + */ + public Metadata getLatestMetadata(String clusterName, String clusterUUID) { + start(); + Optional clusterMetadataManifest = getLatestClusterMetadataManifest(clusterName, clusterUUID); + if (!clusterMetadataManifest.isPresent()) { + throw new IllegalStateException( + String.format(Locale.ROOT, "Latest cluster metadata manifest is not present for the provided clusterUUID: %s", clusterUUID) + ); + } + // Fetch Global Metadata + Metadata globalMetadata = getGlobalMetadata(clusterName, clusterUUID, clusterMetadataManifest.get()); + + // Fetch Index Metadata + Map indices = getIndexMetadataMap(clusterName, clusterUUID, clusterMetadataManifest.get()); + + return Metadata.builder(globalMetadata).indices(indices).build(); + } + + private Metadata getGlobalMetadata(String clusterName, String clusterUUID, ClusterMetadataManifest clusterMetadataManifest) { + String globalMetadataFileName = clusterMetadataManifest.getGlobalMetadataFileName(); + try { + // Fetch Global metadata + if (globalMetadataFileName != null) { + String[] splitPath = globalMetadataFileName.split("/"); + return GLOBAL_METADATA_FORMAT.read( + globalMetadataContainer(clusterName, clusterUUID), + splitPath[splitPath.length - 1], + blobStoreRepository.getNamedXContentRegistry() + ); + } else { + return Metadata.EMPTY_METADATA; + } + } catch (IOException e) { + throw new IllegalStateException( + String.format(Locale.ROOT, "Error while downloading Global Metadata - %s", globalMetadataFileName), + e + ); + } + } + /** * Fetch latest ClusterMetadataManifest from remote state store * @@ -583,10 +764,7 @@ private IndexMetadata getIndexMetadata(String clusterName, String clusterUUID, U */ public Optional getLatestClusterMetadataManifest(String clusterName, String clusterUUID) { Optional latestManifestFileName = getLatestManifestFileName(clusterName, clusterUUID); - if (latestManifestFileName.isPresent()) { - return Optional.of(fetchRemoteClusterMetadataManifest(clusterName, clusterUUID, latestManifestFileName.get())); - } - return Optional.empty(); + return latestManifestFileName.map(s -> fetchRemoteClusterMetadataManifest(clusterName, clusterUUID, s)); } /** @@ -606,7 +784,8 @@ public String getLastKnownUUIDFromRemote(String clusterName) { return validChain.get(0); } catch (IOException e) { throw new IllegalStateException( - String.format(Locale.ROOT, "Error while fetching previous UUIDs from remote store for cluster name: %s", clusterName) + String.format(Locale.ROOT, "Error while fetching previous UUIDs from remote store for cluster name: %s", clusterName), + e ); } } @@ -627,7 +806,8 @@ private Map getLatestManifestForAllClusterUUIDs manifest.ifPresent(clusterMetadataManifest -> manifestsByClusterUUID.put(clusterUUID, clusterMetadataManifest)); } catch (Exception e) { throw new IllegalStateException( - String.format(Locale.ROOT, "Exception in fetching manifest for clusterUUID: %s", clusterUUID) + String.format(Locale.ROOT, "Exception in fetching manifest for clusterUUID: %s", clusterUUID), + e ); } } @@ -793,7 +973,7 @@ private Optional getLatestManifestFileName(String clusterName, String cl private ClusterMetadataManifest fetchRemoteClusterMetadataManifest(String clusterName, String clusterUUID, String filename) throws IllegalStateException { try { - return RemoteClusterStateService.CLUSTER_METADATA_MANIFEST_FORMAT.read( + return getClusterMetadataManifestBlobStoreFormat(filename).read( manifestContainer(clusterName, clusterUUID), filename, blobStoreRepository.getNamedXContentRegistry() @@ -803,6 +983,29 @@ private ClusterMetadataManifest fetchRemoteClusterMetadataManifest(String cluste } } + private ChecksumBlobStoreFormat getClusterMetadataManifestBlobStoreFormat(String fileName) { + long codecVersion = getManifestCodecVersion(fileName); + if (codecVersion == MANIFEST_CURRENT_CODEC_VERSION) { + return CLUSTER_METADATA_MANIFEST_FORMAT; + } else if (codecVersion == ClusterMetadataManifest.CODEC_V0) { + return CLUSTER_METADATA_MANIFEST_FORMAT_V0; + } + + throw new IllegalArgumentException("Cluster metadata manifest file is corrupted, don't have valid codec version"); + } + + private int getManifestCodecVersion(String fileName) { + String[] splitName = fileName.split(DELIMITER); + if (splitName.length == SPLITED_MANIFEST_FILE_LENGTH) { + return Integer.parseInt(splitName[splitName.length - 1]); // Last value would be codec version. + } else if (splitName.length < SPLITED_MANIFEST_FILE_LENGTH) { // Where codec is not part of file name, i.e. default codec version 0 + // is used. + return ClusterMetadataManifest.CODEC_V0; + } else { + throw new IllegalArgumentException("Manifest file name is corrupted"); + } + } + public static String encodeString(String content) { return Base64.getUrlEncoder().withoutPadding().encodeToString(content.getBytes(StandardCharsets.UTF_8)); } @@ -825,6 +1028,20 @@ public IndexMetadataTransferException(String errorDesc, Throwable cause) { } } + /** + * Exception for GlobalMetadata transfer failures to remote + */ + static class GlobalMetadataTransferException extends RuntimeException { + + public GlobalMetadataTransferException(String errorDesc) { + super(errorDesc); + } + + public GlobalMetadataTransferException(String errorDesc, Throwable cause) { + super(errorDesc, cause); + } + } + /** * Purges all remote cluster state against provided cluster UUIDs * @@ -917,6 +1134,7 @@ private void deleteClusterMetadata( Set filesToKeep = new HashSet<>(); Set staleManifestPaths = new HashSet<>(); Set staleIndexMetadataPaths = new HashSet<>(); + Set staleGlobalMetadataPaths = new HashSet<>(); activeManifestBlobMetadata.forEach(blobMetadata -> { ClusterMetadataManifest clusterMetadataManifest = fetchRemoteClusterMetadataManifest( clusterName, @@ -925,6 +1143,7 @@ private void deleteClusterMetadata( ); clusterMetadataManifest.getIndices() .forEach(uploadedIndexMetadata -> filesToKeep.add(uploadedIndexMetadata.getUploadedFilename())); + filesToKeep.add(clusterMetadataManifest.getGlobalMetadataFileName()); }); staleManifestBlobMetadata.forEach(blobMetadata -> { ClusterMetadataManifest clusterMetadataManifest = fetchRemoteClusterMetadataManifest( @@ -933,6 +1152,14 @@ private void deleteClusterMetadata( blobMetadata.name() ); staleManifestPaths.add(new BlobPath().add(MANIFEST_PATH_TOKEN).buildAsString() + blobMetadata.name()); + if (filesToKeep.contains(clusterMetadataManifest.getGlobalMetadataFileName()) == false) { + String[] globalMetadataSplitPath = clusterMetadataManifest.getGlobalMetadataFileName().split("/"); + staleGlobalMetadataPaths.add( + new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName( + globalMetadataSplitPath[globalMetadataSplitPath.length - 1] + ) + ); + } clusterMetadataManifest.getIndices().forEach(uploadedIndexMetadata -> { if (filesToKeep.contains(uploadedIndexMetadata.getUploadedFilename()) == false) { staleIndexMetadataPaths.add( @@ -948,6 +1175,7 @@ private void deleteClusterMetadata( return; } + deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleGlobalMetadataPaths)); deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleIndexMetadataPaths)); deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleManifestPaths)); } catch (IllegalStateException e) { diff --git a/server/src/main/java/org/opensearch/identity/tokens/AuthToken.java b/server/src/main/java/org/opensearch/identity/tokens/AuthToken.java index c929e7421b3d8..88bb855a6e70d 100644 --- a/server/src/main/java/org/opensearch/identity/tokens/AuthToken.java +++ b/server/src/main/java/org/opensearch/identity/tokens/AuthToken.java @@ -16,4 +16,5 @@ public interface AuthToken { String asAuthHeaderValue(); + } diff --git a/server/src/main/java/org/opensearch/identity/tokens/OnBehalfOfClaims.java b/server/src/main/java/org/opensearch/identity/tokens/OnBehalfOfClaims.java index 3fef248ee6d3a..00e50a59e9486 100644 --- a/server/src/main/java/org/opensearch/identity/tokens/OnBehalfOfClaims.java +++ b/server/src/main/java/org/opensearch/identity/tokens/OnBehalfOfClaims.java @@ -14,46 +14,17 @@ public class OnBehalfOfClaims { private final String audience; - private final String subject; - private final Long expiration; - private final Long not_before; - private final Long issued_at; + private final Long expiration_seconds; /** * Constructor for OnBehalfOfClaims * @param aud the Audience for the token - * @param subject the subject of the token - * @param expiration the expiration time in seconds for the token - * @param not_before the not_before time in seconds for the token - * @param issued_at the issued_at time in seconds for the token - */ - public OnBehalfOfClaims(String aud, String subject, Long expiration, Long not_before, Long issued_at) { - this.audience = aud; - this.subject = subject; - this.expiration = expiration; - this.not_before = not_before; - this.issued_at = issued_at; - } - - /** - * A constructor that sets a default issued at time of the current time - * @param aud the Audience for the token - * @param subject the subject of the token - * @param expiration the expiration time in seconds for the token - * @param not_before the not_before time in seconds for the token - */ - public OnBehalfOfClaims(String aud, String subject, Long expiration, Long not_before) { - this(aud, subject, expiration, not_before, System.currentTimeMillis() / 1000); - } + * @param expiration_seconds the length of time in seconds the token is valid - /** - * A constructor which sets a default not before time of the current time - * @param aud the Audience for the token - * @param subject the subject of the token - * @param expiration the expiration time in seconds for the token */ - public OnBehalfOfClaims(String aud, String subject, Long expiration) { - this(aud, subject, expiration, System.currentTimeMillis() / 1000); + public OnBehalfOfClaims(String aud, Long expiration_seconds) { + this.audience = aud; + this.expiration_seconds = expiration_seconds; } /** @@ -62,26 +33,14 @@ public OnBehalfOfClaims(String aud, String subject, Long expiration) { * @param subject the subject of the token */ public OnBehalfOfClaims(String aud, String subject) { - this(aud, subject, System.currentTimeMillis() / 1000 + 300); + this(aud, 300L); } public String getAudience() { return audience; } - public String getSubject() { - return subject; - } - public Long getExpiration() { - return expiration; - } - - public Long getNot_before() { - return not_before; - } - - public Long getIssued_at() { - return issued_at; + return expiration_seconds; } } diff --git a/server/src/main/java/org/opensearch/index/query/QueryShapeVisitor.java b/server/src/main/java/org/opensearch/index/query/QueryShapeVisitor.java new file mode 100644 index 0000000000000..3ba13bc7a2da4 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/query/QueryShapeVisitor.java @@ -0,0 +1,86 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.query; + +import org.apache.lucene.search.BooleanClause; +import org.opensearch.common.SetOnce; + +import java.util.ArrayList; +import java.util.EnumMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +/** + * Class to traverse the QueryBuilder tree and capture the query shape + */ +public final class QueryShapeVisitor implements QueryBuilderVisitor { + private final SetOnce queryType = new SetOnce<>(); + private final Map> childVisitors = new EnumMap<>(BooleanClause.Occur.class); + + @Override + public void accept(QueryBuilder qb) { + queryType.set(qb.getName()); + } + + @Override + public QueryBuilderVisitor getChildVisitor(BooleanClause.Occur occur) { + // Should get called once per Occur value + if (childVisitors.containsKey(occur)) { + throw new IllegalStateException("child visitor already called for " + occur); + } + final List childVisitorList = new ArrayList<>(); + QueryBuilderVisitor childVisitorWrapper = new QueryBuilderVisitor() { + QueryShapeVisitor currentChild; + + @Override + public void accept(QueryBuilder qb) { + currentChild = new QueryShapeVisitor(); + childVisitorList.add(currentChild); + currentChild.accept(qb); + } + + @Override + public QueryBuilderVisitor getChildVisitor(BooleanClause.Occur occur) { + return currentChild.getChildVisitor(occur); + } + }; + childVisitors.put(occur, childVisitorList); + return childVisitorWrapper; + } + + String toJson() { + StringBuilder outputBuilder = new StringBuilder("{\"type\":\"").append(queryType.get()).append("\""); + for (Map.Entry> entry : childVisitors.entrySet()) { + outputBuilder.append(",\"").append(entry.getKey().name().toLowerCase(Locale.ROOT)).append("\"["); + boolean first = true; + for (QueryShapeVisitor child : entry.getValue()) { + if (!first) { + outputBuilder.append(","); + } + outputBuilder.append(child.toJson()); + first = false; + } + outputBuilder.append("]"); + } + outputBuilder.append("}"); + return outputBuilder.toString(); + } + + public String prettyPrintTree(String indent) { + StringBuilder outputBuilder = new StringBuilder(indent).append(queryType.get()).append("\n"); + for (Map.Entry> entry : childVisitors.entrySet()) { + outputBuilder.append(indent).append(" ").append(entry.getKey().name().toLowerCase(Locale.ROOT)).append(":\n"); + for (QueryShapeVisitor child : entry.getValue()) { + outputBuilder.append(child.prettyPrintTree(indent + " ")); + } + } + return outputBuilder.toString(); + } +} diff --git a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java index 94fd08b99ac58..ac9cf35d1d8e5 100644 --- a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java +++ b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java @@ -10,15 +10,16 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.opensearch.Version; import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreRequest; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateUpdateTask; import org.opensearch.cluster.block.ClusterBlocks; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.IndexTemplateMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.metadata.MetadataCreateIndexService; import org.opensearch.cluster.metadata.MetadataIndexUpgradeService; +import org.opensearch.cluster.metadata.RepositoriesMetadata; import org.opensearch.cluster.routing.IndexShardRoutingTable; import org.opensearch.cluster.routing.RecoverySource; import org.opensearch.cluster.routing.RoutingTable; @@ -27,6 +28,7 @@ import org.opensearch.common.Nullable; import org.opensearch.common.UUIDs; import org.opensearch.common.collect.Tuple; +import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.core.action.ActionListener; import org.opensearch.core.index.shard.ShardId; @@ -42,11 +44,13 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_STORE_ENABLED; +import static org.opensearch.repositories.blobstore.BlobStoreRepository.SYSTEM_REPOSITORY_SETTING; /** * Service responsible for restoring index data from remote store @@ -136,16 +140,16 @@ public RemoteRestoreResult restore( String[] indexNames ) { Map> indexMetadataMap = new HashMap<>(); + Metadata remoteMetadata = null; boolean metadataFromRemoteStore = (restoreClusterUUID == null || restoreClusterUUID.isEmpty() || restoreClusterUUID.isBlank()) == false; if (metadataFromRemoteStore) { try { - remoteClusterStateService.getLatestIndexMetadata(currentState.getClusterName().value(), restoreClusterUUID) - .values() - .forEach(indexMetadata -> { - indexMetadataMap.put(indexMetadata.getIndex().getName(), new Tuple<>(true, indexMetadata)); - }); + remoteMetadata = remoteClusterStateService.getLatestMetadata(currentState.getClusterName().value(), restoreClusterUUID); + remoteMetadata.getIndices().values().forEach(indexMetadata -> { + indexMetadataMap.put(indexMetadata.getIndex().getName(), new Tuple<>(true, indexMetadata)); + }); } catch (Exception e) { throw new IllegalStateException("Unable to restore remote index metadata", e); } @@ -160,7 +164,7 @@ public RemoteRestoreResult restore( } } validate(currentState, indexMetadataMap, restoreClusterUUID, restoreAllShards); - return executeRestore(currentState, indexMetadataMap, restoreAllShards); + return executeRestore(currentState, indexMetadataMap, restoreAllShards, remoteMetadata); } /** @@ -173,7 +177,8 @@ public RemoteRestoreResult restore( private RemoteRestoreResult executeRestore( ClusterState currentState, Map> indexMetadataMap, - boolean restoreAllShards + boolean restoreAllShards, + Metadata remoteMetadata ) { final String restoreUUID = UUIDs.randomBase64UUID(); List indicesToBeRestored = new ArrayList<>(); @@ -226,6 +231,10 @@ private RemoteRestoreResult executeRestore( totalShards += updatedIndexMetadata.getNumberOfShards(); } + if (remoteMetadata != null) { + restoreGlobalMetadata(mdBuilder, remoteMetadata); + } + RestoreInfo restoreInfo = new RestoreInfo("remote_store", indicesToBeRestored, totalShards, totalShards); RoutingTable rt = rtBuilder.build(); @@ -233,6 +242,36 @@ private RemoteRestoreResult executeRestore( return RemoteRestoreResult.build(restoreUUID, restoreInfo, allocationService.reroute(updatedState, "restored from remote store")); } + private void restoreGlobalMetadata(Metadata.Builder mdBuilder, Metadata remoteMetadata) { + if (remoteMetadata.persistentSettings() != null) { + Settings settings = remoteMetadata.persistentSettings(); + clusterService.getClusterSettings().validateUpdate(settings); + mdBuilder.persistentSettings(settings); + } + if (remoteMetadata.templates() != null) { + for (final IndexTemplateMetadata cursor : remoteMetadata.templates().values()) { + mdBuilder.put(cursor); + } + } + if (remoteMetadata.customs() != null) { + for (final Map.Entry cursor : remoteMetadata.customs().entrySet()) { + if (RepositoriesMetadata.TYPE.equals(cursor.getKey()) == false) { + mdBuilder.putCustom(cursor.getKey(), cursor.getValue()); + } + } + } + Optional repositoriesMetadata = Optional.ofNullable(remoteMetadata.custom(RepositoriesMetadata.TYPE)); + repositoriesMetadata = repositoriesMetadata.map( + repositoriesMetadata1 -> new RepositoriesMetadata( + repositoriesMetadata1.repositories() + .stream() + .filter(repository -> SYSTEM_REPOSITORY_SETTING.get(repository.settings()) == false) + .collect(Collectors.toList()) + ) + ); + repositoriesMetadata.ifPresent(metadata -> mdBuilder.putCustom(RepositoriesMetadata.TYPE, metadata)); + } + /** * Performs various validations needed before executing restore * @param currentState current cluster state @@ -297,8 +336,6 @@ private void validate( throw new IllegalStateException(finalErrorMsg); } - Version minIndexCompatibilityVersion = currentState.getNodes().getMaxNodeVersion().minimumIndexCompatibilityVersion(); - metadataIndexUpgradeService.upgradeIndexMetadata(indexMetadata, minIndexCompatibilityVersion); boolean isHidden = IndexMetadata.INDEX_HIDDEN_SETTING.get(indexMetadata.getSettings()); createIndexService.validateIndexName(indexName, currentState); createIndexService.validateDotIndex(indexName, isHidden); diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 9489c7d7fc1dd..5ebfd3863a6cf 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -1608,8 +1608,11 @@ public GatedCloseable acquireSafeIndexCommit() throws EngineExcepti } /** - * Compute and return the latest ReplicationCheckpoint for a particular shard. - * @return EMPTY checkpoint before the engine is opened and null for non-segrep enabled indices + * return the most recently computed ReplicationCheckpoint for a particular shard. + * The checkpoint is updated inside a refresh listener and may lag behind the SegmentInfos on the reader. + * To guarantee the checkpoint is upto date with the latest on-reader infos, use `getLatestSegmentInfosAndCheckpoint` instead. + * + * @return {@link ReplicationCheckpoint} - The most recently computed ReplicationCheckpoint. */ public ReplicationCheckpoint getLatestReplicationCheckpoint() { return replicationTracker.getLatestReplicationCheckpoint(); @@ -1628,34 +1631,12 @@ public ReplicationCheckpoint getLatestReplicationCheckpoint() { public Tuple, ReplicationCheckpoint> getLatestSegmentInfosAndCheckpoint() { assert indexSettings.isSegRepEnabled(); - Tuple, ReplicationCheckpoint> nullSegmentInfosEmptyCheckpoint = new Tuple<>( - new GatedCloseable<>(null, () -> {}), - getLatestReplicationCheckpoint() - ); - - if (getEngineOrNull() == null) { - return nullSegmentInfosEmptyCheckpoint; - } // do not close the snapshot - caller will close it. GatedCloseable snapshot = null; try { snapshot = getSegmentInfosSnapshot(); - if (snapshot.get() != null) { - SegmentInfos segmentInfos = snapshot.get(); - final Map metadataMap = store.getSegmentMetadataMap(segmentInfos); - return new Tuple<>( - snapshot, - new ReplicationCheckpoint( - this.shardId, - getOperationPrimaryTerm(), - segmentInfos.getGeneration(), - segmentInfos.getVersion(), - metadataMap.values().stream().mapToLong(StoreFileMetadata::length).sum(), - getEngine().config().getCodec().getName(), - metadataMap - ) - ); - } + final SegmentInfos segmentInfos = snapshot.get(); + return new Tuple<>(snapshot, computeReplicationCheckpoint(segmentInfos)); } catch (IOException | AlreadyClosedException e) { logger.error("Error Fetching SegmentInfos and latest checkpoint", e); if (snapshot != null) { @@ -1666,7 +1647,39 @@ public Tuple, ReplicationCheckpoint> getLatestSegme } } } - return nullSegmentInfosEmptyCheckpoint; + return new Tuple<>(new GatedCloseable<>(null, () -> {}), getLatestReplicationCheckpoint()); + } + + /** + * Compute the latest {@link ReplicationCheckpoint} from a SegmentInfos. + * This function fetches a metadata snapshot from the store that comes with an IO cost. + * We will reuse the existing stored checkpoint if it is at the same SI version. + * + * @param segmentInfos {@link SegmentInfos} infos to use to compute. + * @return {@link ReplicationCheckpoint} Checkpoint computed from the infos. + * @throws IOException When there is an error computing segment metadata from the store. + */ + ReplicationCheckpoint computeReplicationCheckpoint(SegmentInfos segmentInfos) throws IOException { + if (segmentInfos == null) { + return ReplicationCheckpoint.empty(shardId); + } + final ReplicationCheckpoint latestReplicationCheckpoint = getLatestReplicationCheckpoint(); + if (latestReplicationCheckpoint.getSegmentInfosVersion() == segmentInfos.getVersion() + && latestReplicationCheckpoint.getSegmentsGen() == segmentInfos.getGeneration()) { + return latestReplicationCheckpoint; + } + final Map metadataMap = store.getSegmentMetadataMap(segmentInfos); + final ReplicationCheckpoint checkpoint = new ReplicationCheckpoint( + this.shardId, + getOperationPrimaryTerm(), + segmentInfos.getGeneration(), + segmentInfos.getVersion(), + metadataMap.values().stream().mapToLong(StoreFileMetadata::length).sum(), + getEngine().config().getCodec().getName(), + metadataMap + ); + logger.trace("Recomputed ReplicationCheckpoint for shard {}", checkpoint); + return checkpoint; } /** diff --git a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java index 698e61f6f7a09..c650edc31da8d 100644 --- a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java +++ b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java @@ -181,7 +181,6 @@ private boolean syncSegments() { // in the remote store. return indexShard.state() != IndexShardState.STARTED || !(indexShard.getEngine() instanceof InternalEngine); } - ReplicationCheckpoint checkpoint = indexShard.getLatestReplicationCheckpoint(); beforeSegmentsSync(); long refreshTimeMs = segmentTracker.getLocalRefreshTimeMs(), refreshClockTimeMs = segmentTracker.getLocalRefreshClockTimeMs(); long refreshSeqNo = segmentTracker.getLocalRefreshSeqNo(); @@ -199,10 +198,7 @@ private boolean syncSegments() { try (GatedCloseable segmentInfosGatedCloseable = indexShard.getSegmentInfosSnapshot()) { SegmentInfos segmentInfos = segmentInfosGatedCloseable.get(); - assert segmentInfos.getGeneration() == checkpoint.getSegmentsGen() : "SegmentInfos generation: " - + segmentInfos.getGeneration() - + " does not match metadata generation: " - + checkpoint.getSegmentsGen(); + final ReplicationCheckpoint checkpoint = indexShard.computeReplicationCheckpoint(segmentInfos); // Capture replication checkpoint before uploading the segments as upload can take some time and checkpoint can // move. long lastRefreshedCheckpoint = ((InternalEngine) indexShard.getEngine()).lastRefreshedCheckpoint(); diff --git a/server/src/main/java/org/opensearch/index/store/RemoteStoreFileDownloader.java b/server/src/main/java/org/opensearch/index/store/RemoteStoreFileDownloader.java index 4fc721f2b96b5..727c57afd289b 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteStoreFileDownloader.java +++ b/server/src/main/java/org/opensearch/index/store/RemoteStoreFileDownloader.java @@ -16,7 +16,7 @@ import org.opensearch.common.Nullable; import org.opensearch.common.annotation.InternalApi; import org.opensearch.common.logging.Loggers; -import org.opensearch.common.util.concurrent.UncategorizedExecutionException; +import org.opensearch.common.util.CancellableThreads; import org.opensearch.core.action.ActionListener; import org.opensearch.core.index.shard.ShardId; import org.opensearch.indices.recovery.RecoverySettings; @@ -51,9 +51,16 @@ public RemoteStoreFileDownloader(ShardId shardId, ThreadPool threadPool, Recover * @param source The remote directory to copy segment files from * @param destination The local directory to copy segment files to * @param toDownloadSegments The list of segment files to download + * @param listener Callback listener to be notified upon completion */ - public void download(Directory source, Directory destination, Collection toDownloadSegments) throws IOException { - downloadInternal(source, destination, null, toDownloadSegments, () -> {}); + public void downloadAsync( + CancellableThreads cancellableThreads, + Directory source, + Directory destination, + Collection toDownloadSegments, + ActionListener listener + ) { + downloadInternal(cancellableThreads, source, destination, null, toDownloadSegments, () -> {}, listener); } /** @@ -74,17 +81,37 @@ public void download( Directory secondDestination, Collection toDownloadSegments, Runnable onFileCompletion - ) throws IOException { - downloadInternal(source, destination, secondDestination, toDownloadSegments, onFileCompletion); + ) throws InterruptedException, IOException { + final CancellableThreads cancellableThreads = new CancellableThreads(); + final PlainActionFuture listener = PlainActionFuture.newFuture(); + downloadInternal(cancellableThreads, source, destination, secondDestination, toDownloadSegments, onFileCompletion, listener); + try { + listener.get(); + } catch (ExecutionException e) { + if (e.getCause() instanceof RuntimeException) { + throw (RuntimeException) e.getCause(); + } else if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } + throw new RuntimeException(e); + } catch (InterruptedException e) { + // If the blocking call on the PlainActionFuture itself is interrupted, then we must + // cancel the asynchronous work we were waiting on + cancellableThreads.cancel(e.getMessage()); + Thread.currentThread().interrupt(); + throw e; + } } private void downloadInternal( + CancellableThreads cancellableThreads, Directory source, Directory destination, @Nullable Directory secondDestination, Collection toDownloadSegments, - Runnable onFileCompletion - ) throws IOException { + Runnable onFileCompletion, + ActionListener listener + ) { final Queue queue = new ConcurrentLinkedQueue<>(toDownloadSegments); // Choose the minimum of: // - number of files to download @@ -95,25 +122,14 @@ private void downloadInternal( Math.min(threadPool.info(ThreadPool.Names.REMOTE_RECOVERY).getMax(), recoverySettings.getMaxConcurrentRemoteStoreStreams()) ); logger.trace("Starting download of {} files with {} threads", queue.size(), threads); - final PlainActionFuture> listener = PlainActionFuture.newFuture(); - final ActionListener allFilesListener = new GroupedActionListener<>(listener, threads); + final ActionListener allFilesListener = new GroupedActionListener<>(ActionListener.map(listener, r -> null), threads); for (int i = 0; i < threads; i++) { - copyOneFile(source, destination, secondDestination, queue, onFileCompletion, allFilesListener); - } - try { - listener.actionGet(); - } catch (UncategorizedExecutionException e) { - // Any IOException will be double-wrapped so dig it out and throw it - if (e.getCause() instanceof ExecutionException) { - if (e.getCause().getCause() instanceof IOException) { - throw (IOException) e.getCause().getCause(); - } - } - throw e; + copyOneFile(cancellableThreads, source, destination, secondDestination, queue, onFileCompletion, allFilesListener); } } private void copyOneFile( + CancellableThreads cancellableThreads, Directory source, Directory destination, @Nullable Directory secondDestination, @@ -129,18 +145,20 @@ private void copyOneFile( threadPool.executor(ThreadPool.Names.REMOTE_RECOVERY).submit(() -> { logger.trace("Downloading file {}", file); try { - destination.copyFrom(source, file, file, IOContext.DEFAULT); - onFileCompletion.run(); - if (secondDestination != null) { - secondDestination.copyFrom(destination, file, file, IOContext.DEFAULT); - } + cancellableThreads.executeIO(() -> { + destination.copyFrom(source, file, file, IOContext.DEFAULT); + onFileCompletion.run(); + if (secondDestination != null) { + secondDestination.copyFrom(destination, file, file, IOContext.DEFAULT); + } + }); } catch (Exception e) { // Clear the queue to stop any future processing, report the failure, then return queue.clear(); listener.onFailure(e); return; } - copyOneFile(source, destination, secondDestination, queue, onFileCompletion, listener); + copyOneFile(cancellableThreads, source, destination, secondDestination, queue, onFileCompletion, listener); }); } } diff --git a/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java b/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java index 9dcd16c53e6f3..02fc8feefd698 100644 --- a/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java +++ b/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java @@ -22,6 +22,7 @@ import org.opensearch.transport.TransportService; import java.util.List; +import java.util.function.BiConsumer; import static org.opensearch.indices.replication.SegmentReplicationSourceService.Actions.GET_CHECKPOINT_INFO; import static org.opensearch.indices.replication.SegmentReplicationSourceService.Actions.GET_SEGMENT_FILES; @@ -80,8 +81,13 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { + // fileProgressTracker is a no-op for node to node recovery + // MultiFileWriter takes care of progress tracking for downloads in this scenario + // TODO: Move state management and tracking into replication methods and use chunking and data + // copy mechanisms only from MultiFileWriter final Writeable.Reader reader = GetSegmentFilesResponse::new; final ActionListener responseListener = ActionListener.map(listener, r -> r); final GetSegmentFilesRequest request = new GetSegmentFilesRequest( diff --git a/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java b/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java index d2000a56401f5..b06b3e0497cf7 100644 --- a/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java +++ b/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java @@ -15,6 +15,7 @@ import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.util.Version; import org.opensearch.common.concurrent.GatedCloseable; +import org.opensearch.common.util.CancellableThreads; import org.opensearch.core.action.ActionListener; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.shard.IndexShardState; @@ -24,11 +25,15 @@ import org.opensearch.index.store.remote.metadata.RemoteSegmentMetadata; import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; +import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.BiConsumer; import java.util.stream.Collectors; /** @@ -42,6 +47,7 @@ public class RemoteStoreReplicationSource implements SegmentReplicationSource { private final IndexShard indexShard; private final RemoteSegmentStoreDirectory remoteDirectory; + private final CancellableThreads cancellableThreads = new CancellableThreads(); public RemoteStoreReplicationSource(IndexShard indexShard) { this.indexShard = indexShard; @@ -60,7 +66,7 @@ public void getCheckpointMetadata( // TODO: Need to figure out a way to pass this information for segment metadata via remote store. try (final GatedCloseable segmentInfosSnapshot = indexShard.getSegmentInfosSnapshot()) { final Version version = segmentInfosSnapshot.get().getCommitLuceneVersion(); - RemoteSegmentMetadata mdFile = remoteDirectory.init(); + final RemoteSegmentMetadata mdFile = getRemoteSegmentMetadata(); // During initial recovery flow, the remote store might not // have metadata as primary hasn't uploaded anything yet. if (mdFile == null && indexShard.state().equals(IndexShardState.STARTED) == false) { @@ -95,6 +101,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { try { @@ -104,34 +111,50 @@ public void getSegmentFiles( } logger.debug("Downloading segment files from remote store {}", filesToFetch); - RemoteSegmentMetadata remoteSegmentMetadata = remoteDirectory.readLatestMetadataFile(); - Collection directoryFiles = List.of(indexShard.store().directory().listAll()); - if (remoteSegmentMetadata != null) { - try { - indexShard.store().incRef(); - indexShard.remoteStore().incRef(); - final Directory storeDirectory = indexShard.store().directory(); - final List toDownloadSegmentNames = new ArrayList<>(); - for (StoreFileMetadata fileMetadata : filesToFetch) { - String file = fileMetadata.name(); - assert directoryFiles.contains(file) == false : "Local store already contains the file " + file; - toDownloadSegmentNames.add(file); - } - indexShard.getFileDownloader().download(remoteDirectory, storeDirectory, toDownloadSegmentNames); - logger.debug("Downloaded segment files from remote store {}", filesToFetch); - } finally { - indexShard.store().decRef(); - indexShard.remoteStore().decRef(); + if (remoteMetadataExists()) { + final Directory storeDirectory = indexShard.store().directory(); + final Collection directoryFiles = List.of(storeDirectory.listAll()); + final List toDownloadSegmentNames = new ArrayList<>(); + for (StoreFileMetadata fileMetadata : filesToFetch) { + String file = fileMetadata.name(); + assert directoryFiles.contains(file) == false : "Local store already contains the file " + file; + toDownloadSegmentNames.add(file); } + indexShard.getFileDownloader() + .downloadAsync( + cancellableThreads, + remoteDirectory, + new ReplicationStatsDirectoryWrapper(storeDirectory, fileProgressTracker), + toDownloadSegmentNames, + ActionListener.map(listener, r -> new GetSegmentFilesResponse(filesToFetch)) + ); + } else { + listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); } - listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); - } catch (Exception e) { + } catch (IOException | RuntimeException e) { listener.onFailure(e); } } + @Override + public void cancel() { + this.cancellableThreads.cancel("Canceled by target"); + } + @Override public String getDescription() { return "RemoteStoreReplicationSource"; } + + private boolean remoteMetadataExists() throws IOException { + final AtomicBoolean metadataExists = new AtomicBoolean(false); + cancellableThreads.executeIO(() -> metadataExists.set(remoteDirectory.readLatestMetadataFile() != null)); + return metadataExists.get(); + } + + private RemoteSegmentMetadata getRemoteSegmentMetadata() throws IOException { + AtomicReference mdFile = new AtomicReference<>(); + cancellableThreads.executeIO(() -> mdFile.set(remoteDirectory.init())); + return mdFile.get(); + } } diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSource.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSource.java index 6676b5b667e42..24f0cb15ddb25 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSource.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSource.java @@ -8,13 +8,19 @@ package org.opensearch.indices.replication; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FilterDirectory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; import org.opensearch.common.util.CancellableThreads.ExecutionCancelledException; import org.opensearch.core.action.ActionListener; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.store.StoreFileMetadata; import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; +import java.io.IOException; import java.util.List; +import java.util.function.BiConsumer; /** * Represents the source of a replication event. @@ -39,6 +45,7 @@ public interface SegmentReplicationSource { * @param checkpoint {@link ReplicationCheckpoint} Checkpoint to fetch metadata for. * @param filesToFetch {@link List} List of files to fetch. * @param indexShard {@link IndexShard} Reference to the IndexShard. + * @param fileProgressTracker {@link BiConsumer} A consumer that updates the replication progress for shard files. * @param listener {@link ActionListener} Listener that completes with the list of files copied. */ void getSegmentFiles( @@ -46,6 +53,7 @@ void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ); @@ -58,4 +66,69 @@ void getSegmentFiles( * Cancel any ongoing requests, should resolve any ongoing listeners with onFailure with a {@link ExecutionCancelledException}. */ default void cancel() {} + + /** + * Directory wrapper that records copy process for replication statistics + * + * @opensearch.internal + */ + final class ReplicationStatsDirectoryWrapper extends FilterDirectory { + private final BiConsumer fileProgressTracker; + + ReplicationStatsDirectoryWrapper(Directory in, BiConsumer fileProgressTracker) { + super(in); + this.fileProgressTracker = fileProgressTracker; + } + + @Override + public void copyFrom(Directory from, String src, String dest, IOContext context) throws IOException { + // here we wrap the index input form the source directory to report progress of file copy for the recovery stats. + // we increment the num bytes recovered in the readBytes method below, if users pull statistics they can see immediately + // how much has been recovered. + in.copyFrom(new FilterDirectory(from) { + @Override + public IndexInput openInput(String name, IOContext context) throws IOException { + final IndexInput input = in.openInput(name, context); + return new IndexInput("StatsDirectoryWrapper(" + input.toString() + ")") { + @Override + public void close() throws IOException { + input.close(); + } + + @Override + public long getFilePointer() { + throw new UnsupportedOperationException("only straight copies are supported"); + } + + @Override + public void seek(long pos) throws IOException { + throw new UnsupportedOperationException("seeks are not supported"); + } + + @Override + public long length() { + return input.length(); + } + + @Override + public IndexInput slice(String sliceDescription, long offset, long length) throws IOException { + throw new UnsupportedOperationException("slices are not supported"); + } + + @Override + public byte readByte() throws IOException { + throw new UnsupportedOperationException("use a buffer if you wanna perform well"); + } + + @Override + public void readBytes(byte[] b, int offset, int len) throws IOException { + // we rely on the fact that copyFrom uses a buffer + input.readBytes(b, offset, len); + fileProgressTracker.accept(dest, (long) len); + } + }; + } + }, src, dest, context); + } + } } diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java index 0eb6ce36fa63d..cd6dbe8af90d9 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java @@ -170,7 +170,14 @@ public void startReplication(ActionListener listener) { final List filesToFetch = getFiles(checkpointInfo); state.setStage(SegmentReplicationState.Stage.GET_FILES); cancellableThreads.checkForCancel(); - source.getSegmentFiles(getId(), checkpointInfo.getCheckpoint(), filesToFetch, indexShard, getFilesListener); + source.getSegmentFiles( + getId(), + checkpointInfo.getCheckpoint(), + filesToFetch, + indexShard, + this::updateFileRecoveryBytes, + getFilesListener + ); }, listener::onFailure); getFilesListener.whenComplete(response -> { @@ -240,6 +247,20 @@ private boolean validateLocalChecksum(StoreFileMetadata file) { } } + /** + * Updates the state to reflect recovery progress for the given file and + * updates the last access time for the target. + * @param fileName Name of the file being downloaded + * @param bytesRecovered Number of bytes recovered + */ + private void updateFileRecoveryBytes(String fileName, long bytesRecovered) { + ReplicationLuceneIndex index = state.getIndex(); + if (index != null) { + index.addRecoveredBytesToFile(fileName, bytesRecovered); + } + setLastAccessTime(); + } + private void finalizeReplication(CheckpointInfoResponse checkpointInfoResponse) throws OpenSearchCorruptionException { cancellableThreads.checkForCancel(); state.setStage(SegmentReplicationState.Stage.FINALIZE_REPLICATION); diff --git a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java index ec6b4d06b32c3..aac59df4f6573 100644 --- a/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java +++ b/server/src/main/java/org/opensearch/indices/replication/common/ReplicationTarget.java @@ -91,6 +91,9 @@ public ReplicationTarget(String name, IndexShard indexShard, ReplicationLuceneIn // make sure the store is not released until we are done. this.cancellableThreads = new CancellableThreads(); store.incRef(); + if (indexShard.indexSettings().isRemoteStoreEnabled()) { + indexShard.remoteStore().incRef(); + } } public long getId() { @@ -278,6 +281,12 @@ public abstract void writeFileChunk( ); protected void closeInternal() { - store.decRef(); + try { + store.decRef(); + } finally { + if (indexShard.indexSettings().isRemoteStoreEnabled()) { + indexShard.remoteStore().decRef(); + } + } } } diff --git a/server/src/main/java/org/opensearch/monitor/fs/FsInfo.java b/server/src/main/java/org/opensearch/monitor/fs/FsInfo.java index 4e2e9f280d765..8446ab0dd6166 100644 --- a/server/src/main/java/org/opensearch/monitor/fs/FsInfo.java +++ b/server/src/main/java/org/opensearch/monitor/fs/FsInfo.java @@ -335,7 +335,7 @@ public DeviceStats(StreamInput in) throws IOException { previousSectorsRead = in.readLong(); currentSectorsWritten = in.readLong(); previousSectorsWritten = in.readLong(); - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { currentReadTime = in.readLong(); previousReadTime = in.readLong(); currentWriteTime = in.readLong(); @@ -369,7 +369,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeLong(previousSectorsRead); out.writeLong(currentSectorsWritten); out.writeLong(previousSectorsWritten); - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeLong(currentReadTime); out.writeLong(previousReadTime); out.writeLong(currentWriteTime); @@ -533,7 +533,7 @@ public IoStats(StreamInput in) throws IOException { this.totalWriteOperations = in.readLong(); this.totalReadKilobytes = in.readLong(); this.totalWriteKilobytes = in.readLong(); - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { this.totalReadTime = in.readLong(); this.totalWriteTime = in.readLong(); this.totalQueueSize = in.readLong(); @@ -557,7 +557,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeLong(totalWriteOperations); out.writeLong(totalReadKilobytes); out.writeLong(totalWriteKilobytes); - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeLong(totalReadTime); out.writeLong(totalWriteTime); out.writeLong(totalQueueSize); diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 69b80462bbf0b..c9148f382a028 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -1115,7 +1115,8 @@ protected Node( searchPipelineService, fileCache, taskCancellationMonitoringService, - resourceUsageCollectorService + resourceUsageCollectorService, + repositoryService ); final SearchService searchService = newSearchService( diff --git a/server/src/main/java/org/opensearch/node/NodeService.java b/server/src/main/java/org/opensearch/node/NodeService.java index 9bb07080fa717..e2d7bc2c86ba3 100644 --- a/server/src/main/java/org/opensearch/node/NodeService.java +++ b/server/src/main/java/org/opensearch/node/NodeService.java @@ -53,6 +53,7 @@ import org.opensearch.ingest.IngestService; import org.opensearch.monitor.MonitorService; import org.opensearch.plugins.PluginsService; +import org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptService; import org.opensearch.search.aggregations.support.AggregationUsageService; import org.opensearch.search.backpressure.SearchBackpressureService; @@ -93,6 +94,7 @@ public class NodeService implements Closeable { private final Discovery discovery; private final FileCache fileCache; private final TaskCancellationMonitoringService taskCancellationMonitoringService; + private final RepositoriesService repositoriesService; NodeService( Settings settings, @@ -116,7 +118,8 @@ public class NodeService implements Closeable { SearchPipelineService searchPipelineService, FileCache fileCache, TaskCancellationMonitoringService taskCancellationMonitoringService, - ResourceUsageCollectorService resourceUsageCollectorService + ResourceUsageCollectorService resourceUsageCollectorService, + RepositoriesService repositoriesService ) { this.settings = settings; this.threadPool = threadPool; @@ -140,6 +143,7 @@ public class NodeService implements Closeable { this.fileCache = fileCache; this.taskCancellationMonitoringService = taskCancellationMonitoringService; this.resourceUsageCollectorService = resourceUsageCollectorService; + this.repositoriesService = repositoriesService; clusterService.addStateApplier(ingestService); clusterService.addStateApplier(searchPipelineService); } @@ -221,7 +225,8 @@ public NodeStats stats( boolean fileCacheStats, boolean taskCancellation, boolean searchPipelineStats, - boolean resourceUsageStats + boolean resourceUsageStats, + boolean repositoriesStats ) { // for indices stats we want to include previous allocated shards stats as well (it will // only be applied to the sensible ones to use, like refresh/merge/flush/indexing stats) @@ -250,7 +255,8 @@ public NodeStats stats( weightedRoutingStats ? WeightedRoutingStats.getInstance() : null, fileCacheStats && fileCache != null ? fileCache.fileCacheStats() : null, taskCancellation ? this.taskCancellationMonitoringService.stats() : null, - searchPipelineStats ? this.searchPipelineService.stats() : null + searchPipelineStats ? this.searchPipelineService.stats() : null, + repositoriesStats ? this.repositoriesService.getRepositoriesStats() : null ); } diff --git a/server/src/main/java/org/opensearch/repositories/RepositoriesService.java b/server/src/main/java/org/opensearch/repositories/RepositoriesService.java index 72266c053a1ae..68669feb16abc 100644 --- a/server/src/main/java/org/opensearch/repositories/RepositoriesService.java +++ b/server/src/main/java/org/opensearch/repositories/RepositoriesService.java @@ -457,7 +457,6 @@ public void applyClusterState(ClusterChangedEvent event) { logger.debug("unregistering repository [{}]", entry.getKey()); Repository repository = entry.getValue(); closeRepository(repository); - archiveRepositoryStats(repository, state.version()); } else { survivors.put(entry.getKey(), entry.getValue()); } @@ -485,7 +484,6 @@ public void applyClusterState(ClusterChangedEvent event) { } else { logger.debug("updating repository [{}]", repositoryMetadata.name()); closeRepository(repository); - archiveRepositoryStats(repository, state.version()); repository = null; try { repository = createRepository(repositoryMetadata, typesRegistry); @@ -575,12 +573,12 @@ public Repository repository(String repositoryName) { } public List repositoriesStats() { - List archivedRepoStats = repositoriesStatsArchive.getArchivedStats(); List activeRepoStats = getRepositoryStatsForActiveRepositories(); + return activeRepoStats; + } - List repositoriesStats = new ArrayList<>(archivedRepoStats); - repositoriesStats.addAll(activeRepoStats); - return repositoriesStats; + public RepositoriesStats getRepositoriesStats() { + return new RepositoriesStats(repositoriesStats()); } private List getRepositoryStatsForActiveRepositories() { @@ -640,15 +638,6 @@ public void closeRepository(Repository repository) { repository.close(); } - private void archiveRepositoryStats(Repository repository, long clusterStateVersion) { - if (repository instanceof MeteredBlobStoreRepository) { - RepositoryStatsSnapshot stats = ((MeteredBlobStoreRepository) repository).statsSnapshotForArchival(clusterStateVersion); - if (repositoriesStatsArchive.archive(stats) == false) { - logger.warn("Unable to archive the repository stats [{}] as the archive is full.", stats); - } - } - } - /** * Creates repository holder. This method starts the non-internal repository */ diff --git a/server/src/main/java/org/opensearch/repositories/RepositoriesStats.java b/server/src/main/java/org/opensearch/repositories/RepositoriesStats.java new file mode 100644 index 0000000000000..b24e0dddd852a --- /dev/null +++ b/server/src/main/java/org/opensearch/repositories/RepositoriesStats.java @@ -0,0 +1,52 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.repositories; + +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.common.util.CollectionUtils; +import org.opensearch.core.xcontent.ToXContentObject; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.List; + +/** + * Encapsulates stats for multiple repositories* + */ +public class RepositoriesStats implements Writeable, ToXContentObject { + + List repositoryStatsSnapshots; + + public RepositoriesStats(List repositoryStatsSnapshots) { + this.repositoryStatsSnapshots = repositoryStatsSnapshots; + } + + public RepositoriesStats(StreamInput in) throws IOException { + this.repositoryStatsSnapshots = in.readList(RepositoryStatsSnapshot::new); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeList(repositoryStatsSnapshots); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startArray("repositories"); + if (CollectionUtils.isEmpty(repositoryStatsSnapshots) == false) { + for (RepositoryStatsSnapshot repositoryStatsSnapshot : repositoryStatsSnapshots) { + repositoryStatsSnapshot.toXContent(builder, params); + } + } + builder.endArray(); + return builder; + } +} diff --git a/server/src/main/java/org/opensearch/repositories/RepositoriesStatsArchive.java b/server/src/main/java/org/opensearch/repositories/RepositoriesStatsArchive.java index b8f100706f81e..3d35f75176eaf 100644 --- a/server/src/main/java/org/opensearch/repositories/RepositoriesStatsArchive.java +++ b/server/src/main/java/org/opensearch/repositories/RepositoriesStatsArchive.java @@ -70,11 +70,6 @@ public RepositoriesStatsArchive(TimeValue retentionPeriod, int maxCapacity, Long * @return {@code true} if the repository stats were archived, {@code false} otherwise. */ synchronized boolean archive(final RepositoryStatsSnapshot repositoryStats) { - assert containsRepositoryStats(repositoryStats) == false : "A repository with ephemeral id " - + repositoryStats.getRepositoryInfo().ephemeralId - + " is already archived"; - assert repositoryStats.isArchived(); - evict(); if (archive.size() >= maxCapacity) { @@ -116,15 +111,6 @@ private void evict() { } } - private boolean containsRepositoryStats(RepositoryStatsSnapshot repositoryStats) { - return archive.stream() - .anyMatch( - entry -> entry.repositoryStatsSnapshot.getRepositoryInfo().ephemeralId.equals( - repositoryStats.getRepositoryInfo().ephemeralId - ) - ); - } - private static class ArchiveEntry { private final RepositoryStatsSnapshot repositoryStatsSnapshot; private final long createdAtMillis; diff --git a/server/src/main/java/org/opensearch/repositories/RepositoryInfo.java b/server/src/main/java/org/opensearch/repositories/RepositoryInfo.java index 8aa86fc46d591..387a685bd6526 100644 --- a/server/src/main/java/org/opensearch/repositories/RepositoryInfo.java +++ b/server/src/main/java/org/opensearch/repositories/RepositoryInfo.java @@ -32,7 +32,6 @@ package org.opensearch.repositories; -import org.opensearch.common.Nullable; import org.opensearch.core.common.Strings; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; @@ -51,64 +50,27 @@ * @opensearch.internal */ public final class RepositoryInfo implements Writeable, ToXContentFragment { - public final String ephemeralId; public final String name; public final String type; public final Map location; - public final long startedAt; - @Nullable - public final Long stoppedAt; - public RepositoryInfo(String ephemeralId, String name, String type, Map location, long startedAt) { - this(ephemeralId, name, type, location, startedAt, null); - } - - public RepositoryInfo( - String ephemeralId, - String name, - String type, - Map location, - long startedAt, - @Nullable Long stoppedAt - ) { - this.ephemeralId = ephemeralId; + public RepositoryInfo(String name, String type, Map location) { this.name = name; this.type = type; this.location = location; - this.startedAt = startedAt; - if (stoppedAt != null && startedAt > stoppedAt) { - throw new IllegalArgumentException("createdAt must be before or equal to stoppedAt"); - } - this.stoppedAt = stoppedAt; } public RepositoryInfo(StreamInput in) throws IOException { - this.ephemeralId = in.readString(); this.name = in.readString(); this.type = in.readString(); this.location = in.readMap(StreamInput::readString, StreamInput::readString); - this.startedAt = in.readLong(); - this.stoppedAt = in.readOptionalLong(); - } - - public RepositoryInfo stopped(long stoppedAt) { - assert isStopped() == false : "The repository is already stopped"; - - return new RepositoryInfo(ephemeralId, name, type, location, startedAt, stoppedAt); - } - - public boolean isStopped() { - return stoppedAt != null; } @Override public void writeTo(StreamOutput out) throws IOException { - out.writeString(ephemeralId); out.writeString(name); out.writeString(type); out.writeMap(location, StreamOutput::writeString, StreamOutput::writeString); - out.writeLong(startedAt); - out.writeOptionalLong(stoppedAt); } @Override @@ -116,11 +78,6 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field("repository_name", name); builder.field("repository_type", type); builder.field("repository_location", location); - builder.field("repository_ephemeral_id", ephemeralId); - builder.field("repository_started_at", startedAt); - if (stoppedAt != null) { - builder.field("repository_stopped_at", stoppedAt); - } return builder; } @@ -129,17 +86,12 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; RepositoryInfo that = (RepositoryInfo) o; - return ephemeralId.equals(that.ephemeralId) - && name.equals(that.name) - && type.equals(that.type) - && location.equals(that.location) - && startedAt == that.startedAt - && Objects.equals(stoppedAt, that.stoppedAt); + return name.equals(that.name) && type.equals(that.type) && location.equals(that.location); } @Override public int hashCode() { - return Objects.hash(ephemeralId, name, type, location, startedAt, stoppedAt); + return Objects.hash(name, type, location); } @Override diff --git a/server/src/main/java/org/opensearch/repositories/RepositoryStats.java b/server/src/main/java/org/opensearch/repositories/RepositoryStats.java index efd5d6f8560b6..ab97c5eaa1f7a 100644 --- a/server/src/main/java/org/opensearch/repositories/RepositoryStats.java +++ b/server/src/main/java/org/opensearch/repositories/RepositoryStats.java @@ -32,9 +32,13 @@ package org.opensearch.repositories; +import org.opensearch.common.Nullable; +import org.opensearch.common.blobstore.BlobStore; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContentFragment; +import org.opensearch.core.xcontent.XContentBuilder; import java.io.IOException; import java.util.Collections; @@ -47,32 +51,63 @@ * * @opensearch.internal */ -public class RepositoryStats implements Writeable { +public class RepositoryStats implements Writeable, ToXContentFragment { public static final RepositoryStats EMPTY_STATS = new RepositoryStats(Collections.emptyMap()); + @Nullable public final Map requestCounts; + @Nullable + public final Map> extendedStats; + public final boolean detailed; public RepositoryStats(Map requestCounts) { this.requestCounts = Collections.unmodifiableMap(requestCounts); + this.extendedStats = Collections.emptyMap(); + this.detailed = false; + } + + public RepositoryStats(Map> extendedStats, boolean detailed) { + this.requestCounts = Collections.emptyMap(); + this.extendedStats = Collections.unmodifiableMap(extendedStats); + this.detailed = detailed; } public RepositoryStats(StreamInput in) throws IOException { this.requestCounts = in.readMap(StreamInput::readString, StreamInput::readLong); + this.extendedStats = in.readMap( + e -> e.readEnum(BlobStore.Metric.class), + i -> i.readMap(StreamInput::readString, StreamInput::readLong) + ); + this.detailed = in.readBoolean(); } public RepositoryStats merge(RepositoryStats otherStats) { - final Map result = new HashMap<>(); - result.putAll(requestCounts); - for (Map.Entry entry : otherStats.requestCounts.entrySet()) { - result.merge(entry.getKey(), entry.getValue(), Math::addExact); + assert this.detailed == otherStats.detailed; + if (detailed) { + final Map> result = new HashMap<>(); + result.putAll(extendedStats); + for (Map.Entry> entry : otherStats.extendedStats.entrySet()) { + for (Map.Entry nested : entry.getValue().entrySet()) { + result.get(entry.getKey()).merge(nested.getKey(), nested.getValue(), Math::addExact); + } + } + return new RepositoryStats(result, true); + } else { + final Map result = new HashMap<>(); + result.putAll(requestCounts); + for (Map.Entry entry : otherStats.requestCounts.entrySet()) { + result.merge(entry.getKey(), entry.getValue(), Math::addExact); + } + return new RepositoryStats(result); } - return new RepositoryStats(result); } @Override public void writeTo(StreamOutput out) throws IOException { out.writeMap(requestCounts, StreamOutput::writeString, StreamOutput::writeLong); + out.writeMap(extendedStats, StreamOutput::writeEnum, (o, v) -> o.writeMap(v, StreamOutput::writeString, StreamOutput::writeLong)); + out.writeBoolean(detailed); } @Override @@ -80,16 +115,32 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; RepositoryStats that = (RepositoryStats) o; - return requestCounts.equals(that.requestCounts); + return requestCounts.equals(that.requestCounts) && extendedStats.equals(that.extendedStats) && detailed == that.detailed; } @Override public int hashCode() { - return Objects.hash(requestCounts); + return Objects.hash(requestCounts, detailed, extendedStats); } @Override public String toString() { - return "RepositoryStats{" + "requestCounts=" + requestCounts + '}'; + return "RepositoryStats{" + "requestCounts=" + requestCounts + "extendedStats=" + extendedStats + "detailed =" + detailed + "}"; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + if (detailed == false) { + builder.field("request_counts", requestCounts); + } else { + extendedStats.forEach((k, v) -> { + try { + builder.field(k.metricName(), v); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + return builder; } } diff --git a/server/src/main/java/org/opensearch/repositories/RepositoryStatsSnapshot.java b/server/src/main/java/org/opensearch/repositories/RepositoryStatsSnapshot.java index 2b061cd2c2cc9..0a727980fad0d 100644 --- a/server/src/main/java/org/opensearch/repositories/RepositoryStatsSnapshot.java +++ b/server/src/main/java/org/opensearch/repositories/RepositoryStatsSnapshot.java @@ -53,21 +53,17 @@ public final class RepositoryStatsSnapshot implements Writeable, ToXContentObjec private final RepositoryInfo repositoryInfo; private final RepositoryStats repositoryStats; private final long clusterVersion; - private final boolean archived; - public RepositoryStatsSnapshot(RepositoryInfo repositoryInfo, RepositoryStats repositoryStats, long clusterVersion, boolean archived) { - assert archived != (clusterVersion == UNKNOWN_CLUSTER_VERSION); + public RepositoryStatsSnapshot(RepositoryInfo repositoryInfo, RepositoryStats repositoryStats, long clusterVersion) { this.repositoryInfo = repositoryInfo; this.repositoryStats = repositoryStats; this.clusterVersion = clusterVersion; - this.archived = archived; } public RepositoryStatsSnapshot(StreamInput in) throws IOException { this.repositoryInfo = new RepositoryInfo(in); this.repositoryStats = new RepositoryStats(in); this.clusterVersion = in.readLong(); - this.archived = in.readBoolean(); } public RepositoryInfo getRepositoryInfo() { @@ -78,10 +74,6 @@ public RepositoryStats getRepositoryStats() { return repositoryStats; } - public boolean isArchived() { - return archived; - } - public long getClusterVersion() { return clusterVersion; } @@ -91,18 +83,13 @@ public void writeTo(StreamOutput out) throws IOException { repositoryInfo.writeTo(out); repositoryStats.writeTo(out); out.writeLong(clusterVersion); - out.writeBoolean(archived); } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); repositoryInfo.toXContent(builder, params); - builder.field("request_counts", repositoryStats.requestCounts); - builder.field("archived", archived); - if (archived) { - builder.field("cluster_version", clusterVersion); - } + repositoryStats.toXContent(builder, params); builder.endObject(); return builder; } @@ -114,13 +101,12 @@ public boolean equals(Object o) { RepositoryStatsSnapshot that = (RepositoryStatsSnapshot) o; return repositoryInfo.equals(that.repositoryInfo) && repositoryStats.equals(that.repositoryStats) - && clusterVersion == that.clusterVersion - && archived == that.archived; + && clusterVersion == that.clusterVersion; } @Override public int hashCode() { - return Objects.hash(repositoryInfo, repositoryStats, clusterVersion, archived); + return Objects.hash(repositoryInfo, repositoryStats, clusterVersion); } @Override diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java index 69883e0d19c8d..8a2260e1f6d90 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java @@ -173,6 +173,7 @@ import java.util.stream.Stream; import static org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot.FileInfo.canonicalName; +import static org.opensearch.repositories.blobstore.ChecksumBlobStoreFormat.SNAPSHOT_ONLY_FORMAT_PARAMS; /** * BlobStore - based implementation of Snapshot Repository @@ -850,6 +851,8 @@ public RepositoryStats stats() { final BlobStore store = blobStore.get(); if (store == null) { return RepositoryStats.EMPTY_STATS; + } else if (store.extendedStats() != null && store.extendedStats().isEmpty() == false) { + return new RepositoryStats(store.extendedStats(), true); } return new RepositoryStats(store.stats()); } @@ -3333,7 +3336,12 @@ private void writeShardIndexBlobAtomic( () -> new ParameterizedMessage("[{}] Writing shard index [{}] to [{}]", metadata.name(), indexGeneration, shardContainer.path()) ); final String blobName = INDEX_SHARD_SNAPSHOTS_FORMAT.blobName(String.valueOf(indexGeneration)); - writeAtomic(shardContainer, blobName, INDEX_SHARD_SNAPSHOTS_FORMAT.serialize(updatedSnapshots, blobName, compressor), true); + writeAtomic( + shardContainer, + blobName, + INDEX_SHARD_SNAPSHOTS_FORMAT.serialize(updatedSnapshots, blobName, compressor, SNAPSHOT_ONLY_FORMAT_PARAMS), + true + ); } // Unused blobs are all previous index-, data- and meta-blobs and that are not referenced by the new index- as well as all diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java index 7e1960171043a..17cb68f798094 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java @@ -83,7 +83,7 @@ public final class ChecksumBlobStoreFormat { // Serialization parameters to specify correct context for metadata serialization - private static final ToXContent.Params SNAPSHOT_ONLY_FORMAT_PARAMS; + public static final ToXContent.Params SNAPSHOT_ONLY_FORMAT_PARAMS; static { Map snapshotOnlyParams = new HashMap<>(); @@ -171,7 +171,7 @@ public T deserialize(String blobName, NamedXContentRegistry namedXContentRegistr */ public void write(final T obj, final BlobContainer blobContainer, final String name, final Compressor compressor) throws IOException { final String blobName = blobName(name); - final BytesReference bytes = serialize(obj, blobName, compressor); + final BytesReference bytes = serialize(obj, blobName, compressor, SNAPSHOT_ONLY_FORMAT_PARAMS); blobContainer.writeBlob(blobName, bytes.streamInput(), bytes.length(), false); } @@ -184,13 +184,15 @@ public void write(final T obj, final BlobContainer blobContainer, final String n * @param name blob name * @param compressor whether to use compression * @param listener listener to listen to write result + * @param params ToXContent params */ public void writeAsync( final T obj, final BlobContainer blobContainer, final String name, final Compressor compressor, - ActionListener listener + ActionListener listener, + final ToXContent.Params params ) throws IOException { if (blobContainer instanceof AsyncMultiStreamBlobContainer == false) { write(obj, blobContainer, name, compressor); @@ -198,7 +200,7 @@ public void writeAsync( return; } final String blobName = blobName(name); - final BytesReference bytes = serialize(obj, blobName, compressor); + final BytesReference bytes = serialize(obj, blobName, compressor, params); final String resourceDescription = "ChecksumBlobStoreFormat.writeAsync(blob=\"" + blobName + "\")"; try (IndexInput input = new ByteArrayIndexInput(resourceDescription, BytesReference.toBytes(bytes))) { long expectedChecksum; @@ -230,7 +232,8 @@ public void writeAsync( } } - public BytesReference serialize(final T obj, final String blobName, final Compressor compressor) throws IOException { + public BytesReference serialize(final T obj, final String blobName, final Compressor compressor, final ToXContent.Params params) + throws IOException { try (BytesStreamOutput outputStream = new BytesStreamOutput()) { try ( OutputStreamIndexOutput indexOutput = new OutputStreamIndexOutput( @@ -254,7 +257,7 @@ public void close() throws IOException { ) ) { builder.startObject(); - obj.toXContent(builder, SNAPSHOT_ONLY_FORMAT_PARAMS); + obj.toXContent(builder, params); builder.endObject(); } CodecUtil.writeFooter(indexOutput); diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/MeteredBlobStoreRepository.java b/server/src/main/java/org/opensearch/repositories/blobstore/MeteredBlobStoreRepository.java index d4921f4e6d2e7..0651ff586d412 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/MeteredBlobStoreRepository.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/MeteredBlobStoreRepository.java @@ -34,12 +34,10 @@ import org.opensearch.cluster.metadata.RepositoryMetadata; import org.opensearch.cluster.service.ClusterService; -import org.opensearch.common.UUIDs; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.repositories.RepositoryInfo; import org.opensearch.repositories.RepositoryStatsSnapshot; -import org.opensearch.threadpool.ThreadPool; import java.util.Map; @@ -59,14 +57,7 @@ public MeteredBlobStoreRepository( Map location ) { super(metadata, namedXContentRegistry, clusterService, recoverySettings); - ThreadPool threadPool = clusterService.getClusterApplierService().threadPool(); - this.repositoryInfo = new RepositoryInfo( - UUIDs.randomBase64UUID(), - metadata.name(), - metadata.type(), - location, - threadPool.absoluteTimeInMillis() - ); + this.repositoryInfo = new RepositoryInfo(metadata.name(), metadata.type(), location); } @Override @@ -78,11 +69,6 @@ public void reload(RepositoryMetadata repositoryMetadata) { } public RepositoryStatsSnapshot statsSnapshot() { - return new RepositoryStatsSnapshot(repositoryInfo, stats(), RepositoryStatsSnapshot.UNKNOWN_CLUSTER_VERSION, false); - } - - public RepositoryStatsSnapshot statsSnapshotForArchival(long clusterVersion) { - RepositoryInfo stoppedRepoInfo = repositoryInfo.stopped(threadPool.absoluteTimeInMillis()); - return new RepositoryStatsSnapshot(stoppedRepoInfo, stats(), clusterVersion, true); + return new RepositoryStatsSnapshot(repositoryInfo, stats(), RepositoryStatsSnapshot.UNKNOWN_CLUSTER_VERSION); } } diff --git a/server/src/main/java/org/opensearch/search/DocValueFormat.java b/server/src/main/java/org/opensearch/search/DocValueFormat.java index 412191c57abd8..7be51643eeb7d 100644 --- a/server/src/main/java/org/opensearch/search/DocValueFormat.java +++ b/server/src/main/java/org/opensearch/search/DocValueFormat.java @@ -243,7 +243,7 @@ public DateTime(DateFormatter formatter, ZoneId timeZone, DateFieldMapper.Resolu } public DateTime(StreamInput in) throws IOException { - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { this.formatter = DateFormatter.forPattern(in.readString(), in.readOptionalString()); } else { this.formatter = DateFormatter.forPattern(in.readString()); @@ -265,12 +265,12 @@ public String getWriteableName() { @Override public void writeTo(StreamOutput out) throws IOException { - if (out.getVersion().before(Version.V_3_0_0) && formatter.equals(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER)) { + if (out.getVersion().before(Version.V_2_12_0) && formatter.equals(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER)) { out.writeString(DateFieldMapper.LEGACY_DEFAULT_DATE_TIME_FORMATTER.pattern()); // required for backwards compatibility } else { out.writeString(formatter.pattern()); } - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeOptionalString(formatter.printPattern()); } out.writeString(timeZone.getId()); diff --git a/server/src/main/java/org/opensearch/search/profile/Profilers.java b/server/src/main/java/org/opensearch/search/profile/Profilers.java index 8e87c7ff4acd4..68cf05c988b5b 100644 --- a/server/src/main/java/org/opensearch/search/profile/Profilers.java +++ b/server/src/main/java/org/opensearch/search/profile/Profilers.java @@ -35,6 +35,9 @@ import org.opensearch.search.internal.ContextIndexSearcher; import org.opensearch.search.profile.aggregation.AggregationProfiler; import org.opensearch.search.profile.aggregation.ConcurrentAggregationProfiler; +import org.opensearch.search.profile.query.ConcurrentQueryProfileTree; +import org.opensearch.search.profile.query.ConcurrentQueryProfiler; +import org.opensearch.search.profile.query.InternalQueryProfileTree; import org.opensearch.search.profile.query.QueryProfiler; import java.util.ArrayList; @@ -64,7 +67,9 @@ public Profilers(ContextIndexSearcher searcher, boolean isConcurrentSegmentSearc /** Switch to a new profile. */ public QueryProfiler addQueryProfiler() { - QueryProfiler profiler = new QueryProfiler(isConcurrentSegmentSearchEnabled); + QueryProfiler profiler = isConcurrentSegmentSearchEnabled + ? new ConcurrentQueryProfiler(new ConcurrentQueryProfileTree()) + : new QueryProfiler(new InternalQueryProfileTree()); searcher.setProfiler(profiler); queryProfilers.add(profiler); return profiler; diff --git a/server/src/main/java/org/opensearch/search/profile/Timer.java b/server/src/main/java/org/opensearch/search/profile/Timer.java index 172762cabeb6a..864c689cf7fa0 100644 --- a/server/src/main/java/org/opensearch/search/profile/Timer.java +++ b/server/src/main/java/org/opensearch/search/profile/Timer.java @@ -53,6 +53,18 @@ public class Timer { private boolean doTiming; private long timing, count, lastCount, start, earliestTimerStartTime; + public Timer() { + this(0, 0, 0, 0, 0); + } + + public Timer(long timing, long count, long lastCount, long start, long earliestTimerStartTime) { + this.timing = timing; + this.count = count; + this.lastCount = lastCount; + this.start = start; + this.earliestTimerStartTime = earliestTimerStartTime; + } + /** pkg-private for testing */ long nanoTime() { return System.nanoTime(); diff --git a/server/src/main/java/org/opensearch/search/profile/query/AbstractQueryProfileTree.java b/server/src/main/java/org/opensearch/search/profile/query/AbstractQueryProfileTree.java index 8e825def13f5d..2f5d632ee2d87 100644 --- a/server/src/main/java/org/opensearch/search/profile/query/AbstractQueryProfileTree.java +++ b/server/src/main/java/org/opensearch/search/profile/query/AbstractQueryProfileTree.java @@ -54,14 +54,11 @@ public void startRewriteTime() { * startRewriteTime() must be called for a particular context prior to calling * stopAndAddRewriteTime(), otherwise the elapsed time will be negative and * nonsensical - * - * @return The elapsed time */ - public long stopAndAddRewriteTime() { + public void stopAndAddRewriteTime() { long time = Math.max(1, System.nanoTime() - rewriteScratch); rewriteTime += time; rewriteScratch = 0; - return time; } public long getRewriteTime() { diff --git a/server/src/main/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdown.java b/server/src/main/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdown.java index e567fdd2d436c..59ef01f9f947a 100644 --- a/server/src/main/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdown.java +++ b/server/src/main/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdown.java @@ -70,7 +70,7 @@ public Map toBreakdownMap() { ); final long createWeightTime = topLevelBreakdownMapWithWeightTime.get(QueryTimingType.CREATE_WEIGHT.toString()); - if (sliceCollectorsToLeaves.isEmpty() || contexts.isEmpty()) { + if (contexts.isEmpty()) { // If there are no leaf contexts, then return the default concurrent query level breakdown, which will include the // create_weight time/count queryNodeTime = createWeightTime; @@ -78,6 +78,21 @@ public Map toBreakdownMap() { minSliceNodeTime = 0L; avgSliceNodeTime = 0L; return buildDefaultQueryBreakdownMap(createWeightTime); + } else if (sliceCollectorsToLeaves.isEmpty()) { + // This will happen when each slice executes search leaf for its leaves and query is rewritten for the leaf being searched. It + // creates a new weight and breakdown map for each rewritten query. This new breakdown map captures the timing information for + // the new rewritten query. The sliceCollectorsToLeaves is empty because this breakdown for rewritten query gets created later + // in search leaf path which doesn't have collector. Also, this is not needed since this breakdown is per leaf and there is no + // concurrency involved. An empty sliceCollectorsToLeaves could also happen in the case of early termination. + AbstractProfileBreakdown breakdown = contexts.values().iterator().next(); + queryNodeTime = breakdown.toNodeTime() + createWeightTime; + maxSliceNodeTime = 0L; + minSliceNodeTime = 0L; + avgSliceNodeTime = 0L; + Map queryBreakdownMap = new HashMap<>(breakdown.toBreakdownMap()); + queryBreakdownMap.put(QueryTimingType.CREATE_WEIGHT.toString(), createWeightTime); + queryBreakdownMap.put(QueryTimingType.CREATE_WEIGHT + TIMING_TYPE_COUNT_SUFFIX, 1L); + return queryBreakdownMap; } // first create the slice level breakdowns @@ -191,10 +206,12 @@ Map> buildSliceLevelBreakdown() { } // compute sliceMaxEndTime as max of sliceEndTime across all timing types sliceMaxEndTime = Math.max(sliceMaxEndTime, currentSliceBreakdown.getOrDefault(timingTypeSliceEndTimeKey, Long.MIN_VALUE)); - sliceMinStartTime = Math.min( - sliceMinStartTime, - currentSliceBreakdown.getOrDefault(timingTypeSliceStartTimeKey, Long.MAX_VALUE) - ); + long currentSliceStartTime = currentSliceBreakdown.getOrDefault(timingTypeSliceStartTimeKey, Long.MAX_VALUE); + if (currentSliceStartTime == 0L) { + // The timer for the current timing type never starts, so we continue here + continue; + } + sliceMinStartTime = Math.min(sliceMinStartTime, currentSliceStartTime); // compute total time for each timing type at slice level using sliceEndTime and sliceStartTime currentSliceBreakdown.put( timingType.toString(), diff --git a/server/src/main/java/org/opensearch/search/profile/query/ConcurrentQueryProfiler.java b/server/src/main/java/org/opensearch/search/profile/query/ConcurrentQueryProfiler.java new file mode 100644 index 0000000000000..42bf23bb13fbe --- /dev/null +++ b/server/src/main/java/org/opensearch/search/profile/query/ConcurrentQueryProfiler.java @@ -0,0 +1,134 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.profile.query; + +import org.apache.lucene.search.Query; +import org.opensearch.search.profile.ContextualProfileBreakdown; +import org.opensearch.search.profile.ProfileResult; +import org.opensearch.search.profile.Timer; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.LinkedHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +/** + * This class acts as a thread-local storage for profiling a query with concurrent execution + * + * @opensearch.internal + */ +public final class ConcurrentQueryProfiler extends QueryProfiler { + + private final Map threadToProfileTree; + // The LinkedList does not need to be thread safe, as the map associates thread IDs with LinkedList, and only + // one thread will access the LinkedList at a time. + private final Map> threadToRewriteTimers; + + public ConcurrentQueryProfiler(AbstractQueryProfileTree profileTree) { + super(profileTree); + long threadId = getCurrentThreadId(); + // We utilize LinkedHashMap to preserve the insertion order of the profiled queries + threadToProfileTree = Collections.synchronizedMap(new LinkedHashMap<>()); + threadToProfileTree.put(threadId, (ConcurrentQueryProfileTree) profileTree); + threadToRewriteTimers = new ConcurrentHashMap<>(); + threadToRewriteTimers.put(threadId, new LinkedList<>()); + } + + @Override + public ContextualProfileBreakdown getQueryBreakdown(Query query) { + ConcurrentQueryProfileTree profileTree = threadToProfileTree.computeIfAbsent( + getCurrentThreadId(), + k -> new ConcurrentQueryProfileTree() + ); + return profileTree.getProfileBreakdown(query); + } + + /** + * Removes the last (e.g. most recent) element on ConcurrentQueryProfileTree stack. + */ + @Override + public void pollLastElement() { + ConcurrentQueryProfileTree concurrentProfileTree = threadToProfileTree.get(getCurrentThreadId()); + if (concurrentProfileTree != null) { + concurrentProfileTree.pollLast(); + } + } + + /** + * @return a hierarchical representation of the profiled tree + */ + @Override + public List getTree() { + List profileResults = new ArrayList<>(); + for (Map.Entry profile : threadToProfileTree.entrySet()) { + profileResults.addAll(profile.getValue().getTree()); + } + return profileResults; + } + + /** + * Begin timing the rewrite phase of a request + */ + @Override + public void startRewriteTime() { + Timer rewriteTimer = new Timer(); + threadToRewriteTimers.computeIfAbsent(getCurrentThreadId(), k -> new LinkedList<>()).add(rewriteTimer); + rewriteTimer.start(); + } + + /** + * Stop recording the current rewrite timer + */ + public void stopAndAddRewriteTime() { + Timer rewriteTimer = threadToRewriteTimers.get(getCurrentThreadId()).getLast(); + rewriteTimer.stop(); + } + + /** + * @return total time taken to rewrite all queries in this concurrent query profiler + */ + @Override + public long getRewriteTime() { + long totalRewriteTime = 0L; + List rewriteTimers = new LinkedList<>(); + threadToRewriteTimers.values().forEach(rewriteTimers::addAll); + LinkedList mergedIntervals = mergeRewriteTimeIntervals(rewriteTimers); + for (long[] interval : mergedIntervals) { + totalRewriteTime += interval[1] - interval[0]; + } + return totalRewriteTime; + } + + // package private for unit testing + LinkedList mergeRewriteTimeIntervals(List timers) { + LinkedList mergedIntervals = new LinkedList<>(); + timers.sort(Comparator.comparingLong(Timer::getEarliestTimerStartTime)); + for (Timer timer : timers) { + long startTime = timer.getEarliestTimerStartTime(); + long endTime = startTime + timer.getApproximateTiming(); + if (mergedIntervals.isEmpty() || mergedIntervals.getLast()[1] < startTime) { + long[] interval = new long[2]; + interval[0] = startTime; + interval[1] = endTime; + mergedIntervals.add(interval); + } else { + mergedIntervals.getLast()[1] = Math.max(mergedIntervals.getLast()[1], endTime); + } + } + return mergedIntervals; + } + + private long getCurrentThreadId() { + return Thread.currentThread().getId(); + } +} diff --git a/server/src/main/java/org/opensearch/search/profile/query/QueryProfiler.java b/server/src/main/java/org/opensearch/search/profile/query/QueryProfiler.java index a80ce1c658081..332c4b3551450 100644 --- a/server/src/main/java/org/opensearch/search/profile/query/QueryProfiler.java +++ b/server/src/main/java/org/opensearch/search/profile/query/QueryProfiler.java @@ -51,15 +51,15 @@ * * @opensearch.internal */ -public final class QueryProfiler extends AbstractProfiler, Query> { +public class QueryProfiler extends AbstractProfiler, Query> { /** * The root Collector used in the search */ private InternalProfileComponent collector; - public QueryProfiler(boolean concurrent) { - super(concurrent ? new ConcurrentQueryProfileTree() : new InternalQueryProfileTree()); + public QueryProfiler(AbstractQueryProfileTree profileTree) { + super(profileTree); } /** Set the collector that is associated with this profiler. */ @@ -81,14 +81,14 @@ public void startRewriteTime() { /** * Stop recording the current rewrite and add it's time to the total tally, returning the * cumulative time so far. - * - * @return cumulative rewrite time */ - public long stopAndAddRewriteTime() { - return ((AbstractQueryProfileTree) profileTree).stopAndAddRewriteTime(); + public void stopAndAddRewriteTime() { + ((AbstractQueryProfileTree) profileTree).stopAndAddRewriteTime(); } /** + * The rewriting process is complex and hard to display because queries can undergo significant changes. + * Instead of showing intermediate results, we display the cumulative time for the non-concurrent search case. * @return total time taken to rewrite all queries in this profile */ public long getRewriteTime() { diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java index d91384cd10048..6e1131cf1bafc 100644 --- a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -854,6 +854,7 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { weightedRoutingStats, null, null, + null, null ); } diff --git a/server/src/test/java/org/opensearch/action/search/SearchQueryCategorizerTests.java b/server/src/test/java/org/opensearch/action/search/SearchQueryCategorizerTests.java new file mode 100644 index 0000000000000..a2e301143d694 --- /dev/null +++ b/server/src/test/java/org/opensearch/action/search/SearchQueryCategorizerTests.java @@ -0,0 +1,228 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.search; + +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.BoostingQueryBuilder; +import org.opensearch.index.query.MatchNoneQueryBuilder; +import org.opensearch.index.query.MatchQueryBuilder; +import org.opensearch.index.query.MultiMatchQueryBuilder; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.index.query.QueryStringQueryBuilder; +import org.opensearch.index.query.RangeQueryBuilder; +import org.opensearch.index.query.RegexpQueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.index.query.WildcardQueryBuilder; +import org.opensearch.index.query.functionscore.FunctionScoreQueryBuilder; +import org.opensearch.search.aggregations.bucket.range.RangeAggregationBuilder; +import org.opensearch.search.aggregations.bucket.terms.MultiTermsAggregationBuilder; +import org.opensearch.search.aggregations.support.MultiTermsValuesSourceConfig; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.sort.ScoreSortBuilder; +import org.opensearch.search.sort.SortOrder; +import org.opensearch.telemetry.metrics.Counter; +import org.opensearch.telemetry.metrics.MetricsRegistry; +import org.opensearch.telemetry.metrics.tags.Tags; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.Before; + +import java.util.Arrays; + +import org.mockito.Mockito; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.when; + +public final class SearchQueryCategorizerTests extends OpenSearchTestCase { + + private MetricsRegistry metricsRegistry; + + private SearchQueryCategorizer searchQueryCategorizer; + + @Before + public void setup() { + metricsRegistry = mock(MetricsRegistry.class); + when(metricsRegistry.createCounter(any(String.class), any(String.class), any(String.class))).thenAnswer( + invocation -> mock(Counter.class) + ); + searchQueryCategorizer = new SearchQueryCategorizer(metricsRegistry); + } + + public void testAggregationsQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.aggregation( + new MultiTermsAggregationBuilder("agg1").terms( + Arrays.asList( + new MultiTermsValuesSourceConfig.Builder().setFieldName("username").build(), + new MultiTermsValuesSourceConfig.Builder().setFieldName("rating").build() + ) + ) + ); + sourceBuilder.size(0); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.aggCounter).add(eq(1.0d)); + } + + public void testBoolQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.size(50); + sourceBuilder.query(new BoolQueryBuilder().must(new MatchQueryBuilder("searchText", "fox"))); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.boolCounter).add(eq(1.0d), any(Tags.class)); + Mockito.verify(searchQueryCategorizer.searchQueryCounters.matchCounter).add(eq(1.0d), any(Tags.class)); + } + + public void testFunctionScoreQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.size(50); + sourceBuilder.query(new FunctionScoreQueryBuilder(QueryBuilders.prefixQuery("text", "bro"))); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.functionScoreCounter).add(eq(1.0d), any(Tags.class)); + } + + public void testMatchQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.size(50); + sourceBuilder.query(QueryBuilders.matchQuery("tags", "php")); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.matchCounter).add(eq(1.0d), any(Tags.class)); + } + + public void testMatchPhraseQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.size(50); + sourceBuilder.query(QueryBuilders.matchPhraseQuery("tags", "php")); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.matchPhrasePrefixCounter).add(eq(1.0d), any(Tags.class)); + } + + public void testMultiMatchQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.size(50); + sourceBuilder.query(new MultiMatchQueryBuilder("foo bar", "myField")); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.multiMatchCounter).add(eq(1.0d), any(Tags.class)); + } + + public void testOtherQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.size(50); + BoostingQueryBuilder queryBuilder = new BoostingQueryBuilder( + new TermQueryBuilder("unmapped_field", "foo"), + new MatchNoneQueryBuilder() + ); + sourceBuilder.query(queryBuilder); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.otherQueryCounter, times(2)).add(eq(1.0d), any(Tags.class)); + Mockito.verify(searchQueryCategorizer.searchQueryCounters.termCounter).add(eq(1.0d), any(Tags.class)); + } + + public void testQueryStringQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.size(50); + QueryStringQueryBuilder queryBuilder = new QueryStringQueryBuilder("foo:*"); + sourceBuilder.query(queryBuilder); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.queryStringQueryCounter).add(eq(1.0d), any(Tags.class)); + } + + public void testRangeQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + RangeQueryBuilder rangeQuery = new RangeQueryBuilder("date"); + rangeQuery.gte("1970-01-01"); + rangeQuery.lt("1982-01-01"); + sourceBuilder.query(rangeQuery); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.rangeCounter).add(eq(1.0d), any(Tags.class)); + } + + public void testRegexQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.query(new RegexpQueryBuilder("field", "text")); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.regexCounter).add(eq(1.0d), any(Tags.class)); + } + + public void testSortQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.query(QueryBuilders.matchQuery("tags", "ruby")); + sourceBuilder.sort("creationDate", SortOrder.DESC); + sourceBuilder.sort(new ScoreSortBuilder()); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.matchCounter).add(eq(1.0d), any(Tags.class)); + Mockito.verify(searchQueryCategorizer.searchQueryCounters.sortCounter, times(2)).add(eq(1.0d), any(Tags.class)); + } + + public void testTermQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.size(50); + sourceBuilder.query(QueryBuilders.termQuery("field", "value2")); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.termCounter).add(eq(1.0d), any(Tags.class)); + } + + public void testWildcardQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.size(50); + sourceBuilder.query(new WildcardQueryBuilder("field", "text")); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.wildcardCounter).add(eq(1.0d), any(Tags.class)); + } + + public void testComplexQuery() { + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + sourceBuilder.size(50); + + TermQueryBuilder termQueryBuilder = QueryBuilders.termQuery("field", "value2"); + MatchQueryBuilder matchQueryBuilder = QueryBuilders.matchQuery("tags", "php"); + RegexpQueryBuilder regexpQueryBuilder = new RegexpQueryBuilder("field", "text"); + BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder().must(termQueryBuilder) + .filter(matchQueryBuilder) + .should(regexpQueryBuilder); + sourceBuilder.query(boolQueryBuilder); + sourceBuilder.aggregation(new RangeAggregationBuilder("agg1").field("num")); + + searchQueryCategorizer.categorize(sourceBuilder); + + Mockito.verify(searchQueryCategorizer.searchQueryCounters.termCounter).add(eq(1.0d), any(Tags.class)); + Mockito.verify(searchQueryCategorizer.searchQueryCounters.matchCounter).add(eq(1.0d), any(Tags.class)); + Mockito.verify(searchQueryCategorizer.searchQueryCounters.regexCounter).add(eq(1.0d), any(Tags.class)); + Mockito.verify(searchQueryCategorizer.searchQueryCounters.boolCounter).add(eq(1.0d), any(Tags.class)); + Mockito.verify(searchQueryCategorizer.searchQueryCounters.aggCounter).add(eq(1.0d)); + } +} diff --git a/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java b/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java index 64949cf861f70..6f03e87bf5824 100644 --- a/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java +++ b/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java @@ -191,6 +191,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ), new NodeStats( @@ -218,6 +219,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ), new NodeStats( @@ -245,6 +247,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ) ); @@ -303,6 +306,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ), new NodeStats( @@ -330,6 +334,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ), new NodeStats( @@ -357,6 +362,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ) ); diff --git a/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java b/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java index f37823d2c0c7d..1c0dc7fc1ca2d 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java @@ -938,6 +938,8 @@ public void testHandlePrePublishAndCommitWhenRemoteStateEnabled() throws IOExcep Version.CURRENT, randomAlphaOfLength(10), false, + 1, + randomAlphaOfLength(10), Collections.emptyList(), randomAlphaOfLength(10), true diff --git a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java index c7ed1cb732154..1d5c2a0f01b5c 100644 --- a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java +++ b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java @@ -840,7 +840,8 @@ public void testGatewayForRemoteStateForInitialBootstrap() throws IOException { remoteClusterStateService, remoteStoreRestoreService, persistedStateRegistry, - ClusterState.EMPTY_STATE + ClusterState.EMPTY_STATE, + false ); final CoordinationState.PersistedState lucenePersistedState = gateway.getPersistedState(); PersistedState remotePersistedState = persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE); @@ -886,7 +887,8 @@ public void testGatewayForRemoteStateForNodeReplacement() throws IOException { remoteClusterStateService, remoteStoreRestoreService, persistedStateRegistry, - ClusterState.EMPTY_STATE + ClusterState.EMPTY_STATE, + false ); final CoordinationState.PersistedState lucenePersistedState = gateway.getPersistedState(); PersistedState remotePersistedState = persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE); @@ -918,7 +920,13 @@ public void testGatewayForRemoteStateForNodeReboot() throws IOException { .clusterUUID(randomAlphaOfLength(10)) .build() ); - gateway = newGatewayForRemoteState(remoteClusterStateService, remoteStoreRestoreService, persistedStateRegistry, clusterState); + gateway = newGatewayForRemoteState( + remoteClusterStateService, + remoteStoreRestoreService, + persistedStateRegistry, + clusterState, + false + ); final CoordinationState.PersistedState lucenePersistedState = gateway.getPersistedState(); PersistedState remotePersistedState = persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE); verifyNoInteractions(remoteClusterStateService); @@ -933,13 +941,77 @@ public void testGatewayForRemoteStateForNodeReboot() throws IOException { } } + public void testGatewayForRemoteStateForInitialBootstrapBlocksApplied() throws IOException { + MockGatewayMetaState gateway = null; + try { + final RemoteClusterStateService remoteClusterStateService = mock(RemoteClusterStateService.class); + when(remoteClusterStateService.getLastKnownUUIDFromRemote(clusterName.value())).thenReturn("test-cluster-uuid"); + + final IndexMetadata indexMetadata = IndexMetadata.builder("test-index1") + .settings( + settings(Version.CURRENT).put(SETTING_INDEX_UUID, randomAlphaOfLength(10)) + .put(IndexMetadata.INDEX_READ_ONLY_SETTING.getKey(), true) + ) + .numberOfShards(5) + .numberOfReplicas(1) + .build(); + + final ClusterState clusterState = ClusterState.builder( + createClusterState( + randomNonNegativeLong(), + Metadata.builder() + .coordinationMetadata(CoordinationMetadata.builder().term(randomLong()).build()) + .put(indexMetadata, false) + .clusterUUID(ClusterState.UNKNOWN_UUID) + .persistentSettings(Settings.builder().put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), true).build()) + .build() + ) + ).nodes(DiscoveryNodes.EMPTY_NODES).build(); + + final RemoteStoreRestoreService remoteStoreRestoreService = mock(RemoteStoreRestoreService.class); + when(remoteStoreRestoreService.restore(any(), any(), anyBoolean(), any())).thenReturn( + RemoteRestoreResult.build("test-cluster-uuid", null, clusterState) + ); + final PersistedStateRegistry persistedStateRegistry = persistedStateRegistry(); + gateway = newGatewayForRemoteState( + remoteClusterStateService, + remoteStoreRestoreService, + persistedStateRegistry, + ClusterState.EMPTY_STATE, + true + ); + PersistedState remotePersistedState = persistedStateRegistry.getPersistedState(PersistedStateType.REMOTE); + PersistedState lucenePersistedState = persistedStateRegistry.getPersistedState(PersistedStateType.LOCAL); + verify(remoteClusterStateService).getLastKnownUUIDFromRemote(clusterName.value()); // change this + verify(remoteStoreRestoreService).restore(any(ClusterState.class), any(String.class), anyBoolean(), any(String[].class)); + assertThat(remotePersistedState.getLastAcceptedState(), nullValue()); + assertThat( + Metadata.isGlobalStateEquals(lucenePersistedState.getLastAcceptedState().metadata(), clusterState.metadata()), + equalTo(true) + ); + assertThat( + lucenePersistedState.getLastAcceptedState().blocks().hasGlobalBlock(Metadata.CLUSTER_READ_ONLY_BLOCK), + equalTo(true) + ); + assertThat( + IndexMetadata.INDEX_READ_ONLY_SETTING.get( + lucenePersistedState.getLastAcceptedState().metadata().index("test-index1").getSettings() + ), + equalTo(true) + ); + } finally { + IOUtils.close(gateway); + } + } + private MockGatewayMetaState newGatewayForRemoteState( RemoteClusterStateService remoteClusterStateService, RemoteStoreRestoreService remoteStoreRestoreService, PersistedStateRegistry persistedStateRegistry, - ClusterState currentState + ClusterState currentState, + boolean prepareFullState ) throws IOException { - MockGatewayMetaState gateway = new MockGatewayMetaState(localNode, bigArrays); + MockGatewayMetaState gateway = new MockGatewayMetaState(localNode, bigArrays, prepareFullState); String randomRepoName = "randomRepoName"; String stateRepoTypeAttributeKey = String.format( Locale.getDefault(), @@ -963,6 +1035,7 @@ private MockGatewayMetaState newGatewayForRemoteState( when(clusterService.getClusterSettings()).thenReturn( new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) ); + when(transportService.getLocalNode()).thenReturn(mock(DiscoveryNode.class)); final PersistedClusterStateService persistedClusterStateService = new PersistedClusterStateService( nodeEnvironment, xContentRegistry(), diff --git a/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java b/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java index 66426c2a880a3..6c9a3201656d7 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java @@ -26,6 +26,33 @@ public class ClusterMetadataManifestTests extends OpenSearchTestCase { + public void testClusterMetadataManifestXContentV0() throws IOException { + UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "test-uuid", "/test/upload/path"); + ClusterMetadataManifest originalManifest = new ClusterMetadataManifest( + 1L, + 1L, + "test-cluster-uuid", + "test-state-uuid", + Version.CURRENT, + "test-node-id", + false, + ClusterMetadataManifest.CODEC_V0, + null, + Collections.singletonList(uploadedIndexMetadata), + "prev-cluster-uuid", + true + ); + final XContentBuilder builder = JsonXContent.contentBuilder(); + builder.startObject(); + originalManifest.toXContent(builder, ToXContent.EMPTY_PARAMS); + builder.endObject(); + + try (XContentParser parser = createParser(JsonXContent.jsonXContent, BytesReference.bytes(builder))) { + final ClusterMetadataManifest fromXContentManifest = ClusterMetadataManifest.fromXContentV0(parser); + assertEquals(originalManifest, fromXContentManifest); + } + } + public void testClusterMetadataManifestXContent() throws IOException { UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "test-uuid", "/test/upload/path"); ClusterMetadataManifest originalManifest = new ClusterMetadataManifest( @@ -36,6 +63,8 @@ public void testClusterMetadataManifestXContent() throws IOException { Version.CURRENT, "test-node-id", false, + ClusterMetadataManifest.CODEC_V1, + "test-global-metadata-file", Collections.singletonList(uploadedIndexMetadata), "prev-cluster-uuid", true @@ -60,6 +89,8 @@ public void testClusterMetadataManifestSerializationEqualsHashCode() { Version.CURRENT, "B10RX1f5RJenMQvYccCgSQ", true, + 1, + "test-global-metadata-file", randomUploadedIndexMetadataList(), "yfObdx8KSMKKrXf8UyHhM", true diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 1edfb7c35094e..36a7fd80a68ef 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -12,6 +12,7 @@ import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.coordination.CoordinationMetadata; +import org.opensearch.cluster.metadata.IndexGraveyard; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNodes; @@ -28,6 +29,7 @@ import org.opensearch.common.lucene.store.ByteArrayIndexInput; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; +import org.opensearch.core.ParseField; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.common.bytes.BytesReference; @@ -66,10 +68,11 @@ import org.mockito.ArgumentMatchers; import static org.opensearch.gateway.remote.RemoteClusterStateService.DELIMITER; +import static org.opensearch.gateway.remote.RemoteClusterStateService.FORMAT_PARAMS; import static org.opensearch.gateway.remote.RemoteClusterStateService.INDEX_METADATA_CURRENT_CODEC_VERSION; -import static org.opensearch.gateway.remote.RemoteClusterStateService.INDEX_METADATA_FILE_PREFIX; import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_CURRENT_CODEC_VERSION; import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_FILE_PREFIX; +import static org.opensearch.gateway.remote.RemoteClusterStateService.METADATA_FILE_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT; @@ -232,14 +235,15 @@ public void testWriteFullMetadataInParallelSuccess() throws IOException { assertThat(manifest.getIndices().get(0).getIndexName(), is(uploadedIndexMetadata.getIndexName())); assertThat(manifest.getIndices().get(0).getIndexUUID(), is(uploadedIndexMetadata.getIndexUUID())); assertThat(manifest.getIndices().get(0).getUploadedFilename(), notNullValue()); + assertThat(manifest.getGlobalMetadataFileName(), notNullValue()); assertThat(manifest.getClusterTerm(), is(expectedManifest.getClusterTerm())); assertThat(manifest.getStateVersion(), is(expectedManifest.getStateVersion())); assertThat(manifest.getClusterUUID(), is(expectedManifest.getClusterUUID())); assertThat(manifest.getStateUUID(), is(expectedManifest.getStateUUID())); assertThat(manifest.getPreviousClusterUUID(), is(expectedManifest.getPreviousClusterUUID())); - assertEquals(actionListenerArgumentCaptor.getAllValues().size(), 1); - assertEquals(writeContextArgumentCaptor.getAllValues().size(), 1); + assertEquals(actionListenerArgumentCaptor.getAllValues().size(), 2); + assertEquals(writeContextArgumentCaptor.getAllValues().size(), 2); WriteContext capturedWriteContext = writeContextArgumentCaptor.getValue(); byte[] writtenBytes = capturedWriteContext.getStreamProvider(Integer.MAX_VALUE).provideStream(0).getInputStream().readAllBytes(); @@ -263,7 +267,7 @@ public void testWriteFullMetadataInParallelSuccess() throws IOException { } - public void testWriteFullMetadataInParallelFailure() throws IOException { + public void testWriteFullMetadataFailureForGlobalMetadata() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); AsyncMultiStreamBlobContainer container = (AsyncMultiStreamBlobContainer) mockBlobStoreObjects(AsyncMultiStreamBlobContainer.class); @@ -274,6 +278,27 @@ public void testWriteFullMetadataInParallelFailure() throws IOException { return null; }).when(container).asyncBlobUpload(any(WriteContext.class), actionListenerArgumentCaptor.capture()); + remoteClusterStateService.start(); + assertThrows( + RemoteClusterStateService.GlobalMetadataTransferException.class, + () -> remoteClusterStateService.writeFullMetadata(clusterState, randomAlphaOfLength(10)) + ); + } + + public void testWriteFullMetadataInParallelFailureForIndexMetadata() throws IOException { + final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); + AsyncMultiStreamBlobContainer container = (AsyncMultiStreamBlobContainer) mockBlobStoreObjects(AsyncMultiStreamBlobContainer.class); + + ArgumentCaptor> actionListenerArgumentCaptor = ArgumentCaptor.forClass(ActionListener.class); + + doAnswer((i) -> { + actionListenerArgumentCaptor.getValue().onResponse(null); + return null; + }).doAnswer((i) -> { + actionListenerArgumentCaptor.getValue().onFailure(new RuntimeException("Cannot upload to remote")); + return null; + }).when(container).asyncBlobUpload(any(WriteContext.class), actionListenerArgumentCaptor.capture()); + remoteClusterStateService.start(); assertThrows( RemoteClusterStateService.IndexMetadataTransferException.class, @@ -340,6 +365,207 @@ public void testWriteIncrementalMetadataSuccess() throws IOException { assertThat(manifest.getStateUUID(), is(expectedManifest.getStateUUID())); } + /* + * Here we will verify the migration of manifest file from codec V0 and V1. + * + * Initially codec version is 0 and global metadata is also null, we will perform index metadata update. + * In final manifest codec version should be 1 and + * global metadata should be updated, even if it was not changed in this cluster state update + */ + public void testMigrationFromCodecV0ManifestToCodecV1Manifest() throws IOException { + mockBlobStoreObjects(); + final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); + final ClusterState previousClusterState = ClusterState.builder(ClusterName.DEFAULT) + .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata)) + .nodes(nodesWithLocalNodeClusterManager()) + .build(); + + // Update only index metadata + final IndexMetadata indexMetadata = new IndexMetadata.Builder("test").settings( + Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.SETTING_INDEX_UUID, "uuid") + .build() + ).numberOfShards(1).numberOfReplicas(0).build(); + Metadata newMetadata = Metadata.builder(previousClusterState.metadata()).put(indexMetadata, true).build(); + ClusterState newClusterState = ClusterState.builder(previousClusterState).metadata(newMetadata).build(); + + // previous manifest with codec 0 and null global metadata + final ClusterMetadataManifest previousManifest = ClusterMetadataManifest.builder() + .codecVersion(ClusterMetadataManifest.CODEC_V0) + .globalMetadataFileName(null) + .indices(Collections.emptyList()) + .build(); + + remoteClusterStateService.start(); + final ClusterMetadataManifest manifestAfterUpdate = remoteClusterStateService.writeIncrementalMetadata( + previousClusterState, + newClusterState, + previousManifest + ); + + // global metadata is updated + assertThat(manifestAfterUpdate.getGlobalMetadataFileName(), notNullValue()); + // Manifest file with codec version with 1 is updated. + assertThat(manifestAfterUpdate.getCodecVersion(), is(ClusterMetadataManifest.CODEC_V1)); + } + + public void testWriteIncrementalGlobalMetadataSuccess() throws IOException { + final ClusterState clusterState = generateClusterStateWithGlobalMetadata().nodes(nodesWithLocalNodeClusterManager()).build(); + mockBlobStoreObjects(); + final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); + final ClusterState previousClusterState = ClusterState.builder(ClusterName.DEFAULT) + .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata)) + .build(); + + final ClusterMetadataManifest previousManifest = ClusterMetadataManifest.builder() + .codecVersion(2) + .globalMetadataFileName("global-metadata-file") + .indices(Collections.emptyList()) + .build(); + + remoteClusterStateService.start(); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeIncrementalMetadata( + previousClusterState, + clusterState, + previousManifest + ); + + final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() + .indices(Collections.emptyList()) + .globalMetadataFileName("mock-filename") + .clusterTerm(1L) + .stateVersion(1L) + .stateUUID("state-uuid") + .clusterUUID("cluster-uuid") + .previousClusterUUID("prev-cluster-uuid") + .build(); + + assertThat(manifest.getGlobalMetadataFileName(), notNullValue()); + assertThat(manifest.getClusterTerm(), is(expectedManifest.getClusterTerm())); + assertThat(manifest.getStateVersion(), is(expectedManifest.getStateVersion())); + assertThat(manifest.getClusterUUID(), is(expectedManifest.getClusterUUID())); + assertThat(manifest.getStateUUID(), is(expectedManifest.getStateUUID())); + } + + /* + * Here we will verify index metadata is not uploaded again if change is only in global metadata + */ + public void testGlobalMetadataOnlyUpdated() throws IOException { + // setup + mockBlobStoreObjects(); + final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); + final ClusterState initialClusterState = ClusterState.builder(ClusterName.DEFAULT) + .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata)) + .build(); + final ClusterMetadataManifest initialManifest = ClusterMetadataManifest.builder() + .codecVersion(2) + .globalMetadataFileName("global-metadata-file") + .indices(Collections.emptyList()) + .build(); + remoteClusterStateService.start(); + + // Initial cluster state with index. + final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); + // Updating remote cluster state with changing index metadata + final ClusterMetadataManifest manifestAfterIndexMetadataUpdate = remoteClusterStateService.writeIncrementalMetadata( + initialClusterState, + clusterState, + initialManifest + ); + + // new cluster state where only global metadata is different + Metadata newMetadata = Metadata.builder(clusterState.metadata()) + .persistentSettings(Settings.builder().put("cluster.blocks.read_only", true).build()) + .build(); + ClusterState newClusterState = ClusterState.builder(clusterState).metadata(newMetadata).build(); + + // updating remote cluster state with global metadata + final ClusterMetadataManifest manifestAfterGlobalMetadataUpdate = remoteClusterStateService.writeIncrementalMetadata( + clusterState, + newClusterState, + manifestAfterIndexMetadataUpdate + ); + + // Verify that index metadata information is same in manifest files + assertThat(manifestAfterIndexMetadataUpdate.getIndices().size(), is(manifestAfterGlobalMetadataUpdate.getIndices().size())); + assertThat( + manifestAfterIndexMetadataUpdate.getIndices().get(0).getIndexName(), + is(manifestAfterGlobalMetadataUpdate.getIndices().get(0).getIndexName()) + ); + assertThat( + manifestAfterIndexMetadataUpdate.getIndices().get(0).getIndexUUID(), + is(manifestAfterGlobalMetadataUpdate.getIndices().get(0).getIndexUUID()) + ); + + // since timestamp is part of file name, if file name is same we can confirm that file is not update in global metadata update + assertThat( + manifestAfterIndexMetadataUpdate.getIndices().get(0).getUploadedFilename(), + is(manifestAfterGlobalMetadataUpdate.getIndices().get(0).getUploadedFilename()) + ); + + // global metadata file would have changed + assertFalse( + manifestAfterIndexMetadataUpdate.getGlobalMetadataFileName() + .equalsIgnoreCase(manifestAfterGlobalMetadataUpdate.getGlobalMetadataFileName()) + ); + } + + /* + * Here we will verify global metadata is not uploaded again if change is only in index metadata + */ + public void testIndexMetadataOnlyUpdated() throws IOException { + // setup + mockBlobStoreObjects(); + final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); + final ClusterState initialClusterState = ClusterState.builder(ClusterName.DEFAULT) + .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata)) + .build(); + final ClusterMetadataManifest initialManifest = ClusterMetadataManifest.builder() + .codecVersion(2) + .indices(Collections.emptyList()) + .build(); + remoteClusterStateService.start(); + + // Initial cluster state with global metadata. + final ClusterState clusterState = generateClusterStateWithGlobalMetadata().nodes(nodesWithLocalNodeClusterManager()).build(); + + // Updating remote cluster state with changing global metadata + final ClusterMetadataManifest manifestAfterGlobalMetadataUpdate = remoteClusterStateService.writeIncrementalMetadata( + initialClusterState, + clusterState, + initialManifest + ); + + // new cluster state where only Index metadata is different + final IndexMetadata indexMetadata = new IndexMetadata.Builder("test").settings( + Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.SETTING_INDEX_UUID, "uuid") + .build() + ).numberOfShards(1).numberOfReplicas(0).build(); + Metadata newMetadata = Metadata.builder(clusterState.metadata()).put(indexMetadata, true).build(); + ClusterState newClusterState = ClusterState.builder(clusterState).metadata(newMetadata).build(); + + // updating remote cluster state with index metadata + final ClusterMetadataManifest manifestAfterIndexMetadataUpdate = remoteClusterStateService.writeIncrementalMetadata( + clusterState, + newClusterState, + manifestAfterGlobalMetadataUpdate + ); + + // Verify that global metadata information is same in manifest files after updating index Metadata + // since timestamp is part of file name, if file name is same we can confirm that file is not update in index metadata update + assertThat( + manifestAfterIndexMetadataUpdate.getGlobalMetadataFileName(), + is(manifestAfterGlobalMetadataUpdate.getGlobalMetadataFileName()) + ); + + // Index metadata would have changed + assertThat(manifestAfterGlobalMetadataUpdate.getIndices().size(), is(0)); + assertThat(manifestAfterIndexMetadataUpdate.getIndices().size(), is(1)); + } + public void testReadLatestMetadataManifestFailedIOException() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); @@ -404,6 +630,7 @@ public void testReadLatestMetadataManifestSuccessButNoIndexMetadata() throws IOE .nodeId("nodeA") .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) .previousClusterUUID("prev-cluster-uuid") + .codecVersion(ClusterMetadataManifest.CODEC_V0) .build(); BlobContainer blobContainer = mockBlobStoreObjects(); @@ -411,7 +638,8 @@ public void testReadLatestMetadataManifestSuccessButNoIndexMetadata() throws IOE remoteClusterStateService.start(); assertEquals( - remoteClusterStateService.getLatestIndexMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID()) + remoteClusterStateService.getLatestMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID()) + .getIndices() .size(), 0 ); @@ -439,10 +667,8 @@ public void testReadLatestMetadataManifestSuccessButIndexMetadataFetchIOExceptio remoteClusterStateService.start(); Exception e = assertThrows( IllegalStateException.class, - () -> remoteClusterStateService.getLatestIndexMetadata( - clusterState.getClusterName().value(), - clusterState.metadata().clusterUUID() - ) + () -> remoteClusterStateService.getLatestMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID()) + .getIndices() ); assertEquals(e.getMessage(), "Error while downloading IndexMetadata - " + uploadedIndexMetadata.getUploadedFilename()); } @@ -460,6 +686,7 @@ public void testReadLatestMetadataManifestSuccess() throws IOException { .clusterUUID("cluster-uuid") .nodeId("nodeA") .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) + .codecVersion(ClusterMetadataManifest.CODEC_V0) .previousClusterUUID("prev-cluster-uuid") .build(); @@ -480,6 +707,70 @@ public void testReadLatestMetadataManifestSuccess() throws IOException { assertThat(manifest.getStateUUID(), is(expectedManifest.getStateUUID())); } + public void testReadGlobalMetadata() throws IOException { + when(blobStoreRepository.getNamedXContentRegistry()).thenReturn(new NamedXContentRegistry( + List.of(new NamedXContentRegistry.Entry(Metadata.Custom.class, new ParseField(IndexGraveyard.TYPE), IndexGraveyard::fromXContent)))); + final ClusterState clusterState = generateClusterStateWithGlobalMetadata().nodes(nodesWithLocalNodeClusterManager()).build(); + remoteClusterStateService.start(); + + final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() + .indices(List.of()) + .clusterTerm(1L) + .stateVersion(1L) + .stateUUID("state-uuid") + .clusterUUID("cluster-uuid") + .codecVersion(MANIFEST_CURRENT_CODEC_VERSION) + .globalMetadataFileName("global-metadata-file") + .nodeId("nodeA") + .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) + .previousClusterUUID("prev-cluster-uuid") + .build(); + + Metadata expactedMetadata = Metadata.builder().persistentSettings(Settings.builder().put("readonly", true).build()).build(); + mockBlobContainerForGlobalMetadata(mockBlobStoreObjects(), expectedManifest, expactedMetadata); + + Metadata metadata = remoteClusterStateService.getLatestMetadata( + clusterState.getClusterName().value(), + clusterState.metadata().clusterUUID() + ); + + assertTrue(Metadata.isGlobalStateEquals(metadata, expactedMetadata)); + } + + public void testReadGlobalMetadataIOException() throws IOException { + final ClusterState clusterState = generateClusterStateWithGlobalMetadata().nodes(nodesWithLocalNodeClusterManager()).build(); + remoteClusterStateService.start(); + String globalIndexMetadataName = "global-metadata-file"; + final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() + .indices(List.of()) + .clusterTerm(1L) + .stateVersion(1L) + .stateUUID("state-uuid") + .clusterUUID("cluster-uuid") + .codecVersion(MANIFEST_CURRENT_CODEC_VERSION) + .globalMetadataFileName(globalIndexMetadataName) + .nodeId("nodeA") + .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) + .previousClusterUUID("prev-cluster-uuid") + .build(); + + Metadata expactedMetadata = Metadata.builder().persistentSettings(Settings.builder().put("readonly", true).build()).build(); + + BlobContainer blobContainer = mockBlobStoreObjects(); + mockBlobContainerForGlobalMetadata(blobContainer, expectedManifest, expactedMetadata); + + when(blobContainer.readBlob(RemoteClusterStateService.GLOBAL_METADATA_FORMAT.blobName(globalIndexMetadataName))).thenThrow( + FileNotFoundException.class + ); + + remoteClusterStateService.start(); + Exception e = assertThrows( + IllegalStateException.class, + () -> remoteClusterStateService.getLatestMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID()) + ); + assertEquals(e.getMessage(), "Error while downloading Global Metadata - " + globalIndexMetadataName); + } + public void testReadLatestIndexMetadataSuccess() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); remoteClusterStateService.start(); @@ -506,14 +797,16 @@ public void testReadLatestIndexMetadataSuccess() throws IOException { .nodeId("nodeA") .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) .previousClusterUUID("prev-cluster-uuid") + .globalMetadataFileName("global-metadata-file") + .codecVersion(ClusterMetadataManifest.CODEC_V0) .build(); mockBlobContainer(mockBlobStoreObjects(), expectedManifest, Map.of(index.getUUID(), indexMetadata)); - Map indexMetadataMap = remoteClusterStateService.getLatestIndexMetadata( + Map indexMetadataMap = remoteClusterStateService.getLatestMetadata( clusterState.getClusterName().value(), clusterState.metadata().clusterUUID() - ); + ).getIndices(); assertEquals(indexMetadataMap.size(), 1); assertEquals(indexMetadataMap.get(index.getUUID()).getIndex().getName(), index.getName()); @@ -716,7 +1009,7 @@ public void testFileNames() { String indexMetadataFileName = RemoteClusterStateService.indexMetadataFileName(indexMetadata); String[] splittedIndexMetadataFileName = indexMetadataFileName.split(DELIMITER); assertThat(indexMetadataFileName.split(DELIMITER).length, is(4)); - assertThat(splittedIndexMetadataFileName[0], is(INDEX_METADATA_FILE_PREFIX)); + assertThat(splittedIndexMetadataFileName[0], is(METADATA_FILE_PREFIX)); assertThat(splittedIndexMetadataFileName[1], is(RemoteStoreUtils.invertLong(indexMetadata.getVersion()))); assertThat(splittedIndexMetadataFileName[3], is(String.valueOf(INDEX_METADATA_CURRENT_CODEC_VERSION))); @@ -845,6 +1138,7 @@ private ClusterMetadataManifest generateClusterMetadataManifest( .previousClusterUUID(previousClusterUUID) .committed(true) .clusterUUIDCommitted(true) + .globalMetadataFileName("test-global-metadata") .build(); } @@ -884,7 +1178,8 @@ private void mockBlobContainer( BytesReference bytes = RemoteClusterStateService.CLUSTER_METADATA_MANIFEST_FORMAT.serialize( clusterMetadataManifest, "manifestFileName", - blobStoreRepository.getCompressor() + blobStoreRepository.getCompressor(), + FORMAT_PARAMS ); when(blobContainer.readBlob("manifestFileName")).thenReturn(new ByteArrayInputStream(bytes.streamInput().readAllBytes())); @@ -898,7 +1193,8 @@ private void mockBlobContainer( BytesReference bytesIndexMetadata = RemoteClusterStateService.INDEX_METADATA_FORMAT.serialize( indexMetadata, fileName, - blobStoreRepository.getCompressor() + blobStoreRepository.getCompressor(), + FORMAT_PARAMS ); when(blobContainer.readBlob(fileName + ".dat")).thenReturn( new ByteArrayInputStream(bytesIndexMetadata.streamInput().readAllBytes()) @@ -909,6 +1205,57 @@ private void mockBlobContainer( }); } + private void mockBlobContainerForGlobalMetadata( + BlobContainer blobContainer, + ClusterMetadataManifest clusterMetadataManifest, + Metadata metadata + ) throws IOException { + String mockManifestFileName = "manifest__1__2__C__456__1"; + BlobMetadata blobMetadata = new PlainBlobMetadata(mockManifestFileName, 1); + when( + blobContainer.listBlobsByPrefixInSortedOrder( + "manifest" + RemoteClusterStateService.DELIMITER, + 1, + BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC + ) + ).thenReturn(Arrays.asList(blobMetadata)); + + BytesReference bytes = RemoteClusterStateService.CLUSTER_METADATA_MANIFEST_FORMAT.serialize( + clusterMetadataManifest, + mockManifestFileName, + blobStoreRepository.getCompressor(), + FORMAT_PARAMS + ); + when(blobContainer.readBlob(mockManifestFileName)).thenReturn(new ByteArrayInputStream(bytes.streamInput().readAllBytes())); + + BytesReference bytesGlobalMetadata = RemoteClusterStateService.GLOBAL_METADATA_FORMAT.serialize( + metadata, + "global-metadata-file", + blobStoreRepository.getCompressor(), + FORMAT_PARAMS + ); + String[] splitPath = clusterMetadataManifest.getGlobalMetadataFileName().split("/"); + when(blobContainer.readBlob(RemoteClusterStateService.GLOBAL_METADATA_FORMAT.blobName(splitPath[splitPath.length - 1]))).thenReturn( + new ByteArrayInputStream(bytesGlobalMetadata.streamInput().readAllBytes()) + ); + } + + private static ClusterState.Builder generateClusterStateWithGlobalMetadata() { + final Settings clusterSettings = Settings.builder().put("cluster.blocks.read_only", true).build(); + final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); + + return ClusterState.builder(ClusterName.DEFAULT) + .version(1L) + .stateUUID("state-uuid") + .metadata( + Metadata.builder() + .persistentSettings(clusterSettings) + .clusterUUID("cluster-uuid") + .coordinationMetadata(coordinationMetadata) + .build() + ); + } + private static ClusterState.Builder generateClusterStateWithOneIndex() { final Index index = new Index("test-index", "index-uuid"); final Settings idxSettings = Settings.builder() diff --git a/server/src/test/java/org/opensearch/index/query/QueryShapeVisitorTests.java b/server/src/test/java/org/opensearch/index/query/QueryShapeVisitorTests.java new file mode 100644 index 0000000000000..18b814aec61c2 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/query/QueryShapeVisitorTests.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.query; + +import org.opensearch.test.OpenSearchTestCase; + +import static org.junit.Assert.assertEquals; + +public final class QueryShapeVisitorTests extends OpenSearchTestCase { + public void testQueryShapeVisitor() { + QueryBuilder builder = new BoolQueryBuilder().must(new TermQueryBuilder("foo", "bar")) + .filter(new ConstantScoreQueryBuilder(new RangeQueryBuilder("timestamp").from("12345677").to("2345678"))) + .should( + new BoolQueryBuilder().must(new MatchQueryBuilder("text", "this is some text")) + .mustNot(new RegexpQueryBuilder("color", "red.*")) + ) + .must(new TermsQueryBuilder("genre", "action", "drama", "romance")); + QueryShapeVisitor shapeVisitor = new QueryShapeVisitor(); + builder.visit(shapeVisitor); + assertEquals( + "{\"type\":\"bool\",\"must\"[{\"type\":\"term\"},{\"type\":\"terms\"}],\"filter\"[{\"type\":\"constant_score\",\"filter\"[{\"type\":\"range\"}]}],\"should\"[{\"type\":\"bool\",\"must\"[{\"type\":\"match\"}],\"must_not\"[{\"type\":\"regexp\"}]}]}", + shapeVisitor.toJson() + ); + } +} diff --git a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java index 6a99063d11353..fe389e3b3fcb4 100644 --- a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java @@ -43,6 +43,7 @@ import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.BiConsumer; import java.util.stream.Collectors; import static org.opensearch.index.engine.EngineTestCase.assertAtMostOneLuceneDocumentPerSequenceNumber; @@ -388,9 +389,10 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { - super.getSegmentFiles(replicationId, checkpoint, filesToFetch, indexShard, listener); + super.getSegmentFiles(replicationId, checkpoint, filesToFetch, indexShard, (fileName, bytesRecovered) -> {}, listener); runAfterGetFiles[index.getAndIncrement()].run(); } diff --git a/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java b/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java index 5a13f57db2c87..51814283c5eb3 100644 --- a/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java +++ b/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java @@ -520,8 +520,8 @@ private Tuple mockIn if (counter.incrementAndGet() <= succeedOnAttempt) { throw new RuntimeException("Inducing failure in upload"); } - return indexShard.getLatestSegmentInfosAndCheckpoint(); - })).when(shard).getLatestSegmentInfosAndCheckpoint(); + return indexShard.getLatestReplicationCheckpoint(); + })).when(shard).computeReplicationCheckpoint(any()); doAnswer(invocation -> { if (Objects.nonNull(successLatch)) { diff --git a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java index 528402d48658a..eab38bfe5c64d 100644 --- a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java @@ -91,6 +91,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; +import java.util.function.BiConsumer; import java.util.function.Function; import static org.opensearch.index.engine.EngineTestCase.assertAtMostOneLuceneDocumentPerSequenceNumber; @@ -725,6 +726,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { // set the listener, we will only fail it once we cancel the source. @@ -923,6 +925,33 @@ public void testSnapshotWhileFailoverIncomplete() throws Exception { } } + public void testReuseReplicationCheckpointWhenLatestInfosIsUnChanged() throws Exception { + try (ReplicationGroup shards = createGroup(1, settings, indexMapping, new NRTReplicationEngineFactory(), createTempDir())) { + final IndexShard primaryShard = shards.getPrimary(); + shards.startAll(); + shards.indexDocs(10); + shards.refresh("test"); + replicateSegments(primaryShard, shards.getReplicas()); + shards.assertAllEqual(10); + final ReplicationCheckpoint latestReplicationCheckpoint = primaryShard.getLatestReplicationCheckpoint(); + try (GatedCloseable segmentInfosSnapshot = primaryShard.getSegmentInfosSnapshot()) { + assertEquals(latestReplicationCheckpoint, primaryShard.computeReplicationCheckpoint(segmentInfosSnapshot.get())); + } + final Tuple, ReplicationCheckpoint> latestSegmentInfosAndCheckpoint = primaryShard + .getLatestSegmentInfosAndCheckpoint(); + try (final GatedCloseable closeable = latestSegmentInfosAndCheckpoint.v1()) { + assertEquals(latestReplicationCheckpoint, primaryShard.computeReplicationCheckpoint(closeable.get())); + } + } + } + + public void testComputeReplicationCheckpointNullInfosReturnsEmptyCheckpoint() throws Exception { + try (ReplicationGroup shards = createGroup(1, settings, indexMapping, new NRTReplicationEngineFactory(), createTempDir())) { + final IndexShard primaryShard = shards.getPrimary(); + assertEquals(ReplicationCheckpoint.empty(primaryShard.shardId), primaryShard.computeReplicationCheckpoint(null)); + } + } + private SnapshotShardsService getSnapshotShardsService(IndexShard replicaShard) { final TransportService transportService = mock(TransportService.class); when(transportService.getThreadPool()).thenReturn(threadPool); diff --git a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationWithNodeToNodeIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationWithNodeToNodeIndexShardTests.java index c394101697b47..f0950fe5392de 100644 --- a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationWithNodeToNodeIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationWithNodeToNodeIndexShardTests.java @@ -47,6 +47,7 @@ import java.util.Map; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import java.util.function.BiConsumer; import java.util.function.Consumer; import java.util.function.Function; import java.util.stream.Collectors; @@ -87,6 +88,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { // randomly resolve the listener, indicating the source has resolved. @@ -131,6 +133,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { Assert.fail("Should not be reached"); @@ -176,6 +179,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { Assert.fail("Unreachable"); @@ -223,6 +227,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) {} }; @@ -269,6 +274,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onResponse(new GetSegmentFilesResponse(Collections.emptyList())); diff --git a/server/src/test/java/org/opensearch/index/store/RemoteStoreFileDownloaderTests.java b/server/src/test/java/org/opensearch/index/store/RemoteStoreFileDownloaderTests.java index 588d9e8bb13a2..6d8b3fe4d69fb 100644 --- a/server/src/test/java/org/opensearch/index/store/RemoteStoreFileDownloaderTests.java +++ b/server/src/test/java/org/opensearch/index/store/RemoteStoreFileDownloaderTests.java @@ -9,12 +9,18 @@ package org.opensearch.index.store; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.NIOFSDirectory; +import org.opensearch.OpenSearchTimeoutException; +import org.opensearch.action.support.PlainActionFuture; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.CancellableThreads; +import org.opensearch.core.action.ActionListener; import org.opensearch.core.index.shard.ShardId; import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.test.OpenSearchTestCase; @@ -31,8 +37,10 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; public class RemoteStoreFileDownloaderTests extends OpenSearchTestCase { @@ -76,31 +84,132 @@ public void stopThreadPool() throws Exception { } public void testDownload() throws IOException { - fileDownloader.download(source, destination, files.keySet()); + final PlainActionFuture l = new PlainActionFuture<>(); + fileDownloader.downloadAsync(new CancellableThreads(), source, destination, files.keySet(), l); + l.actionGet(); assertContent(files, destination); } - public void testDownloadWithSecondDestination() throws IOException { + public void testDownloadWithSecondDestination() throws IOException, InterruptedException { fileDownloader.download(source, destination, secondDestination, files.keySet(), () -> {}); assertContent(files, destination); assertContent(files, secondDestination); } - public void testDownloadWithFileCompletionHandler() throws IOException { + public void testDownloadWithFileCompletionHandler() throws IOException, InterruptedException { final AtomicInteger counter = new AtomicInteger(0); fileDownloader.download(source, destination, null, files.keySet(), counter::incrementAndGet); assertContent(files, destination); assertEquals(files.size(), counter.get()); } - public void testDownloadNonExistentFile() { - assertThrows(NoSuchFileException.class, () -> fileDownloader.download(source, destination, Set.of("not real"))); + public void testDownloadNonExistentFile() throws InterruptedException { + final CountDownLatch latch = new CountDownLatch(1); + fileDownloader.downloadAsync(new CancellableThreads(), source, destination, Set.of("not real"), new ActionListener<>() { + @Override + public void onResponse(Void unused) {} + + @Override + public void onFailure(Exception e) { + assertEquals(NoSuchFileException.class, e.getClass()); + latch.countDown(); + } + }); + assertTrue(latch.await(10, TimeUnit.SECONDS)); } - public void testDownloadExtraNonExistentFile() { - List filesWithExtra = new ArrayList<>(files.keySet()); + public void testDownloadExtraNonExistentFile() throws InterruptedException { + final CountDownLatch latch = new CountDownLatch(1); + final List filesWithExtra = new ArrayList<>(files.keySet()); filesWithExtra.add("not real"); - assertThrows(NoSuchFileException.class, () -> fileDownloader.download(source, destination, filesWithExtra)); + fileDownloader.downloadAsync(new CancellableThreads(), source, destination, filesWithExtra, new ActionListener<>() { + @Override + public void onResponse(Void unused) {} + + @Override + public void onFailure(Exception e) { + assertEquals(NoSuchFileException.class, e.getClass()); + latch.countDown(); + } + }); + assertTrue(latch.await(10, TimeUnit.SECONDS)); + } + + public void testCancellable() { + final CancellableThreads cancellableThreads = new CancellableThreads(); + final PlainActionFuture blockingListener = new PlainActionFuture<>(); + final Directory blockingDestination = new FilterDirectory(destination) { + @Override + public void copyFrom(Directory from, String src, String dest, IOContext context) { + try { + Thread.sleep(60_000); // Will be interrupted + fail("Expected to be interrupted"); + } catch (InterruptedException e) { + throw new RuntimeException("Failed due to interrupt", e); + } + } + }; + fileDownloader.downloadAsync(cancellableThreads, source, blockingDestination, files.keySet(), blockingListener); + assertThrows( + "Expected to timeout due to blocking directory", + OpenSearchTimeoutException.class, + () -> blockingListener.actionGet(TimeValue.timeValueMillis(500)) + ); + cancellableThreads.cancel("test"); + assertThrows( + "Expected to complete with cancellation failure", + CancellableThreads.ExecutionCancelledException.class, + blockingListener::actionGet + ); + } + + public void testBlockingCallCanBeInterrupted() throws Exception { + final Directory blockingDestination = new FilterDirectory(destination) { + @Override + public void copyFrom(Directory from, String src, String dest, IOContext context) { + try { + Thread.sleep(60_000); // Will be interrupted + fail("Expected to be interrupted"); + } catch (InterruptedException e) { + throw new RuntimeException("Failed due to interrupt", e); + } + } + }; + final AtomicReference capturedException = new AtomicReference<>(); + final Thread thread = new Thread(() -> { + try { + fileDownloader.download(source, blockingDestination, null, files.keySet(), () -> {}); + } catch (Exception e) { + capturedException.set(e); + } + }); + thread.start(); + thread.interrupt(); + thread.join(); + assertEquals(InterruptedException.class, capturedException.get().getClass()); + } + + public void testIOException() throws IOException, InterruptedException { + final Directory failureDirectory = new FilterDirectory(destination) { + @Override + public void copyFrom(Directory from, String src, String dest, IOContext context) throws IOException { + throw new IOException("test"); + } + }; + assertThrows(IOException.class, () -> fileDownloader.download(source, failureDirectory, null, files.keySet(), () -> {})); + + final CountDownLatch latch = new CountDownLatch(1); + fileDownloader.downloadAsync(new CancellableThreads(), source, failureDirectory, files.keySet(), new ActionListener<>() { + @Override + public void onResponse(Void unused) {} + + @Override + public void onFailure(Exception e) { + assertEquals(IOException.class, e.getClass()); + latch.countDown(); + } + }); + assertTrue(latch.await(10, TimeUnit.SECONDS)); } private static void assertContent(Map expected, Directory destination) throws IOException { diff --git a/server/src/test/java/org/opensearch/indices/replication/PrimaryShardReplicationSourceTests.java b/server/src/test/java/org/opensearch/indices/replication/PrimaryShardReplicationSourceTests.java index bcacef83d190a..e4dd32e5c6f70 100644 --- a/server/src/test/java/org/opensearch/indices/replication/PrimaryShardReplicationSourceTests.java +++ b/server/src/test/java/org/opensearch/indices/replication/PrimaryShardReplicationSourceTests.java @@ -125,6 +125,7 @@ public void testGetSegmentFiles() { checkpoint, Arrays.asList(testMetadata), mock(IndexShard.class), + (fileName, bytesRecovered) -> {}, mock(ActionListener.class) ); CapturingTransport.CapturedRequest[] requestList = transport.getCapturedRequestsAndClear(); @@ -153,6 +154,7 @@ public void testTransportTimeoutForGetSegmentFilesAction() { checkpoint, Arrays.asList(testMetadata), mock(IndexShard.class), + (fileName, bytesRecovered) -> {}, mock(ActionListener.class) ); CapturingTransport.CapturedRequest[] requestList = transport.getCapturedRequestsAndClear(); @@ -178,6 +180,7 @@ public void testGetSegmentFiles_CancelWhileRequestOpen() throws InterruptedExcep checkpoint, Arrays.asList(testMetadata), mock(IndexShard.class), + (fileName, bytesRecovered) -> {}, new ActionListener<>() { @Override public void onResponse(GetSegmentFilesResponse getSegmentFilesResponse) { diff --git a/server/src/test/java/org/opensearch/indices/replication/RemoteStoreReplicationSourceTests.java b/server/src/test/java/org/opensearch/indices/replication/RemoteStoreReplicationSourceTests.java index 9204f48ba5bdd..287962b158c79 100644 --- a/server/src/test/java/org/opensearch/indices/replication/RemoteStoreReplicationSourceTests.java +++ b/server/src/test/java/org/opensearch/indices/replication/RemoteStoreReplicationSourceTests.java @@ -90,7 +90,7 @@ public void testGetSegmentFiles() throws ExecutionException, InterruptedExceptio List filesToFetch = primaryShard.getSegmentMetadataMap().values().stream().collect(Collectors.toList()); final PlainActionFuture res = PlainActionFuture.newFuture(); replicationSource = new RemoteStoreReplicationSource(primaryShard); - replicationSource.getSegmentFiles(REPLICATION_ID, checkpoint, filesToFetch, replicaShard, res); + replicationSource.getSegmentFiles(REPLICATION_ID, checkpoint, filesToFetch, replicaShard, (fileName, bytesRecovered) -> {}, res); GetSegmentFilesResponse response = res.get(); assertEquals(response.files.size(), filesToFetch.size()); assertTrue(response.files.containsAll(filesToFetch)); @@ -104,7 +104,14 @@ public void testGetSegmentFilesAlreadyExists() throws IOException, InterruptedEx try { final PlainActionFuture res = PlainActionFuture.newFuture(); replicationSource = new RemoteStoreReplicationSource(primaryShard); - replicationSource.getSegmentFiles(REPLICATION_ID, checkpoint, filesToFetch, primaryShard, res); + replicationSource.getSegmentFiles( + REPLICATION_ID, + checkpoint, + filesToFetch, + primaryShard, + (fileName, bytesRecovered) -> {}, + res + ); res.get(); } catch (AssertionError | ExecutionException ex) { latch.countDown(); @@ -118,7 +125,14 @@ public void testGetSegmentFilesReturnEmptyResponse() throws ExecutionException, final ReplicationCheckpoint checkpoint = primaryShard.getLatestReplicationCheckpoint(); final PlainActionFuture res = PlainActionFuture.newFuture(); replicationSource = new RemoteStoreReplicationSource(primaryShard); - replicationSource.getSegmentFiles(REPLICATION_ID, checkpoint, Collections.emptyList(), primaryShard, res); + replicationSource.getSegmentFiles( + REPLICATION_ID, + checkpoint, + Collections.emptyList(), + primaryShard, + (fileName, bytesRecovered) -> {}, + res + ); GetSegmentFilesResponse response = res.get(); assert (response.files.isEmpty()); } diff --git a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java index c108de5ee5ea6..7b02635525264 100644 --- a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java +++ b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java @@ -39,6 +39,7 @@ import org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.telemetry.tracing.noop.NoopTracer; +import org.opensearch.test.junit.annotations.TestLogging; import org.opensearch.test.transport.CapturingTransport; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; @@ -52,6 +53,7 @@ import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import java.util.function.BiConsumer; import static org.junit.Assert.assertEquals; import static org.mockito.ArgumentMatchers.any; @@ -211,6 +213,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { Assert.fail("Should not be called"); @@ -246,7 +249,7 @@ public void testAlreadyOnNewCheckpoint() { verify(spy, times(0)).startReplication(any(), any(), any()); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/8928") + @TestLogging(reason = "Getting trace logs from replication package", value = "org.opensearch.indices.replication:TRACE") public void testShardAlreadyReplicating() { CountDownLatch blockGetCheckpointMetadata = new CountDownLatch(1); SegmentReplicationSource source = new TestReplicationSource() { @@ -276,6 +279,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onResponse(new GetSegmentFilesResponse(Collections.emptyList())); @@ -333,6 +337,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { Assert.fail("Unreachable"); diff --git a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetTests.java b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetTests.java index a9d7d3cdd32fc..8b4b3aff701b4 100644 --- a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetTests.java +++ b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetTests.java @@ -40,6 +40,7 @@ import org.opensearch.index.store.StoreTests; import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; import org.opensearch.indices.replication.common.ReplicationFailedException; +import org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.test.DummyShardLock; import org.opensearch.test.IndexSettingsModule; @@ -53,6 +54,7 @@ import java.util.List; import java.util.Map; import java.util.Random; +import java.util.function.BiConsumer; import org.mockito.Mockito; @@ -131,10 +133,12 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { assertEquals(1, filesToFetch.size()); assert (filesToFetch.contains(SEGMENT_FILE)); + filesToFetch.forEach(storeFileMetadata -> fileProgressTracker.accept(storeFileMetadata.name(), storeFileMetadata.length())); listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); } }; @@ -149,6 +153,19 @@ public void getSegmentFiles( public void onResponse(Void replicationResponse) { try { verify(spyIndexShard, times(1)).finalizeReplication(any()); + assertEquals( + 1, + segrepTarget.state() + .getIndex() + .fileDetails() + .stream() + .filter(ReplicationLuceneIndex.FileMetadata::fullyRecovered) + .count() + ); + assertEquals( + 0, + segrepTarget.state().getIndex().fileDetails().stream().filter(file -> file.fullyRecovered() == false).count() + ); segrepTarget.markAsDone(); } catch (IOException ex) { Assert.fail(); @@ -182,6 +199,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); @@ -200,6 +218,15 @@ public void onResponse(Void replicationResponse) { @Override public void onFailure(Exception e) { + assertEquals( + 0, + segrepTarget.state() + .getIndex() + .fileDetails() + .stream() + .filter(ReplicationLuceneIndex.FileMetadata::fullyRecovered) + .count() + ); assertEquals(exception, e.getCause().getCause()); segrepTarget.fail(new ReplicationFailedException(e), false); } @@ -225,6 +252,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onFailure(exception); @@ -243,6 +271,15 @@ public void onResponse(Void replicationResponse) { @Override public void onFailure(Exception e) { + assertEquals( + 0, + segrepTarget.state() + .getIndex() + .fileDetails() + .stream() + .filter(ReplicationLuceneIndex.FileMetadata::fullyRecovered) + .count() + ); assertEquals(exception, e.getCause().getCause()); segrepTarget.fail(new ReplicationFailedException(e), false); } @@ -268,6 +305,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); @@ -314,6 +352,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); @@ -358,6 +397,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); @@ -376,6 +416,15 @@ public void onResponse(Void replicationResponse) { @Override public void onFailure(Exception e) { + assertEquals( + 0, + segrepTarget.state() + .getIndex() + .fileDetails() + .stream() + .filter(ReplicationLuceneIndex.FileMetadata::fullyRecovered) + .count() + ); assertTrue(e instanceof OpenSearchCorruptionException); assertTrue(e.getMessage().contains("has local copies of segments that differ from the primary")); segrepTarget.fail(new ReplicationFailedException(e), false); @@ -410,6 +459,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); diff --git a/server/src/test/java/org/opensearch/repositories/RepositoriesServiceTests.java b/server/src/test/java/org/opensearch/repositories/RepositoriesServiceTests.java index c4599a6e7a00e..43ebb86fd5342 100644 --- a/server/src/test/java/org/opensearch/repositories/RepositoriesServiceTests.java +++ b/server/src/test/java/org/opensearch/repositories/RepositoriesServiceTests.java @@ -219,20 +219,17 @@ public void testRepositoriesStatsCanHaveTheSameNameAndDifferentTypeOverTime() { assertThat(repositoriesService.repositoriesStats().size(), equalTo(1)); repositoriesService.applyClusterState(new ClusterChangedEvent("new repo", emptyState(), clusterStateWithRepoTypeA)); - assertThat(repositoriesService.repositoriesStats().size(), equalTo(1)); + assertThat(repositoriesService.repositoriesStats().size(), equalTo(0)); ClusterState clusterStateWithRepoTypeB = createClusterStateWithRepo(repoName, MeteredRepositoryTypeB.TYPE); repositoriesService.applyClusterState(new ClusterChangedEvent("new repo", clusterStateWithRepoTypeB, emptyState())); List repositoriesStats = repositoriesService.repositoriesStats(); - assertThat(repositoriesStats.size(), equalTo(2)); + assertThat(repositoriesStats.size(), equalTo(1)); RepositoryStatsSnapshot repositoryStatsTypeA = repositoriesStats.get(0); - assertThat(repositoryStatsTypeA.getRepositoryInfo().type, equalTo(MeteredRepositoryTypeA.TYPE)); - assertThat(repositoryStatsTypeA.getRepositoryStats(), equalTo(MeteredRepositoryTypeA.STATS)); + assertThat(repositoryStatsTypeA.getRepositoryInfo().type, equalTo(MeteredRepositoryTypeB.TYPE)); + assertThat(repositoryStatsTypeA.getRepositoryStats(), equalTo(MeteredRepositoryTypeB.STATS)); - RepositoryStatsSnapshot repositoryStatsTypeB = repositoriesStats.get(1); - assertThat(repositoryStatsTypeB.getRepositoryInfo().type, equalTo(MeteredRepositoryTypeB.TYPE)); - assertThat(repositoryStatsTypeB.getRepositoryStats(), equalTo(MeteredRepositoryTypeB.STATS)); } public void testWithSameKeyProviderNames() { @@ -258,7 +255,7 @@ public void testWithSameKeyProviderNames() { kpTypeA ); repositoriesService.applyClusterState(new ClusterChangedEvent("new repo", clusterStateWithRepoTypeB, emptyState())); - assertThat(repositoriesService.repositoriesStats().size(), equalTo(2)); + assertThat(repositoriesService.repositoriesStats().size(), equalTo(1)); MeteredRepositoryTypeB repositoryB = (MeteredRepositoryTypeB) repositoriesService.repository("repoName"); assertNotNull(repositoryB); assertNotNull(repository.cryptoHandler); diff --git a/server/src/test/java/org/opensearch/repositories/RepositoriesStatsArchiveTests.java b/server/src/test/java/org/opensearch/repositories/RepositoriesStatsArchiveTests.java index cf0b06a3f7d16..da0cbcb1d4b17 100644 --- a/server/src/test/java/org/opensearch/repositories/RepositoriesStatsArchiveTests.java +++ b/server/src/test/java/org/opensearch/repositories/RepositoriesStatsArchiveTests.java @@ -32,7 +32,6 @@ package org.opensearch.repositories; -import org.opensearch.common.UUIDs; import org.opensearch.common.unit.TimeValue; import org.opensearch.test.OpenSearchTestCase; @@ -122,14 +121,11 @@ private RepositoryStatsSnapshot createRepositoryStats(RepositoryStats repository private RepositoryStatsSnapshot createRepositoryStats(RepositoryStats repositoryStats, long clusterVersion) { RepositoryInfo repositoryInfo = new RepositoryInfo( - UUIDs.randomBase64UUID(), randomAlphaOfLength(10), randomAlphaOfLength(10), - Map.of("bucket", randomAlphaOfLength(10)), - System.currentTimeMillis(), - null + Map.of("bucket", randomAlphaOfLength(10)) ); - return new RepositoryStatsSnapshot(repositoryInfo, repositoryStats, clusterVersion, true); + return new RepositoryStatsSnapshot(repositoryInfo, repositoryStats, clusterVersion); } } diff --git a/server/src/test/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdownTests.java b/server/src/test/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdownTests.java index f29ba3b0cea07..db14eb90ef839 100644 --- a/server/src/test/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdownTests.java +++ b/server/src/test/java/org/opensearch/search/profile/query/ConcurrentQueryProfileBreakdownTests.java @@ -333,6 +333,58 @@ public void testBreakDownMapWithMultipleSlicesAndOneSliceWithNoLeafContext() thr directory.close(); } + public void testOneLeafContextWithEmptySliceCollectorsToLeaves() throws Exception { + final DirectoryReader directoryReader = getDirectoryReader(1); + final Directory directory = directoryReader.directory(); + final long createWeightEarliestStartTime = createWeightTimer.getEarliestTimerStartTime(); + final long createWeightEndTime = createWeightEarliestStartTime + createWeightTimer.getApproximateTiming(); + final Map leafProfileBreakdownMap_1 = getLeafBreakdownMap(createWeightEndTime + 10, 10, 1); + final AbstractProfileBreakdown leafProfileBreakdown_1 = new TestQueryProfileBreakdown( + QueryTimingType.class, + leafProfileBreakdownMap_1 + ); + testQueryProfileBreakdown.getContexts().put(directoryReader.leaves().get(0), leafProfileBreakdown_1); + final Map queryBreakDownMap = testQueryProfileBreakdown.toBreakdownMap(); + assertFalse(queryBreakDownMap == null || queryBreakDownMap.isEmpty()); + assertEquals(26, queryBreakDownMap.size()); + for (QueryTimingType queryTimingType : QueryTimingType.values()) { + String timingTypeKey = queryTimingType.toString(); + String timingTypeCountKey = queryTimingType + TIMING_TYPE_COUNT_SUFFIX; + + if (queryTimingType.equals(QueryTimingType.CREATE_WEIGHT)) { + final long createWeightTime = queryBreakDownMap.get(timingTypeKey); + assertEquals(createWeightTimer.getApproximateTiming(), createWeightTime); + assertEquals(1, (long) queryBreakDownMap.get(timingTypeCountKey)); + // verify there is no min/max/avg for weight type stats + assertFalse( + queryBreakDownMap.containsKey(ConcurrentQueryProfileBreakdown.MAX_PREFIX + timingTypeKey) + || queryBreakDownMap.containsKey(MIN_PREFIX + timingTypeKey) + || queryBreakDownMap.containsKey(ConcurrentQueryProfileBreakdown.AVG_PREFIX + timingTypeKey) + || queryBreakDownMap.containsKey(ConcurrentQueryProfileBreakdown.MAX_PREFIX + timingTypeCountKey) + || queryBreakDownMap.containsKey(MIN_PREFIX + timingTypeCountKey) + || queryBreakDownMap.containsKey(ConcurrentQueryProfileBreakdown.AVG_PREFIX + timingTypeCountKey) + ); + continue; + } + assertNotNull(queryBreakDownMap.get(timingTypeKey)); + assertNotNull(queryBreakDownMap.get(timingTypeCountKey)); + // verify there is no min/max/avg for current breakdown type stats + assertFalse( + queryBreakDownMap.containsKey(ConcurrentQueryProfileBreakdown.MAX_PREFIX + timingTypeKey) + || queryBreakDownMap.containsKey(MIN_PREFIX + timingTypeKey) + || queryBreakDownMap.containsKey(ConcurrentQueryProfileBreakdown.AVG_PREFIX + timingTypeKey) + || queryBreakDownMap.containsKey(ConcurrentQueryProfileBreakdown.MAX_PREFIX + timingTypeCountKey) + || queryBreakDownMap.containsKey(MIN_PREFIX + timingTypeCountKey) + || queryBreakDownMap.containsKey(ConcurrentQueryProfileBreakdown.AVG_PREFIX + timingTypeCountKey) + ); + } + assertEquals(0, testQueryProfileBreakdown.getMaxSliceNodeTime()); + assertEquals(0, testQueryProfileBreakdown.getMinSliceNodeTime()); + assertEquals(0, testQueryProfileBreakdown.getAvgSliceNodeTime()); + directoryReader.close(); + directory.close(); + } + private Map getLeafBreakdownMap(long startTime, long timeTaken, long count) { Map leafBreakDownMap = new HashMap<>(); for (QueryTimingType timingType : QueryTimingType.values()) { diff --git a/server/src/test/java/org/opensearch/search/profile/query/ConcurrentQueryProfilerTests.java b/server/src/test/java/org/opensearch/search/profile/query/ConcurrentQueryProfilerTests.java new file mode 100644 index 0000000000000..736bbcdd9e8dd --- /dev/null +++ b/server/src/test/java/org/opensearch/search/profile/query/ConcurrentQueryProfilerTests.java @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.profile.query; + +import org.opensearch.search.profile.Timer; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.LinkedList; +import java.util.List; + +import static org.hamcrest.Matchers.equalTo; + +public class ConcurrentQueryProfilerTests extends OpenSearchTestCase { + + public void testMergeRewriteTimeIntervals() { + ConcurrentQueryProfiler profiler = new ConcurrentQueryProfiler(new ConcurrentQueryProfileTree()); + List timers = new LinkedList<>(); + timers.add(new Timer(217134L, 1L, 1L, 0L, 553074511206907L)); + timers.add(new Timer(228954L, 1L, 1L, 0L, 553074509287335L)); + timers.add(new Timer(228954L, 1L, 1L, 0L, 553074509287336L)); + LinkedList mergedIntervals = profiler.mergeRewriteTimeIntervals(timers); + assertThat(mergedIntervals.size(), equalTo(2)); + long[] interval = mergedIntervals.get(0); + assertThat(interval[0], equalTo(553074509287335L)); + assertThat(interval[1], equalTo(553074509516290L)); + interval = mergedIntervals.get(1); + assertThat(interval[0], equalTo(553074511206907L)); + assertThat(interval[1], equalTo(553074511424041L)); + } +} diff --git a/server/src/test/java/org/opensearch/search/profile/query/QueryProfilerTests.java b/server/src/test/java/org/opensearch/search/profile/query/QueryProfilerTests.java index 64a440b85eb10..481a224f2ff0e 100644 --- a/server/src/test/java/org/opensearch/search/profile/query/QueryProfilerTests.java +++ b/server/src/test/java/org/opensearch/search/profile/query/QueryProfilerTests.java @@ -161,7 +161,9 @@ public void tearDown() throws Exception { } public void testBasic() throws IOException { - QueryProfiler profiler = new QueryProfiler(executor != null); + QueryProfiler profiler = executor != null + ? new ConcurrentQueryProfiler(new ConcurrentQueryProfileTree()) + : new QueryProfiler(new InternalQueryProfileTree()); searcher.setProfiler(profiler); Query query = new TermQuery(new Term("foo", "bar")); searcher.search(query, 1); @@ -228,7 +230,9 @@ public void testBasic() throws IOException { } public void testNoScoring() throws IOException { - QueryProfiler profiler = new QueryProfiler(executor != null); + QueryProfiler profiler = executor != null + ? new ConcurrentQueryProfiler(new ConcurrentQueryProfileTree()) + : new QueryProfiler(new InternalQueryProfileTree()); searcher.setProfiler(profiler); Query query = new TermQuery(new Term("foo", "bar")); searcher.search(query, 1, Sort.INDEXORDER); // scores are not needed @@ -295,7 +299,9 @@ public void testNoScoring() throws IOException { } public void testUseIndexStats() throws IOException { - QueryProfiler profiler = new QueryProfiler(executor != null); + QueryProfiler profiler = executor != null + ? new ConcurrentQueryProfiler(new ConcurrentQueryProfileTree()) + : new QueryProfiler(new InternalQueryProfileTree()); searcher.setProfiler(profiler); Query query = new TermQuery(new Term("foo", "bar")); searcher.count(query); // will use index stats @@ -309,7 +315,9 @@ public void testUseIndexStats() throws IOException { } public void testApproximations() throws IOException { - QueryProfiler profiler = new QueryProfiler(executor != null); + QueryProfiler profiler = executor != null + ? new ConcurrentQueryProfiler(new ConcurrentQueryProfileTree()) + : new QueryProfiler(new InternalQueryProfileTree()); searcher.setProfiler(profiler); Query query = new RandomApproximationQuery(new TermQuery(new Term("foo", "bar")), random()); searcher.count(query); diff --git a/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java b/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java index 03f0d27188027..c114b56bd0b39 100644 --- a/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java +++ b/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java @@ -152,14 +152,16 @@ public void onFailure(Exception e) { mockBlobContainer, "check-smile", CompressorRegistry.none(), - actionListener + actionListener, + ChecksumBlobStoreFormat.SNAPSHOT_ONLY_FORMAT_PARAMS ); checksumSMILE.writeAsync( new BlobObj("checksum smile compressed"), mockBlobContainer, "check-smile-comp", CompressorRegistry.getCompressor(DeflateCompressor.NAME), - actionListener + actionListener, + ChecksumBlobStoreFormat.SNAPSHOT_ONLY_FORMAT_PARAMS ); latch.await(); diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index 97c5d23831965..2f9f38d18a064 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -222,6 +222,7 @@ import org.opensearch.search.query.QueryPhase; import org.opensearch.snapshots.mockstore.MockEventuallyConsistentRepository; import org.opensearch.tasks.TaskResourceTrackingService; +import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry; import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.disruption.DisruptableMockTransport; @@ -2302,7 +2303,8 @@ public void onFailure(final Exception e) { List.of(), client ), - null + null, + NoopMetricsRegistry.INSTANCE ) ); actions.put( diff --git a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java index a520b6278ea47..60a54110fd0b4 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java +++ b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java @@ -121,7 +121,8 @@ List adjustNodesStats(List nodesStats) { nodeStats.getWeightedRoutingStats(), nodeStats.getFileCacheStats(), nodeStats.getTaskCancellationStats(), - nodeStats.getSearchPipelineStats() + nodeStats.getSearchPipelineStats(), + nodeStats.getRepositoriesStats() ); }).collect(Collectors.toList()); } diff --git a/test/framework/src/main/java/org/opensearch/gateway/MockGatewayMetaState.java b/test/framework/src/main/java/org/opensearch/gateway/MockGatewayMetaState.java index d77596cf5cdd1..2f006a5519d69 100644 --- a/test/framework/src/main/java/org/opensearch/gateway/MockGatewayMetaState.java +++ b/test/framework/src/main/java/org/opensearch/gateway/MockGatewayMetaState.java @@ -67,6 +67,12 @@ public class MockGatewayMetaState extends GatewayMetaState { private final BigArrays bigArrays; private final RemoteClusterStateService remoteClusterStateService; private final RemoteStoreRestoreService remoteStoreRestoreService; + private boolean prepareFullState = false; + + public MockGatewayMetaState(DiscoveryNode localNode, BigArrays bigArrays, boolean prepareFullState) { + this(localNode, bigArrays); + this.prepareFullState = prepareFullState; + } public MockGatewayMetaState(DiscoveryNode localNode, BigArrays bigArrays) { this.localNode = localNode; @@ -99,8 +105,12 @@ Metadata upgradeMetadataForNode( @Override ClusterState prepareInitialClusterState(TransportService transportService, ClusterService clusterService, ClusterState clusterState) { - // Just set localNode here, not to mess with ClusterService and IndicesService mocking - return ClusterStateUpdaters.setLocalNode(clusterState, localNode); + if (prepareFullState) { + return super.prepareInitialClusterState(transportService, clusterService, clusterState); + } else { + // Just set localNode here, not to mess with ClusterService and IndicesService mocking + return ClusterStateUpdaters.setLocalNode(clusterState, localNode); + } } @Override @@ -113,6 +123,16 @@ public void start( NodeEnvironment nodeEnvironment, NamedXContentRegistry xContentRegistry, PersistedStateRegistry persistedStateRegistry + ) { + start(settings, nodeEnvironment, xContentRegistry, persistedStateRegistry, false); + } + + public void start( + Settings settings, + NodeEnvironment nodeEnvironment, + NamedXContentRegistry xContentRegistry, + PersistedStateRegistry persistedStateRegistry, + boolean prepareFullState ) { final TransportService transportService = mock(TransportService.class); when(transportService.getThreadPool()).thenReturn(mock(ThreadPool.class)); @@ -126,6 +146,7 @@ public void start( } catch (IOException e) { throw new AssertionError(e); } + this.prepareFullState = prepareFullState; start( settings, transportService, diff --git a/test/framework/src/main/java/org/opensearch/index/replication/TestReplicationSource.java b/test/framework/src/main/java/org/opensearch/index/replication/TestReplicationSource.java index b29e25a0bff2c..bcd47e3d578ee 100644 --- a/test/framework/src/main/java/org/opensearch/index/replication/TestReplicationSource.java +++ b/test/framework/src/main/java/org/opensearch/index/replication/TestReplicationSource.java @@ -17,6 +17,7 @@ import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; import java.util.List; +import java.util.function.BiConsumer; /** * This class is used by unit tests implementing SegmentReplicationSource @@ -36,6 +37,7 @@ public abstract void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ); diff --git a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java index 9dc230474482f..412d5235fe462 100644 --- a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java @@ -174,6 +174,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; +import java.util.function.BiConsumer; import java.util.function.BiFunction; import java.util.function.Consumer; import java.util.function.Function; @@ -1620,6 +1621,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { try ( diff --git a/test/framework/src/main/java/org/opensearch/repositories/blobstore/OpenSearchMockAPIBasedRepositoryIntegTestCase.java b/test/framework/src/main/java/org/opensearch/repositories/blobstore/OpenSearchMockAPIBasedRepositoryIntegTestCase.java index dff9b997d87db..faa9d52b105b2 100644 --- a/test/framework/src/main/java/org/opensearch/repositories/blobstore/OpenSearchMockAPIBasedRepositoryIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/repositories/blobstore/OpenSearchMockAPIBasedRepositoryIntegTestCase.java @@ -238,7 +238,7 @@ public void testRequestStats() throws Exception { assertEquals(assertionErrorMsg, mockCalls, sdkRequestCounts); } - private Map getMockRequestCounts() { + protected Map getMockRequestCounts() { for (HttpHandler h : handlers.values()) { while (h instanceof DelegatingHttpHandler) { if (h instanceof HttpStatsCollectorHandler) { diff --git a/test/framework/src/main/java/org/opensearch/snapshots/AbstractSnapshotIntegTestCase.java b/test/framework/src/main/java/org/opensearch/snapshots/AbstractSnapshotIntegTestCase.java index 1bb1e44a8a600..0ee889af5ce1a 100644 --- a/test/framework/src/main/java/org/opensearch/snapshots/AbstractSnapshotIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/snapshots/AbstractSnapshotIntegTestCase.java @@ -331,6 +331,12 @@ public static void blockNodeOnAnyFiles(String repository, String nodeName) { ); } + public static void blockNodeOnAnySegmentFile(String repository, String nodeName) { + ((MockRepository) internalCluster().getInstance(RepositoriesService.class, nodeName).repository(repository)).blockOnSegmentFiles( + true + ); + } + public static void blockDataNode(String repository, String nodeName) { ((MockRepository) internalCluster().getInstance(RepositoriesService.class, nodeName).repository(repository)).blockOnDataFiles(true); } diff --git a/test/framework/src/main/java/org/opensearch/snapshots/mockstore/MockRepository.java b/test/framework/src/main/java/org/opensearch/snapshots/mockstore/MockRepository.java index 7db71c4be0968..72c4ba44d0a31 100644 --- a/test/framework/src/main/java/org/opensearch/snapshots/mockstore/MockRepository.java +++ b/test/framework/src/main/java/org/opensearch/snapshots/mockstore/MockRepository.java @@ -139,6 +139,8 @@ public long getFailureCount() { private volatile boolean blockOnDataFiles; + private volatile boolean blockOnSegmentFiles; + private volatile boolean blockOnDeleteIndexN; /** @@ -190,6 +192,7 @@ public MockRepository( maximumNumberOfFailures = metadata.settings().getAsLong("max_failure_number", 100L); blockOnAnyFiles = metadata.settings().getAsBoolean("block_on_control", false); blockOnDataFiles = metadata.settings().getAsBoolean("block_on_data", false); + blockOnSegmentFiles = metadata.settings().getAsBoolean("block_on_segment", false); blockAndFailOnWriteSnapFile = metadata.settings().getAsBoolean("block_on_snap", false); randomPrefix = metadata.settings().get("random", "default"); waitAfterUnblock = metadata.settings().getAsLong("wait_after_unblock", 0L); @@ -237,6 +240,7 @@ public synchronized void unblock() { blocked = false; // Clean blocking flags, so we wouldn't try to block again blockOnDataFiles = false; + blockOnSegmentFiles = false; blockOnAnyFiles = false; blockAndFailOnWriteIndexFile = false; blockOnWriteIndexFile = false; @@ -259,6 +263,14 @@ public void setBlockOnAnyFiles(boolean blocked) { blockOnAnyFiles = blocked; } + public void blockOnSegmentFiles(boolean blocked) { + blockOnSegmentFiles = blocked; + } + + public void setBlockOnSegmentFiles(boolean blocked) { + blockOnSegmentFiles = blocked; + } + public void setBlockAndFailOnWriteSnapFiles(boolean blocked) { blockAndFailOnWriteSnapFile = blocked; } @@ -306,6 +318,7 @@ private synchronized boolean blockExecution() { boolean wasBlocked = false; try { while (blockOnDataFiles + || blockOnSegmentFiles || blockOnAnyFiles || blockAndFailOnWriteIndexFile || blockOnWriteIndexFile @@ -407,6 +420,8 @@ private void maybeIOExceptionOrBlock(String blobName) throws IOException { blockExecutionAndMaybeWait(blobName); } else if (blobName.startsWith("snap-") && blockAndFailOnWriteSnapFile) { blockExecutionAndFail(blobName); + } else if (blockOnSegmentFiles && blobName.contains(".si__")) { + blockExecutionAndMaybeWait(blobName); } } } diff --git a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java index 3c7423f73685f..898e125b94954 100644 --- a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java @@ -1853,10 +1853,12 @@ public synchronized void stopRandomNodeNotCurrentMaster() throws IOException { */ public void stopAllNodes() { try { - int totalDataNodes = numDataNodes(); - while (totalDataNodes > 0) { - stopRandomDataNode(); - totalDataNodes -= 1; + if (numDataAndClusterManagerNodes() != numClusterManagerNodes()) { + int totalDataNodes = numDataNodes(); + while (totalDataNodes > 0) { + stopRandomDataNode(); + totalDataNodes -= 1; + } } int totalClusterManagerNodes = numClusterManagerNodes(); while (totalClusterManagerNodes > 1) { @@ -2719,6 +2721,7 @@ public void ensureEstimatedStats() { false, false, false, + false, false ); assertThat(