diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index c47b9e0b69256..908a032bf833e 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -17,6 +17,7 @@ Resolves #[Issue number to be closed when this PR is merged] - [ ] All tests pass - [ ] New functionality has been documented. - [ ] New functionality has javadoc added +- [ ] Failing checks are inspected and point to the corresponding known issue(s) (See: [Troubleshooting Failing Builds](../blob/main/CONTRIBUTING.md#troubleshooting-failing-builds)) - [ ] Commits are signed per the DCO using --signoff - [ ] Commit changes are listed out in CHANGELOG.md file (See: [Changelog](../blob/main/CONTRIBUTING.md#changelog)) - [ ] Public documentation issue/PR [created](https://github.com/opensearch-project/documentation-website/issues/new/choose) diff --git a/CHANGELOG.md b/CHANGELOG.md index 94406059a2194..8c7e3ee151d64 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -94,6 +94,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Remote Store] Add repository stats for remote store([#10567](https://github.com/opensearch-project/OpenSearch/pull/10567)) - Add search query categorizer ([#10255](https://github.com/opensearch-project/OpenSearch/pull/10255)) - Introduce ConcurrentQueryProfiler to profile query using concurrent segment search path and support concurrency during rewrite and create weight ([10352](https://github.com/opensearch-project/OpenSearch/pull/10352)) +- [Remote cluster state] Make index and global metadata upload timeout dynamic cluster settings ([#10814](https://github.com/opensearch-project/OpenSearch/pull/10814)) +- Added cluster setting cluster.restrict.index.replication_type to restrict setting of index setting replication type ([#10866](https://github.com/opensearch-project/OpenSearch/pull/10866)) +- Add cluster state stats ([#10670](https://github.com/opensearch-project/OpenSearch/pull/10670)) ### Dependencies - Bump `com.google.api.grpc:proto-google-common-protos` from 2.10.0 to 2.25.1 ([#10208](https://github.com/opensearch-project/OpenSearch/pull/10208), [#10298](https://github.com/opensearch-project/OpenSearch/pull/10298)) @@ -106,6 +109,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump `org.codehaus.woodstox:stax2-api` from 4.2.1 to 4.2.2 ([#10639](https://github.com/opensearch-project/OpenSearch/pull/10639)) - Bump `com.google.http-client:google-http-client` from 1.43.2 to 1.43.3 ([#10635](https://github.com/opensearch-project/OpenSearch/pull/10635)) - Bump `com.squareup.okio:okio` from 3.5.0 to 3.6.0 ([#10637](https://github.com/opensearch-project/OpenSearch/pull/10637)) +- Bump `org.apache.logging.log4j:log4j-core` from 2.20.0 to 2.21.0 ([#10858](https://github.com/opensearch-project/OpenSearch/pull/10858)) ### Changed - Mute the query profile IT with concurrent execution ([#9840](https://github.com/opensearch-project/OpenSearch/pull/9840)) @@ -114,6 +118,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Remote Store] Add Remote Store backpressure rejection stats to `_nodes/stats` ([#10524](https://github.com/opensearch-project/OpenSearch/pull/10524)) - [BUG] Fix java.lang.SecurityException in repository-gcs plugin ([#10642](https://github.com/opensearch-project/OpenSearch/pull/10642)) - Add telemetry tracer/metric enable flag and integ test. ([#10395](https://github.com/opensearch-project/OpenSearch/pull/10395)) +- Add instrumentation for indexing in transport bulk action and transport shard bulk action. ([#10273](https://github.com/opensearch-project/OpenSearch/pull/10273)) ### Deprecated diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d379d78829318..4a1162cf2558b 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -8,6 +8,7 @@ - [Developer Certificate of Origin](#developer-certificate-of-origin) - [Changelog](#changelog) - [Review Process](#review-process) + - [Troubleshooting Failing Builds](#troubleshooting-failing-builds) # Contributing to OpenSearch @@ -162,3 +163,14 @@ During the PR process, expect that there will be some back-and-forth. Please try If we accept the PR, a [maintainer](MAINTAINERS.md) will merge your change and usually take care of backporting it to appropriate branches ourselves. If we reject the PR, we will close the pull request with a comment explaining why. This decision isn't always final: if you feel we have misunderstood your intended change or otherwise think that we should reconsider then please continue the conversation with a comment on the PR and we'll do our best to address any further points you raise. + +## Troubleshooting Failing Builds + +The OpenSearch testing framework offers many capabilities but exhibits significant complexity (it does lot of randomization internally to cover as many edge cases and variations as possible). Unfortunately, this posses a challenge by making it harder to discover important issues/bugs in straightforward way and may lead to so called flaky tests - the tests which flip randomly from success to failure without any code changes. + +If your pull request reports a failing test(s) on one of the checks, please: + - look if there is an existing [issue](https://github.com/opensearch-project/OpenSearch/issues) reported for the test in question + - if not, please make sure this is not caused by your changes, run the failing test(s) locally for some time + - if you are sure the failure is not related, please open a new [bug](https://github.com/opensearch-project/OpenSearch/issues/new?assignees=&labels=bug%2C+untriaged&projects=&template=bug_template.md&title=%5BBUG%5D) with `flaky-test` label + - add a comment referencing the issue(s) or bug report(s) to your pull request explaining the failing build(s) + - as a bonus point, try to contribute by fixing the flaky test(s) diff --git a/buildSrc/src/testKit/thirdPartyAudit/sample_jars/build.gradle b/buildSrc/src/testKit/thirdPartyAudit/sample_jars/build.gradle index cb8050d1718c4..74c88e0961c9c 100644 --- a/buildSrc/src/testKit/thirdPartyAudit/sample_jars/build.gradle +++ b/buildSrc/src/testKit/thirdPartyAudit/sample_jars/build.gradle @@ -15,8 +15,9 @@ plugins { repositories { mavenCentral() } + dependencies { - implementation "org.apache.logging.log4j:log4j-core:2.20.0" + implementation "org.apache.logging.log4j:log4j-core:2.21.0" } ["0.0.1", "0.0.2"].forEach { v -> diff --git a/buildSrc/version.properties b/buildSrc/version.properties index a5171aa582a86..96d398c35851d 100644 --- a/buildSrc/version.properties +++ b/buildSrc/version.properties @@ -14,7 +14,7 @@ jackson_databind = 2.15.2 snakeyaml = 2.1 icu4j = 70.1 supercsv = 2.4.0 -log4j = 2.20.0 +log4j = 2.21.0 slf4j = 1.7.36 asm = 9.6 jettison = 1.5.4 diff --git a/libs/core/licenses/log4j-api-2.20.0.jar.sha1 b/libs/core/licenses/log4j-api-2.20.0.jar.sha1 deleted file mode 100644 index 37154d9861ac0..0000000000000 --- a/libs/core/licenses/log4j-api-2.20.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -1fe6082e660daf07c689a89c94dc0f49c26b44bb \ No newline at end of file diff --git a/libs/core/licenses/log4j-api-2.21.0.jar.sha1 b/libs/core/licenses/log4j-api-2.21.0.jar.sha1 new file mode 100644 index 0000000000000..51446052594aa --- /dev/null +++ b/libs/core/licenses/log4j-api-2.21.0.jar.sha1 @@ -0,0 +1 @@ +760192f2b69eacf4a4afc78e5a1d7a8de054fcbd \ No newline at end of file diff --git a/plugins/crypto-kms/licenses/log4j-1.2-api-2.20.0.jar.sha1 b/plugins/crypto-kms/licenses/log4j-1.2-api-2.20.0.jar.sha1 deleted file mode 100644 index 9829576d38ce0..0000000000000 --- a/plugins/crypto-kms/licenses/log4j-1.2-api-2.20.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -689151374756cb809cb029f2501015bdc7733179 \ No newline at end of file diff --git a/plugins/crypto-kms/licenses/log4j-1.2-api-2.21.0.jar.sha1 b/plugins/crypto-kms/licenses/log4j-1.2-api-2.21.0.jar.sha1 new file mode 100644 index 0000000000000..39d9177cb2fac --- /dev/null +++ b/plugins/crypto-kms/licenses/log4j-1.2-api-2.21.0.jar.sha1 @@ -0,0 +1 @@ +12bad3819a9570807f3c97315930699584c12152 \ No newline at end of file diff --git a/plugins/discovery-azure-classic/licenses/log4j-1.2-api-2.20.0.jar.sha1 b/plugins/discovery-azure-classic/licenses/log4j-1.2-api-2.20.0.jar.sha1 deleted file mode 100644 index 9829576d38ce0..0000000000000 --- a/plugins/discovery-azure-classic/licenses/log4j-1.2-api-2.20.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -689151374756cb809cb029f2501015bdc7733179 \ No newline at end of file diff --git a/plugins/discovery-azure-classic/licenses/log4j-1.2-api-2.21.0.jar.sha1 b/plugins/discovery-azure-classic/licenses/log4j-1.2-api-2.21.0.jar.sha1 new file mode 100644 index 0000000000000..39d9177cb2fac --- /dev/null +++ b/plugins/discovery-azure-classic/licenses/log4j-1.2-api-2.21.0.jar.sha1 @@ -0,0 +1 @@ +12bad3819a9570807f3c97315930699584c12152 \ No newline at end of file diff --git a/plugins/discovery-ec2/licenses/log4j-1.2-api-2.20.0.jar.sha1 b/plugins/discovery-ec2/licenses/log4j-1.2-api-2.20.0.jar.sha1 deleted file mode 100644 index 9829576d38ce0..0000000000000 --- a/plugins/discovery-ec2/licenses/log4j-1.2-api-2.20.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -689151374756cb809cb029f2501015bdc7733179 \ No newline at end of file diff --git a/plugins/discovery-ec2/licenses/log4j-1.2-api-2.21.0.jar.sha1 b/plugins/discovery-ec2/licenses/log4j-1.2-api-2.21.0.jar.sha1 new file mode 100644 index 0000000000000..39d9177cb2fac --- /dev/null +++ b/plugins/discovery-ec2/licenses/log4j-1.2-api-2.21.0.jar.sha1 @@ -0,0 +1 @@ +12bad3819a9570807f3c97315930699584c12152 \ No newline at end of file diff --git a/plugins/discovery-gce/licenses/log4j-1.2-api-2.20.0.jar.sha1 b/plugins/discovery-gce/licenses/log4j-1.2-api-2.20.0.jar.sha1 deleted file mode 100644 index 9829576d38ce0..0000000000000 --- a/plugins/discovery-gce/licenses/log4j-1.2-api-2.20.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -689151374756cb809cb029f2501015bdc7733179 \ No newline at end of file diff --git a/plugins/discovery-gce/licenses/log4j-1.2-api-2.21.0.jar.sha1 b/plugins/discovery-gce/licenses/log4j-1.2-api-2.21.0.jar.sha1 new file mode 100644 index 0000000000000..39d9177cb2fac --- /dev/null +++ b/plugins/discovery-gce/licenses/log4j-1.2-api-2.21.0.jar.sha1 @@ -0,0 +1 @@ +12bad3819a9570807f3c97315930699584c12152 \ No newline at end of file diff --git a/plugins/repository-gcs/licenses/log4j-1.2-api-2.20.0.jar.sha1 b/plugins/repository-gcs/licenses/log4j-1.2-api-2.20.0.jar.sha1 deleted file mode 100644 index 9829576d38ce0..0000000000000 --- a/plugins/repository-gcs/licenses/log4j-1.2-api-2.20.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -689151374756cb809cb029f2501015bdc7733179 \ No newline at end of file diff --git a/plugins/repository-gcs/licenses/log4j-1.2-api-2.21.0.jar.sha1 b/plugins/repository-gcs/licenses/log4j-1.2-api-2.21.0.jar.sha1 new file mode 100644 index 0000000000000..39d9177cb2fac --- /dev/null +++ b/plugins/repository-gcs/licenses/log4j-1.2-api-2.21.0.jar.sha1 @@ -0,0 +1 @@ +12bad3819a9570807f3c97315930699584c12152 \ No newline at end of file diff --git a/plugins/repository-hdfs/licenses/log4j-slf4j-impl-2.20.0.jar.sha1 b/plugins/repository-hdfs/licenses/log4j-slf4j-impl-2.20.0.jar.sha1 deleted file mode 100644 index 800a4aa87ba0e..0000000000000 --- a/plugins/repository-hdfs/licenses/log4j-slf4j-impl-2.20.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -7ab4f082fd162f60afcaf2b8744a3d959feab3e8 \ No newline at end of file diff --git a/plugins/repository-hdfs/licenses/log4j-slf4j-impl-2.21.0.jar.sha1 b/plugins/repository-hdfs/licenses/log4j-slf4j-impl-2.21.0.jar.sha1 new file mode 100644 index 0000000000000..0e22f98daa61c --- /dev/null +++ b/plugins/repository-hdfs/licenses/log4j-slf4j-impl-2.21.0.jar.sha1 @@ -0,0 +1 @@ +911fdb5b1a1df36719c579ecc6f2957b88bce1ab \ No newline at end of file diff --git a/plugins/repository-s3/licenses/log4j-1.2-api-2.20.0.jar.sha1 b/plugins/repository-s3/licenses/log4j-1.2-api-2.20.0.jar.sha1 deleted file mode 100644 index 9829576d38ce0..0000000000000 --- a/plugins/repository-s3/licenses/log4j-1.2-api-2.20.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -689151374756cb809cb029f2501015bdc7733179 \ No newline at end of file diff --git a/plugins/repository-s3/licenses/log4j-1.2-api-2.21.0.jar.sha1 b/plugins/repository-s3/licenses/log4j-1.2-api-2.21.0.jar.sha1 new file mode 100644 index 0000000000000..39d9177cb2fac --- /dev/null +++ b/plugins/repository-s3/licenses/log4j-1.2-api-2.21.0.jar.sha1 @@ -0,0 +1 @@ +12bad3819a9570807f3c97315930699584c12152 \ No newline at end of file diff --git a/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java b/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java index 4df30bfd2169e..da2c6e8c1b0ee 100644 --- a/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java +++ b/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java @@ -249,7 +249,7 @@ protected S3Repository createRepository( ClusterService clusterService, RecoverySettings recoverySettings ) { - return new S3Repository(metadata, registry, service, clusterService, recoverySettings, null, null, null, null, false) { + return new S3Repository(metadata, registry, service, clusterService, recoverySettings, null, null, null, null, null, false) { @Override public BlobStore blobStore() { diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3Reference.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3Reference.java index 0b5fcb6df280e..45170ea1ad209 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3Reference.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3Reference.java @@ -29,6 +29,7 @@ public class AmazonAsyncS3Reference extends RefCountedReleasable { client.client().close(); client.priorityClient().close(); + client.urgentClient().close(); AwsCredentialsProvider credentials = client.credentials(); if (credentials instanceof Closeable) { try { diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3WithCredentials.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3WithCredentials.java index fa2db83729d25..f8a313b55d945 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3WithCredentials.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/AmazonAsyncS3WithCredentials.java @@ -19,16 +19,19 @@ final class AmazonAsyncS3WithCredentials { private final S3AsyncClient client; private final S3AsyncClient priorityClient; + private final S3AsyncClient urgentClient; private final AwsCredentialsProvider credentials; private AmazonAsyncS3WithCredentials( final S3AsyncClient client, final S3AsyncClient priorityClient, + final S3AsyncClient urgentClient, @Nullable final AwsCredentialsProvider credentials ) { this.client = client; this.credentials = credentials; this.priorityClient = priorityClient; + this.urgentClient = urgentClient; } S3AsyncClient client() { @@ -39,6 +42,10 @@ S3AsyncClient priorityClient() { return priorityClient; } + S3AsyncClient urgentClient() { + return urgentClient; + } + AwsCredentialsProvider credentials() { return credentials; } @@ -46,8 +53,9 @@ AwsCredentialsProvider credentials() { static AmazonAsyncS3WithCredentials create( final S3AsyncClient client, final S3AsyncClient priorityClient, + final S3AsyncClient urgentClient, @Nullable final AwsCredentialsProvider credentials ) { - return new AmazonAsyncS3WithCredentials(client, priorityClient, credentials); + return new AmazonAsyncS3WithCredentials(client, priorityClient, urgentClient, credentials); } } diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3AsyncService.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3AsyncService.java index 08215ebdd45e0..262304029a0d3 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3AsyncService.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3AsyncService.java @@ -103,6 +103,7 @@ public synchronized void refreshAndClearCache(Map clie */ public AmazonAsyncS3Reference client( RepositoryMetadata repositoryMetadata, + AsyncExecutorContainer urgentExecutorBuilder, AsyncExecutorContainer priorityExecutorBuilder, AsyncExecutorContainer normalExecutorBuilder ) { @@ -119,7 +120,7 @@ public AmazonAsyncS3Reference client( return existing; } final AmazonAsyncS3Reference clientReference = new AmazonAsyncS3Reference( - buildClient(clientSettings, priorityExecutorBuilder, normalExecutorBuilder) + buildClient(clientSettings, urgentExecutorBuilder, priorityExecutorBuilder, normalExecutorBuilder) ); clientReference.incRef(); clientsCache = MapBuilder.newMapBuilder(clientsCache).put(clientSettings, clientReference).immutableMap(); @@ -165,6 +166,7 @@ S3ClientSettings settings(RepositoryMetadata repositoryMetadata) { // proxy for testing synchronized AmazonAsyncS3WithCredentials buildClient( final S3ClientSettings clientSettings, + AsyncExecutorContainer urgentExecutorBuilder, AsyncExecutorContainer priorityExecutorBuilder, AsyncExecutorContainer normalExecutorBuilder ) { @@ -195,6 +197,17 @@ synchronized AmazonAsyncS3WithCredentials buildClient( builder.forcePathStyle(true); } + builder.httpClient(buildHttpClient(clientSettings, urgentExecutorBuilder.getAsyncTransferEventLoopGroup())); + builder.asyncConfiguration( + ClientAsyncConfiguration.builder() + .advancedOption( + SdkAdvancedAsyncClientOption.FUTURE_COMPLETION_EXECUTOR, + urgentExecutorBuilder.getFutureCompletionExecutor() + ) + .build() + ); + final S3AsyncClient urgentClient = SocketAccess.doPrivileged(builder::build); + builder.httpClient(buildHttpClient(clientSettings, priorityExecutorBuilder.getAsyncTransferEventLoopGroup())); builder.asyncConfiguration( ClientAsyncConfiguration.builder() @@ -217,7 +230,7 @@ synchronized AmazonAsyncS3WithCredentials buildClient( ); final S3AsyncClient client = SocketAccess.doPrivileged(builder::build); - return AmazonAsyncS3WithCredentials.create(client, priorityClient, credentials); + return AmazonAsyncS3WithCredentials.create(client, priorityClient, urgentClient, credentials); } static ClientOverrideConfiguration buildOverrideConfiguration(final S3ClientSettings clientSettings) { diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java index 24aee99242957..c1180aab0e0c7 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java @@ -195,9 +195,14 @@ public void asyncBlobUpload(WriteContext writeContext, ActionListener comp StreamContext streamContext = SocketAccess.doPrivileged(() -> writeContext.getStreamProvider(partSize)); try (AmazonAsyncS3Reference amazonS3Reference = SocketAccess.doPrivileged(blobStore::asyncClientReference)) { - S3AsyncClient s3AsyncClient = writeContext.getWritePriority() == WritePriority.HIGH - ? amazonS3Reference.get().priorityClient() - : amazonS3Reference.get().client(); + S3AsyncClient s3AsyncClient; + if (writeContext.getWritePriority() == WritePriority.URGENT) { + s3AsyncClient = amazonS3Reference.get().urgentClient(); + } else if (writeContext.getWritePriority() == WritePriority.HIGH) { + s3AsyncClient = amazonS3Reference.get().priorityClient(); + } else { + s3AsyncClient = amazonS3Reference.get().client(); + } CompletableFuture completableFuture = blobStore.getAsyncTransferManager() .uploadObject(s3AsyncClient, uploadRequest, streamContext, blobStore.getStatsMetricPublisher()); completableFuture.whenComplete((response, throwable) -> { diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java index f568d871dd31a..e8e043357e126 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java @@ -84,6 +84,7 @@ class S3BlobStore implements BlobStore { private final StatsMetricPublisher statsMetricPublisher = new StatsMetricPublisher(); private final AsyncTransferManager asyncTransferManager; + private final AsyncExecutorContainer urgentExecutorBuilder; private final AsyncExecutorContainer priorityExecutorBuilder; private final AsyncExecutorContainer normalExecutorBuilder; private final boolean multipartUploadEnabled; @@ -100,6 +101,7 @@ class S3BlobStore implements BlobStore { int bulkDeletesSize, RepositoryMetadata repositoryMetadata, AsyncTransferManager asyncTransferManager, + AsyncExecutorContainer urgentExecutorBuilder, AsyncExecutorContainer priorityExecutorBuilder, AsyncExecutorContainer normalExecutorBuilder ) { @@ -116,6 +118,7 @@ class S3BlobStore implements BlobStore { this.asyncTransferManager = asyncTransferManager; this.normalExecutorBuilder = normalExecutorBuilder; this.priorityExecutorBuilder = priorityExecutorBuilder; + this.urgentExecutorBuilder = urgentExecutorBuilder; } @Override @@ -139,7 +142,7 @@ public AmazonS3Reference clientReference() { } public AmazonAsyncS3Reference asyncClientReference() { - return s3AsyncService.client(repositoryMetadata, priorityExecutorBuilder, normalExecutorBuilder); + return s3AsyncService.client(repositoryMetadata, urgentExecutorBuilder, priorityExecutorBuilder, normalExecutorBuilder); } int getMaxRetries() { diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3Repository.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3Repository.java index aaf5b79891cdc..728a99b1220a6 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3Repository.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3Repository.java @@ -234,6 +234,7 @@ class S3Repository extends MeteredBlobStoreRepository { private final AsyncTransferManager asyncUploadUtils; private final S3AsyncService s3AsyncService; private final boolean multipartUploadEnabled; + private final AsyncExecutorContainer urgentExecutorBuilder; private final AsyncExecutorContainer priorityExecutorBuilder; private final AsyncExecutorContainer normalExecutorBuilder; private final Path pluginConfigPath; @@ -248,6 +249,7 @@ class S3Repository extends MeteredBlobStoreRepository { final ClusterService clusterService, final RecoverySettings recoverySettings, final AsyncTransferManager asyncUploadUtils, + final AsyncExecutorContainer urgentExecutorBuilder, final AsyncExecutorContainer priorityExecutorBuilder, final AsyncExecutorContainer normalExecutorBuilder, final S3AsyncService s3AsyncService, @@ -260,6 +262,7 @@ class S3Repository extends MeteredBlobStoreRepository { clusterService, recoverySettings, asyncUploadUtils, + urgentExecutorBuilder, priorityExecutorBuilder, normalExecutorBuilder, s3AsyncService, @@ -278,6 +281,7 @@ class S3Repository extends MeteredBlobStoreRepository { final ClusterService clusterService, final RecoverySettings recoverySettings, final AsyncTransferManager asyncUploadUtils, + final AsyncExecutorContainer urgentExecutorBuilder, final AsyncExecutorContainer priorityExecutorBuilder, final AsyncExecutorContainer normalExecutorBuilder, final S3AsyncService s3AsyncService, @@ -290,6 +294,7 @@ class S3Repository extends MeteredBlobStoreRepository { this.multipartUploadEnabled = multipartUploadEnabled; this.pluginConfigPath = pluginConfigPath; this.asyncUploadUtils = asyncUploadUtils; + this.urgentExecutorBuilder = urgentExecutorBuilder; this.priorityExecutorBuilder = priorityExecutorBuilder; this.normalExecutorBuilder = normalExecutorBuilder; @@ -352,6 +357,7 @@ protected S3BlobStore createBlobStore() { bulkDeletesSize, metadata, asyncUploadUtils, + urgentExecutorBuilder, priorityExecutorBuilder, normalExecutorBuilder ); diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java index c6450e49d08e2..9ed232464d080 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java @@ -75,6 +75,9 @@ * A plugin to add a repository type that writes to and from the AWS S3. */ public class S3RepositoryPlugin extends Plugin implements RepositoryPlugin, ReloadablePlugin { + + private static final String URGENT_FUTURE_COMPLETION = "urgent_future_completion"; + private static final String URGENT_STREAM_READER = "urgent_stream_reader"; private static final String PRIORITY_FUTURE_COMPLETION = "priority_future_completion"; private static final String PRIORITY_STREAM_READER = "priority_stream_reader"; private static final String FUTURE_COMPLETION = "future_completion"; @@ -85,6 +88,7 @@ public class S3RepositoryPlugin extends Plugin implements RepositoryPlugin, Relo private final Path configPath; + private AsyncExecutorContainer urgentExecutorBuilder; private AsyncExecutorContainer priorityExecutorBuilder; private AsyncExecutorContainer normalExecutorBuilder; @@ -96,6 +100,10 @@ public S3RepositoryPlugin(final Settings settings, final Path configPath) { public List> getExecutorBuilders(Settings settings) { List> executorBuilders = new ArrayList<>(); int halfProcMaxAt5 = halfAllocatedProcessorsMaxFive(allocatedProcessors(settings)); + executorBuilders.add( + new FixedExecutorBuilder(settings, URGENT_FUTURE_COMPLETION, urgentPoolCount(settings), 10_000, URGENT_FUTURE_COMPLETION) + ); + executorBuilders.add(new ScalingExecutorBuilder(URGENT_STREAM_READER, 1, halfProcMaxAt5, TimeValue.timeValueMinutes(5))); executorBuilders.add( new FixedExecutorBuilder(settings, PRIORITY_FUTURE_COMPLETION, priorityPoolCount(settings), 10_000, PRIORITY_FUTURE_COMPLETION) ); @@ -128,6 +136,10 @@ private static int allocatedProcessors(Settings settings) { return OpenSearchExecutors.allocatedProcessors(settings); } + private static int urgentPoolCount(Settings settings) { + return boundedBy((allocatedProcessors(settings) + 7) / 8, 1, 2); + } + private static int priorityPoolCount(Settings settings) { return boundedBy((allocatedProcessors(settings) + 1) / 2, 2, 4); } @@ -150,8 +162,14 @@ public Collection createComponents( final IndexNameExpressionResolver expressionResolver, final Supplier repositoriesServiceSupplier ) { + int urgentEventLoopThreads = urgentPoolCount(clusterService.getSettings()); int priorityEventLoopThreads = priorityPoolCount(clusterService.getSettings()); int normalEventLoopThreads = normalPoolCount(clusterService.getSettings()); + this.urgentExecutorBuilder = new AsyncExecutorContainer( + threadPool.executor(URGENT_FUTURE_COMPLETION), + threadPool.executor(URGENT_STREAM_READER), + new AsyncTransferEventLoopGroup(urgentEventLoopThreads) + ); this.priorityExecutorBuilder = new AsyncExecutorContainer( threadPool.executor(PRIORITY_FUTURE_COMPLETION), threadPool.executor(PRIORITY_STREAM_READER), @@ -176,7 +194,8 @@ protected S3Repository createRepository( AsyncTransferManager asyncUploadUtils = new AsyncTransferManager( S3Repository.PARALLEL_MULTIPART_UPLOAD_MINIMUM_PART_SIZE_SETTING.get(clusterService.getSettings()).getBytes(), normalExecutorBuilder.getStreamReader(), - priorityExecutorBuilder.getStreamReader() + priorityExecutorBuilder.getStreamReader(), + urgentExecutorBuilder.getStreamReader() ); return new S3Repository( metadata, @@ -185,6 +204,7 @@ protected S3Repository createRepository( clusterService, recoverySettings, asyncUploadUtils, + urgentExecutorBuilder, priorityExecutorBuilder, normalExecutorBuilder, s3AsyncService, diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java index 6007d9f9c8a1c..933ee6dc29513 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java @@ -48,6 +48,7 @@ public class AsyncPartsHandler { * @param s3AsyncClient S3 client to use for upload * @param executorService Thread pool for regular upload * @param priorityExecutorService Thread pool for priority uploads + * @param urgentExecutorService Thread pool for urgent uploads * @param uploadRequest request for upload * @param streamContext Stream context used in supplying individual file parts * @param uploadId Upload Id against which multi-part is being performed @@ -60,6 +61,7 @@ public static List> uploadParts( S3AsyncClient s3AsyncClient, ExecutorService executorService, ExecutorService priorityExecutorService, + ExecutorService urgentExecutorService, UploadRequest uploadRequest, StreamContext streamContext, String uploadId, @@ -83,6 +85,7 @@ public static List> uploadParts( s3AsyncClient, executorService, priorityExecutorService, + urgentExecutorService, completedParts, inputStreamContainers, futures, @@ -129,6 +132,7 @@ private static void uploadPart( S3AsyncClient s3AsyncClient, ExecutorService executorService, ExecutorService priorityExecutorService, + ExecutorService urgentExecutorService, AtomicReferenceArray completedParts, AtomicReferenceArray inputStreamContainers, List> futures, @@ -138,9 +142,14 @@ private static void uploadPart( ) { Integer partNumber = uploadPartRequest.partNumber(); - ExecutorService streamReadExecutor = uploadRequest.getWritePriority() == WritePriority.HIGH - ? priorityExecutorService - : executorService; + ExecutorService streamReadExecutor; + if (uploadRequest.getWritePriority() == WritePriority.URGENT) { + streamReadExecutor = urgentExecutorService; + } else if (uploadRequest.getWritePriority() == WritePriority.HIGH) { + streamReadExecutor = priorityExecutorService; + } else { + streamReadExecutor = executorService; + } // Buffered stream is needed to allow mark and reset ops during IO errors so that only buffered // data can be retried instead of retrying whole file by the application. InputStream inputStream = new BufferedInputStream(inputStreamContainer.getInputStream(), (int) (ByteSizeUnit.MB.toBytes(1) + 1)); diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java index a52745e33073e..4f1ab9764702e 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java @@ -61,6 +61,7 @@ public final class AsyncTransferManager { private static final Logger log = LogManager.getLogger(AsyncTransferManager.class); private final ExecutorService executorService; private final ExecutorService priorityExecutorService; + private final ExecutorService urgentExecutorService; private final long minimumPartSize; /** @@ -75,10 +76,16 @@ public final class AsyncTransferManager { * @param executorService The stream reader {@link ExecutorService} for normal priority uploads * @param priorityExecutorService The stream read {@link ExecutorService} for high priority uploads */ - public AsyncTransferManager(long minimumPartSize, ExecutorService executorService, ExecutorService priorityExecutorService) { + public AsyncTransferManager( + long minimumPartSize, + ExecutorService executorService, + ExecutorService priorityExecutorService, + ExecutorService urgentExecutorService + ) { this.executorService = executorService; this.priorityExecutorService = priorityExecutorService; this.minimumPartSize = minimumPartSize; + this.urgentExecutorService = urgentExecutorService; } /** @@ -162,6 +169,7 @@ private void doUploadInParts( s3AsyncClient, executorService, priorityExecutorService, + urgentExecutorService, uploadRequest, streamContext, uploadId, @@ -308,9 +316,14 @@ private void uploadInOneChunk( putObjectRequestBuilder.checksumAlgorithm(ChecksumAlgorithm.CRC32); putObjectRequestBuilder.checksumCRC32(base64StringFromLong(uploadRequest.getExpectedChecksum())); } - ExecutorService streamReadExecutor = uploadRequest.getWritePriority() == WritePriority.HIGH - ? priorityExecutorService - : executorService; + ExecutorService streamReadExecutor; + if (uploadRequest.getWritePriority() == WritePriority.URGENT) { + streamReadExecutor = urgentExecutorService; + } else if (uploadRequest.getWritePriority() == WritePriority.HIGH) { + streamReadExecutor = priorityExecutorService; + } else { + streamReadExecutor = executorService; + } // Buffered stream is needed to allow mark and reset ops during IO errors so that only buffered // data can be retried instead of retrying whole file by the application. InputStream inputStream = new BufferedInputStream(inputStreamContainer.getInputStream(), (int) (ByteSizeUnit.MB.toBytes(1) + 1)); diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/RepositoryCredentialsTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/RepositoryCredentialsTests.java index a4bfe11383b4f..8e1926d40302f 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/RepositoryCredentialsTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/RepositoryCredentialsTests.java @@ -302,7 +302,7 @@ protected S3Repository createRepository( ClusterService clusterService, RecoverySettings recoverySettings ) { - return new S3Repository(metadata, registry, service, clusterService, recoverySettings, null, null, null, null, false) { + return new S3Repository(metadata, registry, service, clusterService, recoverySettings, null, null, null, null, null, false) { @Override protected void assertSnapshotOrGenericThread() { // eliminate thread name check as we create repo manually on test/main threads diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3AsyncServiceTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3AsyncServiceTests.java index e9fe557ab751a..de9ad46bb222d 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3AsyncServiceTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3AsyncServiceTests.java @@ -44,12 +44,12 @@ public void testCachedClientsAreReleased() { final S3ClientSettings otherClientSettings = s3AsyncService.settings(metadata2); assertSame(clientSettings, otherClientSettings); final AmazonAsyncS3Reference reference = SocketAccess.doPrivileged( - () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer) + () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer, asyncExecutorContainer) ); reference.close(); s3AsyncService.close(); final AmazonAsyncS3Reference referenceReloaded = SocketAccess.doPrivileged( - () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer) + () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer, asyncExecutorContainer) ); assertNotSame(referenceReloaded, reference); referenceReloaded.close(); @@ -79,12 +79,12 @@ public void testCachedClientsWithCredentialsAreReleased() { final S3ClientSettings otherClientSettings = s3AsyncService.settings(metadata2); assertSame(clientSettings, otherClientSettings); final AmazonAsyncS3Reference reference = SocketAccess.doPrivileged( - () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer) + () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer, asyncExecutorContainer) ); reference.close(); s3AsyncService.close(); final AmazonAsyncS3Reference referenceReloaded = SocketAccess.doPrivileged( - () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer) + () -> s3AsyncService.client(metadata1, asyncExecutorContainer, asyncExecutorContainer, asyncExecutorContainer) ); assertNotSame(referenceReloaded, reference); referenceReloaded.close(); diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerMockClientTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerMockClientTests.java index 6eb8faa746d34..7c67519f2f3b0 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerMockClientTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerMockClientTests.java @@ -266,10 +266,11 @@ public void verifySingleChunkUploadCallCount(boolean finalizeUploadFailure) { @Override public AmazonAsyncS3Reference client( RepositoryMetadata repositoryMetadata, + AsyncExecutorContainer urgentExecutorBuilder, AsyncExecutorContainer priorityExecutorBuilder, AsyncExecutorContainer normalExecutorBuilder ) { - return new AmazonAsyncS3Reference(AmazonAsyncS3WithCredentials.create(asyncClient, asyncClient, null)); + return new AmazonAsyncS3Reference(AmazonAsyncS3WithCredentials.create(asyncClient, asyncClient, asyncClient, null)); } } @@ -393,9 +394,11 @@ private S3BlobStore createBlobStore() { new AsyncTransferManager( S3Repository.PARALLEL_MULTIPART_UPLOAD_MINIMUM_PART_SIZE_SETTING.getDefault(Settings.EMPTY).getBytes(), asyncExecutorContainer.getStreamReader(), + asyncExecutorContainer.getStreamReader(), asyncExecutorContainer.getStreamReader() ), asyncExecutorContainer, + asyncExecutorContainer, asyncExecutorContainer ); } diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerRetriesTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerRetriesTests.java index a2214f5218991..ceab06bd051e9 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerRetriesTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobContainerRetriesTests.java @@ -221,9 +221,11 @@ protected AsyncMultiStreamBlobContainer createBlobContainer( new AsyncTransferManager( S3Repository.PARALLEL_MULTIPART_UPLOAD_MINIMUM_PART_SIZE_SETTING.getDefault(Settings.EMPTY).getBytes(), asyncExecutorContainer.getStreamReader(), + asyncExecutorContainer.getStreamReader(), asyncExecutorContainer.getStreamReader() ), asyncExecutorContainer, + asyncExecutorContainer, asyncExecutorContainer ) ) { diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobStoreContainerTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobStoreContainerTests.java index 2701cae6a733b..58ad290a31e85 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobStoreContainerTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3BlobStoreContainerTests.java @@ -935,7 +935,7 @@ public void testReadBlobAsyncMultiPart() throws Exception { final S3AsyncClient s3AsyncClient = mock(S3AsyncClient.class); final AmazonAsyncS3Reference amazonAsyncS3Reference = new AmazonAsyncS3Reference( - AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, null) + AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, s3AsyncClient, null) ); final S3BlobStore blobStore = mock(S3BlobStore.class); @@ -993,7 +993,7 @@ public void testReadBlobAsyncSinglePart() throws Exception { final S3AsyncClient s3AsyncClient = mock(S3AsyncClient.class); final AmazonAsyncS3Reference amazonAsyncS3Reference = new AmazonAsyncS3Reference( - AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, null) + AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, s3AsyncClient, null) ); final S3BlobStore blobStore = mock(S3BlobStore.class); final BlobPath blobPath = new BlobPath(); @@ -1048,7 +1048,7 @@ public void testReadBlobAsyncFailure() throws Exception { final S3AsyncClient s3AsyncClient = mock(S3AsyncClient.class); final AmazonAsyncS3Reference amazonAsyncS3Reference = new AmazonAsyncS3Reference( - AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, null) + AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, s3AsyncClient, null) ); final S3BlobStore blobStore = mock(S3BlobStore.class); @@ -1091,7 +1091,7 @@ public void testReadBlobAsyncOnCompleteFailureMissingData() throws Exception { final S3AsyncClient s3AsyncClient = mock(S3AsyncClient.class); final AmazonAsyncS3Reference amazonAsyncS3Reference = new AmazonAsyncS3Reference( - AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, null) + AmazonAsyncS3WithCredentials.create(s3AsyncClient, s3AsyncClient, s3AsyncClient, null) ); final S3BlobStore blobStore = mock(S3BlobStore.class); diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3RepositoryTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3RepositoryTests.java index e65ca69a5047b..6fec535ae6301 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3RepositoryTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/S3RepositoryTests.java @@ -168,6 +168,7 @@ private S3Repository createS3Repo(RepositoryMetadata metadata) { null, null, null, + null, false ) { @Override diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java index 97a746cdeed93..2437547a80a6f 100644 --- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java +++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java @@ -64,6 +64,7 @@ public void setUp() throws Exception { asyncTransferManager = new AsyncTransferManager( ByteSizeUnit.MB.toBytes(5), Executors.newSingleThreadExecutor(), + Executors.newSingleThreadExecutor(), Executors.newSingleThreadExecutor() ); super.setUp(); diff --git a/qa/os/build.gradle b/qa/os/build.gradle index 66c6525439dac..082ed5277575a 100644 --- a/qa/os/build.gradle +++ b/qa/os/build.gradle @@ -70,6 +70,11 @@ tasks.dependenciesInfo.enabled = false tasks.thirdPartyAudit.ignoreMissingClasses() +tasks.thirdPartyAudit.ignoreViolations( + 'org.apache.logging.log4j.core.util.internal.UnsafeUtil', + 'org.apache.logging.log4j.core.util.internal.UnsafeUtil$1' +) + tasks.register('destructivePackagingTest') { dependsOn 'destructiveDistroTest' } diff --git a/server/build.gradle b/server/build.gradle index f6db3d53a0dcc..c56f9d5aa288f 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -364,7 +364,9 @@ tasks.named("thirdPartyAudit").configure { 'com.google.protobuf.UnsafeUtil$Android32MemoryAccessor', 'com.google.protobuf.UnsafeUtil$Android64MemoryAccessor', 'com.google.protobuf.UnsafeUtil$JvmMemoryAccessor', - 'com.google.protobuf.UnsafeUtil$MemoryAccessor' + 'com.google.protobuf.UnsafeUtil$MemoryAccessor', + 'org.apache.logging.log4j.core.util.internal.UnsafeUtil', + 'org.apache.logging.log4j.core.util.internal.UnsafeUtil$1' ) } diff --git a/server/licenses/log4j-api-2.20.0.jar.sha1 b/server/licenses/log4j-api-2.20.0.jar.sha1 deleted file mode 100644 index 37154d9861ac0..0000000000000 --- a/server/licenses/log4j-api-2.20.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -1fe6082e660daf07c689a89c94dc0f49c26b44bb \ No newline at end of file diff --git a/server/licenses/log4j-api-2.21.0.jar.sha1 b/server/licenses/log4j-api-2.21.0.jar.sha1 new file mode 100644 index 0000000000000..51446052594aa --- /dev/null +++ b/server/licenses/log4j-api-2.21.0.jar.sha1 @@ -0,0 +1 @@ +760192f2b69eacf4a4afc78e5a1d7a8de054fcbd \ No newline at end of file diff --git a/server/licenses/log4j-core-2.20.0.jar.sha1 b/server/licenses/log4j-core-2.20.0.jar.sha1 deleted file mode 100644 index 49c972626563b..0000000000000 --- a/server/licenses/log4j-core-2.20.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -eb2a9a47b1396e00b5eee1264296729a70565cc0 \ No newline at end of file diff --git a/server/licenses/log4j-core-2.21.0.jar.sha1 b/server/licenses/log4j-core-2.21.0.jar.sha1 new file mode 100644 index 0000000000000..c88e6f7a25ca9 --- /dev/null +++ b/server/licenses/log4j-core-2.21.0.jar.sha1 @@ -0,0 +1 @@ +122e1a9e0603cc9eae07b0846a6ff01f2454bc49 \ No newline at end of file diff --git a/server/licenses/log4j-jul-2.20.0.jar.sha1 b/server/licenses/log4j-jul-2.20.0.jar.sha1 deleted file mode 100644 index a456651e4569e..0000000000000 --- a/server/licenses/log4j-jul-2.20.0.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -8170e6118eac1ab332046c179718a0f107f688e1 \ No newline at end of file diff --git a/server/licenses/log4j-jul-2.21.0.jar.sha1 b/server/licenses/log4j-jul-2.21.0.jar.sha1 new file mode 100644 index 0000000000000..480010840abca --- /dev/null +++ b/server/licenses/log4j-jul-2.21.0.jar.sha1 @@ -0,0 +1 @@ +f0da61113f4a47654677e6a98b1e13ca7de2483d \ No newline at end of file diff --git a/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/datastream/DataStreamTestCase.java b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/datastream/DataStreamTestCase.java index 50ff76c6b62f3..82ab5b0118c0e 100644 --- a/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/datastream/DataStreamTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/action/admin/indices/datastream/DataStreamTestCase.java @@ -37,6 +37,7 @@ public AcknowledgedResponse createDataStream(String name) throws Exception { CreateDataStreamAction.Request request = new CreateDataStreamAction.Request(name); AcknowledgedResponse response = client().admin().indices().createDataStream(request).get(); assertThat(response.isAcknowledged(), is(true)); + performRemoteStoreTestAction(); return response; } @@ -67,6 +68,7 @@ public RolloverResponse rolloverDataStream(String name) throws Exception { RolloverResponse response = client().admin().indices().rolloverIndex(request).get(); assertThat(response.isAcknowledged(), is(true)); assertThat(response.isRolledOver(), is(true)); + performRemoteStoreTestAction(); return response; } @@ -109,5 +111,4 @@ public AcknowledgedResponse deleteIndexTemplate(String name) throws Exception { assertThat(response.isAcknowledged(), is(true)); return response; } - } diff --git a/server/src/internalClusterTest/java/org/opensearch/discovery/ClusterManagerDisruptionIT.java b/server/src/internalClusterTest/java/org/opensearch/discovery/ClusterManagerDisruptionIT.java index 1463c45aa9b2f..79f6ba6dfa642 100644 --- a/server/src/internalClusterTest/java/org/opensearch/discovery/ClusterManagerDisruptionIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/discovery/ClusterManagerDisruptionIT.java @@ -39,6 +39,7 @@ import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.coordination.NoClusterManagerBlockService; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.service.ClusterStateStats; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.core.xcontent.MediaTypeRegistry; @@ -199,6 +200,8 @@ public void testIsolateClusterManagerAndVerifyClusterStateConsensus() throws Exc } } + ClusterStateStats clusterStateStats = internalCluster().clusterService().getClusterManagerService().getClusterStateStats(); + assertTrue(clusterStateStats.getUpdateFailed() > 0); }); } diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java index 7304304e522f8..59eef3c06844b 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java @@ -8,9 +8,12 @@ package org.opensearch.gateway.remote; +import org.opensearch.action.admin.cluster.node.stats.NodesStatsRequest; +import org.opensearch.action.admin.cluster.node.stats.NodesStatsResponse; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.blobstore.BlobPath; import org.opensearch.common.settings.Settings; +import org.opensearch.discovery.DiscoveryStats; import org.opensearch.remotestore.RemoteStoreBaseIntegTestCase; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.blobstore.BlobStoreRepository; @@ -19,6 +22,7 @@ import java.nio.charset.StandardCharsets; import java.util.Base64; import java.util.Map; +import java.util.stream.Collectors; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING; @@ -94,6 +98,45 @@ public void testFullClusterRestoreStaleDelete() throws Exception { assertEquals(shardCount, indexMetadataMap.values().stream().findFirst().get().getNumberOfShards()); } + public void testRemoteStateStats() { + int shardCount = randomIntBetween(1, 2); + int replicaCount = 1; + int dataNodeCount = shardCount * (replicaCount + 1); + int clusterManagerNodeCount = 1; + prepareCluster(clusterManagerNodeCount, dataNodeCount, INDEX_NAME, replicaCount, shardCount); + String clusterManagerNode = internalCluster().getClusterManagerName(); + String dataNode = internalCluster().getDataNodeNames().stream().collect(Collectors.toList()).get(0); + + // Fetch _nodes/stats + NodesStatsResponse nodesStatsResponse = client().admin() + .cluster() + .prepareNodesStats(clusterManagerNode) + .addMetric(NodesStatsRequest.Metric.DISCOVERY.metricName()) + .get(); + + // assert cluster state stats + DiscoveryStats discoveryStats = nodesStatsResponse.getNodes().get(0).getDiscoveryStats(); + + assertNotNull(discoveryStats.getClusterStateStats()); + assertTrue(discoveryStats.getClusterStateStats().getUpdateSuccess() > 1); + assertEquals(0, discoveryStats.getClusterStateStats().getUpdateFailed()); + assertTrue(discoveryStats.getClusterStateStats().getUpdateTotalTimeInMillis() > 0); + // assert remote state stats + assertTrue(discoveryStats.getClusterStateStats().getPersistenceStats().get(0).getSuccessCount() > 1); + assertEquals(0, discoveryStats.getClusterStateStats().getPersistenceStats().get(0).getFailedCount()); + assertTrue(discoveryStats.getClusterStateStats().getPersistenceStats().get(0).getTotalTimeInMillis() > 0); + + NodesStatsResponse nodesStatsResponseDataNode = client().admin() + .cluster() + .prepareNodesStats(dataNode) + .addMetric(NodesStatsRequest.Metric.DISCOVERY.metricName()) + .get(); + // assert cluster state stats for data node + DiscoveryStats dataNodeDiscoveryStats = nodesStatsResponseDataNode.getNodes().get(0).getDiscoveryStats(); + assertNotNull(dataNodeDiscoveryStats.getClusterStateStats()); + assertEquals(0, dataNodeDiscoveryStats.getClusterStateStats().getUpdateSuccess()); + } + private void setReplicaCount(int replicaCount) { client().admin() .indices() diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationClusterSettingIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationClusterSettingIT.java index a82fd8d845709..186a5ce39f131 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationClusterSettingIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationClusterSettingIT.java @@ -19,6 +19,7 @@ import org.opensearch.test.OpenSearchIntegTestCase; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REPLICATION_TYPE; +import static org.opensearch.indices.IndicesService.CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING; import static org.opensearch.indices.IndicesService.CLUSTER_SETTING_REPLICATION_TYPE; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) @@ -123,4 +124,30 @@ public void testIndexReplicationSettingOverridesDocRepClusterSetting() throws Ex assertEquals(indicesService.indexService(anotherIndex).getIndexSettings().isSegRepEnabled(), false); } + public void testIndexReplicationTypeWhenRestrictSettingTrue() { + testRestrictIndexReplicationTypeSetting(true, randomFrom(ReplicationType.values())); + } + + public void testIndexReplicationTypeWhenRestrictSettingFalse() { + testRestrictIndexReplicationTypeSetting(false, randomFrom(ReplicationType.values())); + } + + private void testRestrictIndexReplicationTypeSetting(boolean setRestrict, ReplicationType replicationType) { + String expectedExceptionMsg = + "Validation Failed: 1: index setting [index.replication.type] is not allowed to be set as [cluster.restrict.index.replication_type=true];"; + String clusterManagerName = internalCluster().startNode( + Settings.builder().put(CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING.getKey(), setRestrict).build() + ); + internalCluster().startDataOnlyNodes(1); + + // Test create index fails + Settings indexSettings = Settings.builder().put(indexSettings()).put(SETTING_REPLICATION_TYPE, replicationType).build(); + if (setRestrict) { + IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> createIndex(INDEX_NAME, indexSettings)); + assertEquals(expectedExceptionMsg, exception.getMessage()); + } else { + createIndex(INDEX_NAME, indexSettings); + } + } + } diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationRelocationIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationRelocationIT.java index dd832a63d1e66..dbe0b43441f54 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationRelocationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationRelocationIT.java @@ -26,6 +26,7 @@ import org.opensearch.index.shard.IndexShard; import org.opensearch.indices.IndicesService; import org.opensearch.test.OpenSearchIntegTestCase; +import org.opensearch.test.junit.annotations.TestLogging; import org.opensearch.test.transport.MockTransportService; import org.opensearch.transport.TransportService; @@ -55,6 +56,7 @@ private void createIndex(int replicaCount) { * This test verifies happy path when primary shard is relocated newly added node (target) in the cluster. Before * relocation and after relocation documents are indexed and documents are verified */ + @TestLogging(reason = "Getting trace logs from replication,shard and allocation package", value = "org.opensearch.indices.replication:TRACE, org.opensearch.index.shard:TRACE, org.opensearch.cluster.routing.allocation:TRACE") public void testPrimaryRelocation() throws Exception { final String oldPrimary = internalCluster().startNode(); createIndex(1); diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java index b8481610869e6..99c5d7fb2bae7 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/BaseRemoteStoreRestoreIT.java @@ -46,7 +46,10 @@ protected Collection> nodePlugins() { } protected void restore(String... indices) { - boolean restoreAllShards = randomBoolean(); + restore(randomBoolean(), indices); + } + + protected void restore(boolean restoreAllShards, String... indices) { if (restoreAllShards) { assertAcked(client().admin().indices().prepareClose(indices)); } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java index 865b2d13f189e..9e0b2a66467de 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteRestoreSnapshotIT.java @@ -11,6 +11,7 @@ import org.opensearch.action.DocWriteResponse; import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreRequest; import org.opensearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse; +import org.opensearch.action.admin.indices.delete.DeleteIndexRequest; import org.opensearch.action.admin.indices.get.GetIndexRequest; import org.opensearch.action.admin.indices.get.GetIndexResponse; import org.opensearch.action.delete.DeleteResponse; @@ -20,8 +21,13 @@ import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.io.PathUtils; import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.io.IOUtils; +import org.opensearch.core.index.Index; import org.opensearch.core.rest.RestStatus; +import org.opensearch.index.IndexService; import org.opensearch.index.IndexSettings; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.indices.IndicesService; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.snapshots.AbstractSnapshotIntegTestCase; import org.opensearch.snapshots.SnapshotInfo; @@ -32,11 +38,15 @@ import org.junit.Before; import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Optional; import java.util.concurrent.ExecutionException; +import java.util.stream.Collectors; +import java.util.stream.Stream; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_SEGMENT_STORE_REPOSITORY; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_REMOTE_STORE_ENABLED; @@ -345,6 +355,90 @@ public void testRestoreInSameRemoteStoreEnabledIndex() throws IOException { assertDocsPresentInIndex(client, indexName1, numDocsInIndex1 + 4); } + public void testRemoteRestoreIndexRestoredFromSnapshot() throws IOException, ExecutionException, InterruptedException { + internalCluster().startClusterManagerOnlyNode(); + internalCluster().startDataOnlyNodes(2); + + String indexName1 = "testindex1"; + String snapshotRepoName = "test-restore-snapshot-repo"; + String snapshotName1 = "test-restore-snapshot1"; + Path absolutePath1 = randomRepoPath().toAbsolutePath(); + logger.info("Snapshot Path [{}]", absolutePath1); + + createRepository(snapshotRepoName, "fs", getRepositorySettings(absolutePath1, true)); + + Settings indexSettings = getIndexSettings(1, 0).build(); + createIndex(indexName1, indexSettings); + + final int numDocsInIndex1 = randomIntBetween(20, 30); + indexDocuments(client(), indexName1, numDocsInIndex1); + flushAndRefresh(indexName1); + ensureGreen(indexName1); + + logger.info("--> snapshot"); + SnapshotInfo snapshotInfo1 = createSnapshot(snapshotRepoName, snapshotName1, new ArrayList<>(Arrays.asList(indexName1))); + assertThat(snapshotInfo1.successfulShards(), greaterThan(0)); + assertThat(snapshotInfo1.successfulShards(), equalTo(snapshotInfo1.totalShards())); + assertThat(snapshotInfo1.state(), equalTo(SnapshotState.SUCCESS)); + + assertAcked(client().admin().indices().delete(new DeleteIndexRequest(indexName1)).get()); + assertFalse(indexExists(indexName1)); + + RestoreSnapshotResponse restoreSnapshotResponse1 = client().admin() + .cluster() + .prepareRestoreSnapshot(snapshotRepoName, snapshotName1) + .setWaitForCompletion(false) + .setIndices(indexName1) + .get(); + + assertEquals(restoreSnapshotResponse1.status(), RestStatus.ACCEPTED); + ensureGreen(indexName1); + assertDocsPresentInIndex(client(), indexName1, numDocsInIndex1); + + // Make sure remote translog is empty + String indexUUID = client().admin() + .indices() + .prepareGetSettings(indexName1) + .get() + .getSetting(indexName1, IndexMetadata.SETTING_INDEX_UUID); + + Path remoteTranslogMetadataPath = Path.of(String.valueOf(remoteRepoPath), indexUUID, "/0/translog/metadata"); + Path remoteTranslogDataPath = Path.of(String.valueOf(remoteRepoPath), indexUUID, "/0/translog/data"); + + try ( + Stream translogMetadata = Files.list(remoteTranslogMetadataPath); + Stream translogData = Files.list(remoteTranslogDataPath) + ) { + assertTrue(translogData.count() > 0); + assertTrue(translogMetadata.count() > 0); + } + + // Clear the local data before stopping the node. This will make sure that remote translog is empty. + IndexShard indexShard = getIndexShard(primaryNodeName(indexName1), indexName1); + try (Stream files = Files.list(indexShard.shardPath().resolveTranslog())) { + IOUtils.deleteFilesIgnoringExceptions(files.collect(Collectors.toList())); + } + internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primaryNodeName(indexName1))); + + ensureRed(indexName1); + + client().admin() + .cluster() + .restoreRemoteStore(new RestoreRemoteStoreRequest().indices(indexName1).restoreAllShards(false), PlainActionFuture.newFuture()); + + ensureGreen(indexName1); + assertDocsPresentInIndex(client(), indexName1, numDocsInIndex1); + } + + protected IndexShard getIndexShard(String node, String indexName) { + final Index index = resolveIndex(indexName); + IndicesService indicesService = internalCluster().getInstance(IndicesService.class, node); + IndexService indexService = indicesService.indexService(index); + assertNotNull(indexService); + final Optional shardId = indexService.shardIds().stream().findFirst(); + return shardId.map(indexService::getShard).orElse(null); + } + public void testRestoreShallowCopySnapshotWithDifferentRepo() throws IOException { String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(); String primary = internalCluster().startDataOnlyNode(); diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java index 98586b60dcc69..f19c9db7874db 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBackpressureAndResiliencyIT.java @@ -56,7 +56,7 @@ public void testWritesRejectedDueToBytesLagBreach() throws Exception { public void testWritesRejectedDueToTimeLagBreach() throws Exception { // Initially indexing happens with doc size of 1KB, then all remote store interactions start failing. Now, the // indexing happens with doc size of 1 byte leading to time lag limit getting exceeded and leading to rejections. - validateBackpressure(ByteSizeUnit.KB.toIntBytes(1), 20, ByteSizeUnit.BYTES.toIntBytes(1), 15, "time_lag"); + validateBackpressure(ByteSizeUnit.KB.toIntBytes(1), 20, ByteSizeUnit.BYTES.toIntBytes(1), 3, "time_lag"); } private void validateBackpressure( @@ -133,11 +133,13 @@ private RemoteSegmentTransferTracker.Stats stats() { return matches.get(0).getSegmentStats(); } - private void indexDocAndRefresh(BytesReference source, int iterations) { + private void indexDocAndRefresh(BytesReference source, int iterations) throws InterruptedException { for (int i = 0; i < iterations; i++) { client().prepareIndex(INDEX_NAME).setSource(source, MediaTypeRegistry.JSON).get(); refresh(INDEX_NAME); } + Thread.sleep(250); + client().prepareIndex(INDEX_NAME).setSource(source, MediaTypeRegistry.JSON).get(); } /** diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java index 3a3e293de9b13..e9afd6d36bb87 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java @@ -9,6 +9,7 @@ package org.opensearch.remotestore; import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; +import org.opensearch.action.admin.indices.datastream.DataStreamRolloverIT; import org.opensearch.action.admin.indices.settings.put.UpdateSettingsRequest; import org.opensearch.action.admin.indices.template.put.PutIndexTemplateRequest; import org.opensearch.cluster.ClusterState; @@ -21,16 +22,19 @@ import org.opensearch.gateway.remote.ClusterMetadataManifest; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; import org.opensearch.gateway.remote.RemoteClusterStateService; +import org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; import java.io.IOException; import java.nio.file.Files; +import java.nio.file.Path; import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.concurrent.ExecutionException; +import static org.opensearch.cluster.coordination.ClusterBootstrapService.INITIAL_CLUSTER_MANAGER_NODES_SETTING; import static org.opensearch.cluster.metadata.IndexMetadata.INDEX_READ_ONLY_SETTING; import static org.opensearch.cluster.metadata.Metadata.CLUSTER_READ_ONLY_BLOCK; import static org.opensearch.cluster.metadata.Metadata.SETTING_READ_ONLY_SETTING; @@ -65,6 +69,13 @@ private void resetCluster(int dataNodeCount, int clusterManagerNodeCount) { internalCluster().startDataOnlyNodes(dataNodeCount); } + protected void verifyRedIndicesAndTriggerRestore(Map indexStats, String indexName, boolean indexMoreDocs) + throws Exception { + ensureRed(indexName); + restore(false, indexName); + verifyRestoredData(indexStats, indexName, indexMoreDocs); + } + public void testFullClusterRestore() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; @@ -83,7 +94,73 @@ public void testFullClusterRestore() throws Exception { // Step - 3 Trigger full cluster restore and validate validateMetadata(List.of(INDEX_NAME)); - verifyRestoredData(indexStats, INDEX_NAME); + verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, true); + } + + /** + * This test scenario covers the case where right after remote state restore and persisting it to disk via LucenePersistedState, full cluster restarts. + * This is a special case for remote state as at this point cluster uuid in the restored state is still ClusterState.UNKNOWN_UUID as we persist it disk. + * After restart the local disk state will be read but should be again overridden with remote state. + * + * 1. Form a cluster and index few docs + * 2. Replace all nodes to remove all local disk state + * 3. Start cluster manager node without correct seeding to ensure local disk state is written with cluster uuid ClusterState.UNKNOWN_UUID but with remote restored Metadata + * 4. Restart the cluster manager node with correct seeding. + * 5. After restart the cluster manager picks up the local disk state with has same Metadata as remote but cluster uuid is still ClusterState.UNKNOWN_UUID + * 6. The cluster manager will try to restore from remote again. + * 7. Metadata loaded from local disk state will be overridden with remote Metadata and no conflict should arise. + * 8. Add data nodes to recover index data + * 9. Verify Metadata and index data is restored. + */ + public void testFullClusterRestoreDoesntFailWithConflictingLocalState() throws Exception { + int shardCount = randomIntBetween(1, 2); + int replicaCount = 1; + int dataNodeCount = shardCount * (replicaCount + 1); + int clusterManagerNodeCount = 1; + + // index some data to generate files in remote directory + Map indexStats = initialTestSetup(shardCount, replicaCount, dataNodeCount, 1); + String prevClusterUUID = clusterService().state().metadata().clusterUUID(); + + // stop all nodes + internalCluster().stopAllNodes(); + + // start a cluster manager node with no cluster manager seeding. + // This should fail with IllegalStateException as cluster manager fails to form without any initial seed + assertThrows( + IllegalStateException.class, + () -> internalCluster().startClusterManagerOnlyNodes( + clusterManagerNodeCount, + Settings.builder() + .putList(INITIAL_CLUSTER_MANAGER_NODES_SETTING.getKey()) // disable seeding during bootstrapping + .build() + ) + ); + + // verify cluster manager not elected + String newClusterUUID = clusterService().state().metadata().clusterUUID(); + assert Objects.equals(newClusterUUID, ClusterState.UNKNOWN_UUID) + : "Disabling Cluster manager seeding failed. cluster uuid is not unknown"; + + // restart cluster manager with correct seed + internalCluster().fullRestart(new InternalTestCluster.RestartCallback() { + @Override + public Settings onNodeStopped(String nodeName) { + return Settings.builder() + .putList(INITIAL_CLUSTER_MANAGER_NODES_SETTING.getKey(), nodeName) // Seed with correct Cluster Manager node + .build(); + } + }); + + // validate new cluster state formed + newClusterUUID = clusterService().state().metadata().clusterUUID(); + assert !Objects.equals(newClusterUUID, ClusterState.UNKNOWN_UUID) : "cluster restart not successful. cluster uuid is still unknown"; + assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; + validateMetadata(List.of(INDEX_NAME)); + + // start data nodes to trigger index data recovery + internalCluster().startDataOnlyNodes(dataNodeCount); + verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, true); } public void testFullClusterRestoreMultipleIndices() throws Exception { @@ -112,8 +189,8 @@ public void testFullClusterRestoreMultipleIndices() throws Exception { // Step - 3 Trigger full cluster restore validateMetadata(List.of(INDEX_NAME, secondIndexName)); - verifyRestoredData(indexStats, INDEX_NAME); - verifyRestoredData(indexStats2, secondIndexName, false); + verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, false); + verifyRedIndicesAndTriggerRestore(indexStats2, secondIndexName, false); assertTrue(INDEX_READ_ONLY_SETTING.get(clusterService().state().metadata().index(secondIndexName).getSettings())); assertThrows(ClusterBlockException.class, () -> indexSingleDoc(secondIndexName)); // Test is complete @@ -181,7 +258,7 @@ public void testRemoteStateFullRestart() throws Exception { String newClusterUUID = clusterService().state().metadata().clusterUUID(); assert Objects.equals(newClusterUUID, prevClusterUUID) : "Full restart not successful. cluster uuid has changed"; validateCurrentMetadata(); - verifyRestoredData(indexStats, INDEX_NAME); + verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, true); } private void validateMetadata(List indexNames) { @@ -215,6 +292,14 @@ private void validateCurrentMetadata() throws Exception { }); } + public void testDataStreamPostRemoteStateRestore() throws Exception { + new DataStreamRolloverIT() { + protected boolean triggerRemoteStateRestore() { + return true; + } + }.testDataStreamRollover(); + } + public void testFullClusterRestoreGlobalMetadata() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; @@ -226,8 +311,7 @@ public void testFullClusterRestoreGlobalMetadata() throws Exception { String prevClusterUUID = clusterService().state().metadata().clusterUUID(); // Create global metadata - register a custom repo - // TODO - uncomment after all customs is also uploaded for all repos - https://github.com/opensearch-project/OpenSearch/issues/10691 - // registerCustomRepository(); + Path repoPath = registerCustomRepository(); // Create global metadata - persistent settings updatePersistentSettings(Settings.builder().put(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), 34).build()); @@ -246,41 +330,46 @@ public void testFullClusterRestoreGlobalMetadata() throws Exception { // Step - 3 Trigger full cluster restore and validate // validateCurrentMetadata(); - verifyRestoredData(indexStats, INDEX_NAME, false); - - // validate global metadata restored - verifyRestoredRepositories(); - verifyRestoredIndexTemplate(); assertEquals(Integer.valueOf(34), SETTING_CLUSTER_MAX_SHARDS_PER_NODE.get(clusterService().state().metadata().settings())); assertEquals(true, SETTING_READ_ONLY_SETTING.get(clusterService().state().metadata().settings())); assertTrue(clusterService().state().blocks().hasGlobalBlock(CLUSTER_READ_ONLY_BLOCK)); - // Test is complete - // Remote the cluster read only block to ensure proper cleanup updatePersistentSettings(Settings.builder().put(SETTING_READ_ONLY_SETTING.getKey(), false).build()); assertFalse(clusterService().state().blocks().hasGlobalBlock(CLUSTER_READ_ONLY_BLOCK)); + + verifyRedIndicesAndTriggerRestore(indexStats, INDEX_NAME, false); + + // validate global metadata restored + verifyRestoredRepositories(repoPath); + verifyRestoredIndexTemplate(); } - private void registerCustomRepository() { + private Path registerCustomRepository() { + Path path = randomRepoPath(); assertAcked( client().admin() .cluster() .preparePutRepository("custom-repo") .setType("fs") - .setSettings(Settings.builder().put("location", randomRepoPath()).put("compress", false)) + .setSettings(Settings.builder().put("location", path).put("compress", false)) .get() ); + return path; } - private void verifyRestoredRepositories() { + private void verifyRestoredRepositories(Path repoPath) { RepositoriesMetadata repositoriesMetadata = clusterService().state().metadata().custom(RepositoriesMetadata.TYPE); - assertEquals(2, repositoriesMetadata.repositories().size()); // includes remote store repo as well + assertEquals(3, repositoriesMetadata.repositories().size()); // includes remote store repo as well assertTrue(SYSTEM_REPOSITORY_SETTING.get(repositoriesMetadata.repository(REPOSITORY_NAME).settings())); assertTrue(SYSTEM_REPOSITORY_SETTING.get(repositoriesMetadata.repository(REPOSITORY_2_NAME).settings())); - // TODO - uncomment after all customs is also uploaded for all repos - https://github.com/opensearch-project/OpenSearch/issues/10691 - // assertEquals("fs", repositoriesMetadata.repository("custom-repo").type()); - // assertEquals(Settings.builder().put("location", randomRepoPath()).put("compress", false).build(), - // repositoriesMetadata.repository("custom-repo").settings()); + assertEquals("fs", repositoriesMetadata.repository("custom-repo").type()); + assertEquals( + Settings.builder().put("location", repoPath).put("compress", false).build(), + repositoriesMetadata.repository("custom-repo").settings() + ); + + // repo cleanup post verification + clusterAdmin().prepareDeleteRepository("custom-repo").get(); } private void addClusterLevelReadOnlyBlock() throws InterruptedException, ExecutionException { diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java index 1fb5c2052aded..b3b4f8e10fd31 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreIT.java @@ -509,4 +509,27 @@ public void testRestoreSnapshotToIndexWithSameNameDifferentUUID() throws Excepti assertHitCount(client(dataNodes.get(1)).prepareSearch(INDEX_NAME).setSize(0).get(), 50); }); } + + public void testNoSearchIdleForAnyReplicaCount() throws ExecutionException, InterruptedException { + internalCluster().startClusterManagerOnlyNode(); + String primaryShardNode = internalCluster().startDataOnlyNodes(1).get(0); + + createIndex(INDEX_NAME, remoteStoreIndexSettings(0)); + ensureGreen(INDEX_NAME); + IndexShard indexShard = getIndexShard(primaryShardNode); + assertFalse(indexShard.isSearchIdleSupported()); + + String replicaShardNode = internalCluster().startDataOnlyNodes(1).get(0); + assertAcked( + client().admin() + .indices() + .prepareUpdateSettings(INDEX_NAME) + .setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)) + ); + ensureGreen(INDEX_NAME); + assertFalse(indexShard.isSearchIdleSupported()); + + indexShard = getIndexShard(replicaShardNode); + assertFalse(indexShard.isSearchIdleSupported()); + } } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRestoreIT.java index 212f797180077..7626e3dba6424 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreRestoreIT.java @@ -10,11 +10,8 @@ import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreRequest; import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreResponse; -import org.opensearch.action.admin.indices.get.GetIndexRequest; -import org.opensearch.action.admin.indices.get.GetIndexResponse; import org.opensearch.action.support.PlainActionFuture; import org.opensearch.cluster.health.ClusterHealthStatus; -import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.Settings; @@ -22,12 +19,10 @@ import org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.Repository; -import org.opensearch.test.CorruptionUtils; import org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; import java.io.IOException; -import java.nio.file.Files; import java.nio.file.Path; import java.util.HashMap; import java.util.Locale; @@ -35,14 +30,13 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; -import java.util.stream.Stream; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount; import static org.hamcrest.Matchers.greaterThan; -@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.SUITE, numDataNodes = 0) public class RemoteStoreRestoreIT extends BaseRemoteStoreRestoreIT { /** @@ -467,30 +461,5 @@ public void testRateLimitedRemoteDownloads() throws Exception { } } - public void testRestoreCorruptSegmentShouldFail() throws IOException, ExecutionException, InterruptedException { - prepareCluster(1, 3, INDEX_NAME, 0, 1); - indexData(randomIntBetween(3, 4), true, INDEX_NAME); - - GetIndexResponse getIndexResponse = client().admin().indices().getIndex(new GetIndexRequest()).get(); - String indexUUID = getIndexResponse.getSettings().get(INDEX_NAME).get(IndexMetadata.SETTING_INDEX_UUID); - - logger.info("--> Corrupting segment files in remote segment store"); - Path path = segmentRepoPath.resolve(indexUUID).resolve("0").resolve("segments").resolve("data"); - try (Stream dataPath = Files.list(path)) { - CorruptionUtils.corruptFile(random(), dataPath.toArray(Path[]::new)); - } - - logger.info("--> Stop primary"); - internalCluster().stopRandomNode(InternalTestCluster.nameFilter(primaryNodeName(INDEX_NAME))); - - logger.info("--> Close and restore the index"); - client().admin() - .cluster() - .restoreRemoteStore(new RestoreRemoteStoreRequest().indices(INDEX_NAME).waitForCompletion(true), PlainActionFuture.newFuture()); - - logger.info("--> Check for index status, should be red due to corruption"); - ensureRed(INDEX_NAME); - } - // TODO: Restore flow - index aliases } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java index 8ae25c6758195..b1dbb0a900bc7 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java @@ -15,6 +15,8 @@ import org.opensearch.action.admin.cluster.remotestore.stats.RemoteStoreStatsResponse; import org.opensearch.action.support.PlainActionFuture; import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.coordination.FollowersChecker; +import org.opensearch.cluster.coordination.LeaderChecker; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.cluster.routing.ShardRoutingState; @@ -23,15 +25,20 @@ import org.opensearch.index.IndexSettings; import org.opensearch.index.remote.RemoteSegmentTransferTracker; import org.opensearch.index.remote.RemoteTranslogTransferTracker; +import org.opensearch.plugins.Plugin; import org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; -import org.junit.Before; +import org.opensearch.test.disruption.NetworkDisruption; +import org.opensearch.test.transport.MockTransportService; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; import java.util.List; import java.util.Locale; +import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import java.util.stream.Stream; @@ -44,12 +51,17 @@ public class RemoteStoreStatsIT extends RemoteStoreBaseIntegTestCase { private static final String INDEX_NAME = "remote-store-test-idx-1"; - @Before + @Override + protected Collection> nodePlugins() { + return Arrays.asList(MockTransportService.TestPlugin.class); + } + public void setup() { internalCluster().startNodes(3); } public void testStatsResponseFromAllNodes() { + setup(); // Step 1 - We create cluster, create an index, and then index documents into. We also do multiple refreshes/flushes // during this time frame. This ensures that the segment upload has started. @@ -118,6 +130,7 @@ public void testStatsResponseFromAllNodes() { } public void testStatsResponseAllShards() { + setup(); // Step 1 - We create cluster, create an index, and then index documents into. We also do multiple refreshes/flushes // during this time frame. This ensures that the segment upload has started. @@ -175,6 +188,7 @@ public void testStatsResponseAllShards() { } public void testStatsResponseFromLocalNode() { + setup(); // Step 1 - We create cluster, create an index, and then index documents into. We also do multiple refreshes/flushes // during this time frame. This ensures that the segment upload has started. @@ -236,6 +250,7 @@ public void testStatsResponseFromLocalNode() { } public void testDownloadStatsCorrectnessSinglePrimarySingleReplica() throws Exception { + setup(); // Scenario: // - Create index with single primary and single replica shard // - Disable Refresh Interval for the index @@ -325,6 +340,7 @@ public void testDownloadStatsCorrectnessSinglePrimarySingleReplica() throws Exce } public void testDownloadStatsCorrectnessSinglePrimaryMultipleReplicaShards() throws Exception { + setup(); // Scenario: // - Create index with single primary and N-1 replica shards (N = no of data nodes) // - Disable Refresh Interval for the index @@ -416,6 +432,7 @@ public void testDownloadStatsCorrectnessSinglePrimaryMultipleReplicaShards() thr } public void testStatsOnShardRelocation() { + setup(); // Scenario: // - Create index with single primary and single replica shard // - Index documents @@ -471,6 +488,7 @@ public void testStatsOnShardRelocation() { } public void testStatsOnShardUnassigned() throws IOException { + setup(); // Scenario: // - Create index with single primary and two replica shard // - Index documents @@ -497,6 +515,7 @@ public void testStatsOnShardUnassigned() throws IOException { } public void testStatsOnRemoteStoreRestore() throws IOException { + setup(); // Creating an index with primary shard count == total nodes in cluster and 0 replicas int dataNodeCount = client().admin().cluster().prepareHealth().get().getNumberOfDataNodes(); createIndex(INDEX_NAME, remoteStoreIndexSettings(0, dataNodeCount)); @@ -544,6 +563,7 @@ public void testStatsOnRemoteStoreRestore() throws IOException { } public void testNonZeroPrimaryStatsOnNewlyCreatedIndexWithZeroDocs() throws Exception { + setup(); // Create an index with one primary and one replica shard createIndex(INDEX_NAME, remoteStoreIndexSettings(1, 1)); ensureGreen(INDEX_NAME); @@ -561,26 +581,80 @@ public void testNonZeroPrimaryStatsOnNewlyCreatedIndexWithZeroDocs() throws Exce .getRemoteStoreStats(); Arrays.stream(remoteStoreStats).forEach(statObject -> { RemoteSegmentTransferTracker.Stats segmentStats = statObject.getSegmentStats(); + RemoteTranslogTransferTracker.Stats translogStats = statObject.getTranslogStats(); if (statObject.getShardRouting().primary()) { assertTrue( segmentStats.totalUploadsSucceeded == 1 && segmentStats.totalUploadsStarted == segmentStats.totalUploadsSucceeded && segmentStats.totalUploadsFailed == 0 ); + // On primary shard creation, we upload to remote translog post primary mode activation. + // This changes upload stats to non-zero for primary shard. + assertNonZeroTranslogUploadStatsNoFailures(translogStats); } else { assertTrue( segmentStats.directoryFileTransferTrackerStats.transferredBytesStarted == 0 && segmentStats.directoryFileTransferTrackerStats.transferredBytesSucceeded == 0 ); + assertZeroTranslogUploadStats(translogStats); } - - RemoteTranslogTransferTracker.Stats translogStats = statObject.getTranslogStats(); - assertZeroTranslogUploadStats(translogStats); assertZeroTranslogDownloadStats(translogStats); }); }, 5, TimeUnit.SECONDS); } + public void testStatsCorrectnessOnFailover() { + Settings clusterSettings = Settings.builder() + .put(LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING.getKey(), "100ms") + .put(LeaderChecker.LEADER_CHECK_INTERVAL_SETTING.getKey(), "500ms") + .put(LeaderChecker.LEADER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) + .put(FollowersChecker.FOLLOWER_CHECK_TIMEOUT_SETTING.getKey(), "100ms") + .put(FollowersChecker.FOLLOWER_CHECK_INTERVAL_SETTING.getKey(), "500ms") + .put(FollowersChecker.FOLLOWER_CHECK_RETRY_COUNT_SETTING.getKey(), 1) + .put(nodeSettings(0)) + .build(); + String clusterManagerNode = internalCluster().startClusterManagerOnlyNode(clusterSettings); + internalCluster().startDataOnlyNodes(2, clusterSettings); + + // Create an index with one primary and one replica shard + createIndex(INDEX_NAME, remoteStoreIndexSettings(1, 1)); + ensureGreen(INDEX_NAME); + + // Index some docs and refresh + indexDocs(); + refresh(INDEX_NAME); + + String primaryNode = primaryNodeName(INDEX_NAME); + String replicaNode = replicaNodeName(INDEX_NAME); + + // Start network disruption - primary node will be isolated + Set nodesInOneSide = Stream.of(clusterManagerNode, replicaNode).collect(Collectors.toCollection(HashSet::new)); + Set nodesInOtherSide = Stream.of(primaryNode).collect(Collectors.toCollection(HashSet::new)); + NetworkDisruption networkDisruption = new NetworkDisruption( + new NetworkDisruption.TwoPartitions(nodesInOneSide, nodesInOtherSide), + NetworkDisruption.DISCONNECT + ); + internalCluster().setDisruptionScheme(networkDisruption); + logger.info("--> network disruption is started"); + networkDisruption.startDisrupting(); + ensureStableCluster(2, clusterManagerNode); + + RemoteStoreStatsResponse response = client(clusterManagerNode).admin().cluster().prepareRemoteStoreStats(INDEX_NAME, "0").get(); + final String indexShardId = String.format(Locale.ROOT, "[%s][%s]", INDEX_NAME, "0"); + List matches = Arrays.stream(response.getRemoteStoreStats()) + .filter(stat -> indexShardId.equals(stat.getSegmentStats().shardId.toString())) + .collect(Collectors.toList()); + assertEquals(1, matches.size()); + RemoteSegmentTransferTracker.Stats segmentStats = matches.get(0).getSegmentStats(); + assertEquals(0, segmentStats.refreshTimeLagMs); + + networkDisruption.stopDisrupting(); + internalCluster().clearDisruptionScheme(); + ensureStableCluster(3, clusterManagerNode); + ensureGreen(INDEX_NAME); + logger.info("Test completed"); + } + private void indexDocs() { for (int i = 0; i < randomIntBetween(5, 10); i++) { if (randomBoolean()) { diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java index e9bfa358103c8..874713b51d627 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java @@ -46,6 +46,7 @@ import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.discovery.DiscoveryStats; import org.opensearch.http.HttpStats; +import org.opensearch.index.SegmentReplicationRejectionStats; import org.opensearch.index.stats.IndexingPressureStats; import org.opensearch.index.stats.ShardIndexingPressureStats; import org.opensearch.index.store.remote.filecache.FileCacheStats; @@ -129,6 +130,9 @@ public class NodeStats extends BaseNodeResponse implements ToXContentFragment { @Nullable private SearchBackpressureStats searchBackpressureStats; + @Nullable + private SegmentReplicationRejectionStats segmentReplicationRejectionStats; + @Nullable private ClusterManagerThrottlingStats clusterManagerThrottlingStats; @@ -211,6 +215,11 @@ public NodeStats(StreamInput in) throws IOException { } else { resourceUsageStats = null; } + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { + segmentReplicationRejectionStats = in.readOptionalWriteable(SegmentReplicationRejectionStats::new); + } else { + segmentReplicationRejectionStats = null; + } if (in.getVersion().onOrAfter(Version.V_2_12_0)) { repositoriesStats = in.readOptionalWriteable(RepositoriesStats::new); } else { @@ -244,6 +253,7 @@ public NodeStats( @Nullable FileCacheStats fileCacheStats, @Nullable TaskCancellationStats taskCancellationStats, @Nullable SearchPipelineStats searchPipelineStats, + @Nullable SegmentReplicationRejectionStats segmentReplicationRejectionStats, @Nullable RepositoriesStats repositoriesStats ) { super(node); @@ -271,6 +281,7 @@ public NodeStats( this.fileCacheStats = fileCacheStats; this.taskCancellationStats = taskCancellationStats; this.searchPipelineStats = searchPipelineStats; + this.segmentReplicationRejectionStats = segmentReplicationRejectionStats; this.repositoriesStats = repositoriesStats; } @@ -414,6 +425,11 @@ public SearchPipelineStats getSearchPipelineStats() { return searchPipelineStats; } + @Nullable + public SegmentReplicationRejectionStats getSegmentReplicationRejectionStats() { + return segmentReplicationRejectionStats; + } + @Nullable public RepositoriesStats getRepositoriesStats() { return repositoriesStats; @@ -465,6 +481,9 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeOptionalWriteable(resourceUsageStats); } + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { + out.writeOptionalWriteable(segmentReplicationRejectionStats); + } if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeOptionalWriteable(repositoriesStats); } @@ -561,6 +580,10 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (getResourceUsageStats() != null) { getResourceUsageStats().toXContent(builder, params); } + if (getSegmentReplicationRejectionStats() != null) { + getSegmentReplicationRejectionStats().toXContent(builder, params); + } + if (getRepositoriesStats() != null) { getRepositoriesStats().toXContent(builder, params); } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java index 88dff20354aa2..fc72668d36413 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java @@ -215,6 +215,7 @@ public enum Metric { TASK_CANCELLATION("task_cancellation"), SEARCH_PIPELINE("search_pipeline"), RESOURCE_USAGE_STATS("resource_usage_stats"), + SEGMENT_REPLICATION_BACKPRESSURE("segment_replication_backpressure"), REPOSITORIES("repositories"); private String metricName; diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java index aa02f8e580f4a..99cf42cfdc4d0 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java @@ -126,6 +126,7 @@ protected NodeStats nodeOperation(NodeStatsRequest nodeStatsRequest) { NodesStatsRequest.Metric.TASK_CANCELLATION.containedIn(metrics), NodesStatsRequest.Metric.SEARCH_PIPELINE.containedIn(metrics), NodesStatsRequest.Metric.RESOURCE_USAGE_STATS.containedIn(metrics), + NodesStatsRequest.Metric.SEGMENT_REPLICATION_BACKPRESSURE.containedIn(metrics), NodesStatsRequest.Metric.REPOSITORIES.containedIn(metrics) ); } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java index f51fabbfb2388..5efec8b876435 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -170,6 +170,7 @@ protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeReq false, false, false, + false, false ); List shardsStats = new ArrayList<>(); diff --git a/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java b/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java index 726ba7ba119af..4a9b07c12821d 100644 --- a/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java +++ b/server/src/main/java/org/opensearch/action/bulk/TransportBulkAction.java @@ -85,6 +85,11 @@ import org.opensearch.ingest.IngestService; import org.opensearch.node.NodeClosedException; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.Span; +import org.opensearch.telemetry.tracing.SpanBuilder; +import org.opensearch.telemetry.tracing.SpanScope; +import org.opensearch.telemetry.tracing.Tracer; +import org.opensearch.telemetry.tracing.listener.TraceableActionListener; import org.opensearch.threadpool.ThreadPool; import org.opensearch.threadpool.ThreadPool.Names; import org.opensearch.transport.TransportService; @@ -133,6 +138,7 @@ public class TransportBulkAction extends HandledTransportAction() { - @Override - public void onResponse(BulkShardResponse bulkShardResponse) { - for (BulkItemResponse bulkItemResponse : bulkShardResponse.getResponses()) { - // we may have no response if item failed - if (bulkItemResponse.getResponse() != null) { - bulkItemResponse.getResponse().setShardInfo(bulkShardResponse.getShardInfo()); - } - docStatusStats.inc(bulkItemResponse.status()); - responses.set(bulkItemResponse.getItemId(), bulkItemResponse); - } + final Span span = tracer.startSpan(SpanBuilder.from("bulkShardAction", nodeId, bulkShardRequest)); + try (SpanScope spanScope = tracer.withSpanInScope(span)) { + shardBulkAction.execute( + bulkShardRequest, + TraceableActionListener.create(ActionListener.runBefore(new ActionListener() { + @Override + public void onResponse(BulkShardResponse bulkShardResponse) { + for (BulkItemResponse bulkItemResponse : bulkShardResponse.getResponses()) { + // we may have no response if item failed + if (bulkItemResponse.getResponse() != null) { + bulkItemResponse.getResponse().setShardInfo(bulkShardResponse.getShardInfo()); + } - if (counter.decrementAndGet() == 0) { - finishHim(); - } - } + docStatusStats.inc(bulkItemResponse.status()); + responses.set(bulkItemResponse.getItemId(), bulkItemResponse); + } - @Override - public void onFailure(Exception e) { - // create failures for all relevant requests - for (BulkItemRequest request : requests) { - final String indexName = concreteIndices.getConcreteIndex(request.index()).getName(); - final DocWriteRequest docWriteRequest = request.request(); - final BulkItemResponse bulkItemResponse = new BulkItemResponse( - request.id(), - docWriteRequest.opType(), - new BulkItemResponse.Failure(indexName, docWriteRequest.id(), e) - ); + if (counter.decrementAndGet() == 0) { + finishHim(); + } + } - docStatusStats.inc(bulkItemResponse.status()); - responses.set(request.id(), bulkItemResponse); - } + @Override + public void onFailure(Exception e) { + // create failures for all relevant requests + for (BulkItemRequest request : requests) { + final String indexName = concreteIndices.getConcreteIndex(request.index()).getName(); + final DocWriteRequest docWriteRequest = request.request(); + final BulkItemResponse bulkItemResponse = new BulkItemResponse( + request.id(), + docWriteRequest.opType(), + new BulkItemResponse.Failure(indexName, docWriteRequest.id(), e) + ); + + docStatusStats.inc(bulkItemResponse.status()); + responses.set(request.id(), bulkItemResponse); + } - if (counter.decrementAndGet() == 0) { - finishHim(); - } - } + if (counter.decrementAndGet() == 0) { + finishHim(); + } + } - private void finishHim() { - indicesService.addDocStatusStats(docStatusStats); - listener.onResponse( - new BulkResponse(responses.toArray(new BulkItemResponse[responses.length()]), buildTookInMillis(startTimeNanos)) - ); - } - }, releasable::close)); + private void finishHim() { + indicesService.addDocStatusStats(docStatusStats); + listener.onResponse( + new BulkResponse( + responses.toArray(new BulkItemResponse[responses.length()]), + buildTookInMillis(startTimeNanos) + ) + ); + } + }, releasable::close), span, tracer) + ); + } catch (Exception e) { + span.setError(e); + span.endSpan(); + throw e; + } } bulkRequest = null; // allow memory for bulk request items to be reclaimed before all items have been completed } diff --git a/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java b/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java index fddda0ef1f9a7..268a6ed6f85b8 100644 --- a/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java +++ b/server/src/main/java/org/opensearch/action/bulk/TransportShardBulkAction.java @@ -99,6 +99,7 @@ import org.opensearch.indices.SystemIndices; import org.opensearch.node.NodeClosedException; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.Tracer; import org.opensearch.threadpool.ThreadPool; import org.opensearch.threadpool.ThreadPool.Names; import org.opensearch.transport.TransportChannel; @@ -161,7 +162,8 @@ public TransportShardBulkAction( IndexingPressureService indexingPressureService, SegmentReplicationPressureService segmentReplicationPressureService, RemoteStorePressureService remoteStorePressureService, - SystemIndices systemIndices + SystemIndices systemIndices, + Tracer tracer ) { super( settings, @@ -177,7 +179,8 @@ public TransportShardBulkAction( EXECUTOR_NAME_FUNCTION, false, indexingPressureService, - systemIndices + systemIndices, + tracer ); this.updateHelper = updateHelper; this.mappingUpdatedAction = mappingUpdatedAction; diff --git a/server/src/main/java/org/opensearch/action/resync/TransportResyncReplicationAction.java b/server/src/main/java/org/opensearch/action/resync/TransportResyncReplicationAction.java index 032fe83e2220b..9d60706d1f100 100644 --- a/server/src/main/java/org/opensearch/action/resync/TransportResyncReplicationAction.java +++ b/server/src/main/java/org/opensearch/action/resync/TransportResyncReplicationAction.java @@ -54,6 +54,7 @@ import org.opensearch.indices.IndicesService; import org.opensearch.indices.SystemIndices; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.Tracer; import org.opensearch.threadpool.ThreadPool; import org.opensearch.threadpool.ThreadPool.Names; import org.opensearch.transport.TransportException; @@ -93,7 +94,8 @@ public TransportResyncReplicationAction( ShardStateAction shardStateAction, ActionFilters actionFilters, IndexingPressureService indexingPressureService, - SystemIndices systemIndices + SystemIndices systemIndices, + Tracer tracer ) { super( settings, @@ -109,7 +111,8 @@ public TransportResyncReplicationAction( EXECUTOR_NAME_FUNCTION, true, /* we should never reject resync because of thread pool capacity on primary */ indexingPressureService, - systemIndices + systemIndices, + tracer ); } diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequest.java b/server/src/main/java/org/opensearch/action/search/SearchRequest.java index 9e50213eab5f9..fb026dae630b7 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchRequest.java +++ b/server/src/main/java/org/opensearch/action/search/SearchRequest.java @@ -256,7 +256,7 @@ public SearchRequest(StreamInput in) throws IOException { if (in.getVersion().onOrAfter(Version.V_2_7_0)) { pipeline = in.readOptionalString(); } - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { phaseTook = in.readOptionalBoolean(); } } @@ -290,7 +290,7 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_7_0)) { out.writeOptionalString(pipeline); } - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeOptionalBoolean(phaseTook); } } diff --git a/server/src/main/java/org/opensearch/action/search/SearchResponse.java b/server/src/main/java/org/opensearch/action/search/SearchResponse.java index 91f0dc0737637..96d07982d03db 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchResponse.java +++ b/server/src/main/java/org/opensearch/action/search/SearchResponse.java @@ -116,7 +116,7 @@ public SearchResponse(StreamInput in) throws IOException { clusters = new Clusters(in); scrollId = in.readOptionalString(); tookInMillis = in.readVLong(); - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { phaseTook = in.readOptionalWriteable(PhaseTook::new); } else { phaseTook = null; @@ -557,7 +557,7 @@ public void writeTo(StreamOutput out) throws IOException { clusters.writeTo(out); out.writeOptionalString(scrollId); out.writeVLong(tookInMillis); - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeOptionalWriteable(phaseTook); } out.writeVInt(skippedShards); diff --git a/server/src/main/java/org/opensearch/action/support/replication/TransportWriteAction.java b/server/src/main/java/org/opensearch/action/support/replication/TransportWriteAction.java index a0b5299805868..9ebfa8cfd0df8 100644 --- a/server/src/main/java/org/opensearch/action/support/replication/TransportWriteAction.java +++ b/server/src/main/java/org/opensearch/action/support/replication/TransportWriteAction.java @@ -59,6 +59,11 @@ import org.opensearch.index.translog.Translog.Location; import org.opensearch.indices.IndicesService; import org.opensearch.indices.SystemIndices; +import org.opensearch.telemetry.tracing.Span; +import org.opensearch.telemetry.tracing.SpanBuilder; +import org.opensearch.telemetry.tracing.SpanScope; +import org.opensearch.telemetry.tracing.Tracer; +import org.opensearch.telemetry.tracing.listener.TraceableActionListener; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportService; @@ -82,6 +87,7 @@ public abstract class TransportWriteAction< protected final SystemIndices systemIndices; private final Function executorFunction; + private final Tracer tracer; protected TransportWriteAction( Settings settings, @@ -97,7 +103,8 @@ protected TransportWriteAction( Function executorFunction, boolean forceExecutionOnPrimary, IndexingPressureService indexingPressureService, - SystemIndices systemIndices + SystemIndices systemIndices, + Tracer tracer ) { // We pass ThreadPool.Names.SAME to the super class as we control the dispatching to the // ThreadPool.Names.WRITE/ThreadPool.Names.SYSTEM_WRITE thread pools in this class. @@ -119,6 +126,7 @@ protected TransportWriteAction( this.executorFunction = executorFunction; this.indexingPressureService = indexingPressureService; this.systemIndices = systemIndices; + this.tracer = tracer; } protected String executor(IndexShard shard) { @@ -220,7 +228,12 @@ protected void shardOperationOnPrimary( threadPool.executor(executor).execute(new ActionRunnable>(listener) { @Override protected void doRun() { - dispatchedShardOperationOnPrimary(request, primary, listener); + Span span = tracer.startSpan( + SpanBuilder.from("dispatchedShardOperationOnPrimary", clusterService.localNode().getId(), request) + ); + try (SpanScope spanScope = tracer.withSpanInScope(span)) { + dispatchedShardOperationOnPrimary(request, primary, TraceableActionListener.create(listener, span, tracer)); + } } @Override @@ -248,7 +261,12 @@ protected void shardOperationOnReplica(ReplicaRequest request, IndexShard replic threadPool.executor(executorFunction.apply(replica)).execute(new ActionRunnable(listener) { @Override protected void doRun() { - dispatchedShardOperationOnReplica(request, replica, listener); + Span span = tracer.startSpan( + SpanBuilder.from("dispatchedShardOperationOnReplica", clusterService.localNode().getId(), request) + ); + try (SpanScope spanScope = tracer.withSpanInScope(span)) { + dispatchedShardOperationOnReplica(request, replica, TraceableActionListener.create(listener, span, tracer)); + } } @Override diff --git a/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java b/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java index a339852e6ed8d..987a3e3ffa7d3 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/CoordinationState.java @@ -638,6 +638,12 @@ public interface PersistedState extends Closeable { */ void setLastAcceptedState(ClusterState clusterState); + /** + * Returns the stats for the persistence layer for {@link CoordinationState}. + * @return PersistedStateStats + */ + PersistedStateStats getStats(); + /** * Marks the last accepted cluster state as committed. * After a successful call to this method, {@link #getLastAcceptedState()} should return the last cluster state that was set, diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index eb30460ca1b7f..a4ffab7fb70c9 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -56,6 +56,7 @@ import org.opensearch.cluster.service.ClusterApplier; import org.opensearch.cluster.service.ClusterApplier.ClusterApplyListener; import org.opensearch.cluster.service.ClusterManagerService; +import org.opensearch.cluster.service.ClusterStateStats; import org.opensearch.common.Booleans; import org.opensearch.common.Nullable; import org.opensearch.common.Priority; @@ -865,7 +866,16 @@ protected void doStart() { @Override public DiscoveryStats stats() { - return new DiscoveryStats(new PendingClusterStateStats(0, 0, 0), publicationHandler.stats()); + ClusterStateStats clusterStateStats = clusterManagerService.getClusterStateStats(); + ArrayList stats = new ArrayList<>(); + Stream.of(PersistedStateRegistry.PersistedStateType.values()).forEach(stateType -> { + if (persistedStateRegistry.getPersistedState(stateType) != null + && persistedStateRegistry.getPersistedState(stateType).getStats() != null) { + stats.add(persistedStateRegistry.getPersistedState(stateType).getStats()); + } + }); + clusterStateStats.setPersistenceStats(stats); + return new DiscoveryStats(new PendingClusterStateStats(0, 0, 0), publicationHandler.stats(), clusterStateStats); } @Override diff --git a/server/src/main/java/org/opensearch/cluster/coordination/InMemoryPersistedState.java b/server/src/main/java/org/opensearch/cluster/coordination/InMemoryPersistedState.java index 67ef82ee7b2e9..b77ede5471534 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/InMemoryPersistedState.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/InMemoryPersistedState.java @@ -65,6 +65,11 @@ public void setLastAcceptedState(ClusterState clusterState) { this.acceptedState = clusterState; } + @Override + public PersistedStateStats getStats() { + return null; + } + @Override public long getCurrentTerm() { return currentTerm; diff --git a/server/src/main/java/org/opensearch/cluster/coordination/PersistedStateStats.java b/server/src/main/java/org/opensearch/cluster/coordination/PersistedStateStats.java new file mode 100644 index 0000000000000..1dc20e564ade2 --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/coordination/PersistedStateStats.java @@ -0,0 +1,126 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.coordination; + +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContentObject; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.atomic.AtomicLong; + +/** + * Persisted cluster state related stats. + * + * @opensearch.internal + */ +public class PersistedStateStats implements Writeable, ToXContentObject { + private String statsName; + private AtomicLong totalTimeInMillis = new AtomicLong(0); + private AtomicLong failedCount = new AtomicLong(0); + private AtomicLong successCount = new AtomicLong(0); + private Map extendedFields = new HashMap<>(); // keeping minimal extensibility + + public PersistedStateStats(String statsName) { + this.statsName = statsName; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVLong(successCount.get()); + out.writeVLong(failedCount.get()); + out.writeVLong(totalTimeInMillis.get()); + if (extendedFields.size() > 0) { + out.writeBoolean(true); + out.writeVInt(extendedFields.size()); + for (Map.Entry extendedField : extendedFields.entrySet()) { + out.writeString(extendedField.getKey()); + out.writeVLong(extendedField.getValue().get()); + } + } else { + out.writeBoolean(false); + } + } + + public PersistedStateStats(StreamInput in) throws IOException { + this.successCount = new AtomicLong(in.readVLong()); + this.failedCount = new AtomicLong(in.readVLong()); + this.totalTimeInMillis = new AtomicLong(in.readVLong()); + if (in.readBoolean()) { + int extendedFieldsSize = in.readVInt(); + this.extendedFields = new HashMap<>(); + for (int fieldNumber = 0; fieldNumber < extendedFieldsSize; fieldNumber++) { + extendedFields.put(in.readString(), new AtomicLong(in.readVLong())); + } + } + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(statsName); + builder.field(Fields.SUCCESS_COUNT, getSuccessCount()); + builder.field(Fields.FAILED_COUNT, getFailedCount()); + builder.field(Fields.TOTAL_TIME_IN_MILLIS, getTotalTimeInMillis()); + if (extendedFields.size() > 0) { + for (Map.Entry extendedField : extendedFields.entrySet()) { + builder.field(extendedField.getKey(), extendedField.getValue().get()); + } + } + builder.endObject(); + return builder; + } + + public void stateFailed() { + failedCount.incrementAndGet(); + } + + public void stateSucceeded() { + successCount.incrementAndGet(); + } + + /** + * Expects user to send time taken in milliseconds. + * + * @param timeTakenInUpload time taken in uploading the cluster state to remote + */ + public void stateTook(long timeTakenInUpload) { + totalTimeInMillis.addAndGet(timeTakenInUpload); + } + + public long getTotalTimeInMillis() { + return totalTimeInMillis.get(); + } + + public long getFailedCount() { + return failedCount.get(); + } + + public long getSuccessCount() { + return successCount.get(); + } + + protected void addToExtendedFields(String extendedField, AtomicLong extendedFieldValue) { + this.extendedFields.put(extendedField, extendedFieldValue); + } + + /** + * Fields for parsing and toXContent + * + * @opensearch.internal + */ + static final class Fields { + static final String SUCCESS_COUNT = "success_count"; + static final String TOTAL_TIME_IN_MILLIS = "total_time_in_millis"; + static final String FAILED_COUNT = "failed_count"; + } +} diff --git a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java index 626903877b0c6..70c1d059a1b9e 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java @@ -922,19 +922,26 @@ public static boolean isGlobalStateEquals(Metadata metadata1, Metadata metadata2 if (!metadata1.coordinationMetadata.equals(metadata2.coordinationMetadata)) { return false; } - if (!metadata1.persistentSettings.equals(metadata2.persistentSettings)) { + if (!metadata1.hashesOfConsistentSettings.equals(metadata2.hashesOfConsistentSettings)) { return false; } - if (!metadata1.hashesOfConsistentSettings.equals(metadata2.hashesOfConsistentSettings)) { + if (!metadata1.clusterUUID.equals(metadata2.clusterUUID)) { return false; } - if (!metadata1.templates.equals(metadata2.templates())) { + if (metadata1.clusterUUIDCommitted != metadata2.clusterUUIDCommitted) { return false; } - if (!metadata1.clusterUUID.equals(metadata2.clusterUUID)) { + return isGlobalResourcesMetadataEquals(metadata1, metadata2); + } + + /** + * Compares Metadata entities persisted in Remote Store. + */ + public static boolean isGlobalResourcesMetadataEquals(Metadata metadata1, Metadata metadata2) { + if (!metadata1.persistentSettings.equals(metadata2.persistentSettings)) { return false; } - if (metadata1.clusterUUIDCommitted != metadata2.clusterUUIDCommitted) { + if (!metadata1.templates.equals(metadata2.templates())) { return false; } // Check if any persistent metadata needs to be saved diff --git a/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java b/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java index 8d76a39712ee3..78a22fe11f072 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/MetadataCreateIndexService.java @@ -1252,6 +1252,7 @@ List getIndexSettingsValidationErrors( if (forbidPrivateIndexSettings) { validationErrors.addAll(validatePrivateSettingsNotExplicitlySet(settings, indexScopedSettings)); } + validateIndexReplicationTypeSettings(settings, clusterService.getClusterSettings()).ifPresent(validationErrors::add); if (indexName.isEmpty() || indexName.get().charAt(0) != '.') { // Apply aware replica balance validation only to non system indices int replicaCount = settings.getAsInt( @@ -1306,6 +1307,24 @@ private static List validateIndexCustomPath(Settings settings, @Nullable return validationErrors; } + /** + * Validates {@code index.replication.type} is not set if {@code cluster.restrict.index.replication_type} is set to true. + * + * @param requestSettings settings passed in during index create request + * @param clusterSettings cluster setting + */ + private static Optional validateIndexReplicationTypeSettings(Settings requestSettings, ClusterSettings clusterSettings) { + if (requestSettings.hasValue(SETTING_REPLICATION_TYPE) + && clusterSettings.get(IndicesService.CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING)) { + return Optional.of( + "index setting [index.replication.type] is not allowed to be set as [" + + IndicesService.CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING.getKey() + + "=true]" + ); + } + return Optional.empty(); + } + /** * Validates the settings and mappings for shrinking an index. * diff --git a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java index 4e49b25eb5789..0c58aabf95207 100644 --- a/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java +++ b/server/src/main/java/org/opensearch/cluster/node/DiscoveryNode.java @@ -553,7 +553,13 @@ public String toString() { sb.append('}'); } if (!attributes.isEmpty()) { - sb.append(attributes); + sb.append( + attributes.entrySet() + .stream() + .filter(entry -> !entry.getKey().startsWith(REMOTE_STORE_NODE_ATTRIBUTE_KEY_PREFIX)) // filter remote_store attributes + // from logging to reduce noise. + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)) + ); } return sb.toString(); } diff --git a/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java b/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java index b12698c8a320e..d77d44580798a 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java +++ b/server/src/main/java/org/opensearch/cluster/routing/IndexRoutingTable.java @@ -466,12 +466,12 @@ public Builder initializeAsRemoteStoreRestore( } for (int shardNumber = 0; shardNumber < indexMetadata.getNumberOfShards(); shardNumber++) { ShardId shardId = new ShardId(index, shardNumber); - if (forceRecoverAllPrimaries == false && indexShardRoutingTableMap.containsKey(shardId) == false) { + if (indexShardRoutingTableMap.containsKey(shardId) == false) { throw new IllegalStateException("IndexShardRoutingTable is not present for shardId: " + shardId); } IndexShardRoutingTable.Builder indexShardRoutingBuilder = new IndexShardRoutingTable.Builder(shardId); IndexShardRoutingTable indexShardRoutingTable = indexShardRoutingTableMap.get(shardId); - if (forceRecoverAllPrimaries || indexShardRoutingTable == null || indexShardRoutingTable.primaryShard().unassigned()) { + if (forceRecoverAllPrimaries || indexShardRoutingTable.primaryShard().unassigned()) { // Primary shard to be recovered from remote store. indexShardRoutingBuilder.addShard(ShardRouting.newUnassigned(shardId, true, recoverySource, unassignedInfo)); // All the replica shards to be recovered from peer recovery. diff --git a/server/src/main/java/org/opensearch/cluster/service/ClusterStateStats.java b/server/src/main/java/org/opensearch/cluster/service/ClusterStateStats.java new file mode 100644 index 0000000000000..96683ce720d0b --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/service/ClusterStateStats.java @@ -0,0 +1,120 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.service; + +import org.opensearch.cluster.coordination.PersistedStateStats; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContentObject; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicLong; + +/** + * Cluster state related stats. + * + * @opensearch.internal + */ +public class ClusterStateStats implements Writeable, ToXContentObject { + + private AtomicLong updateSuccess = new AtomicLong(0); + private AtomicLong updateTotalTimeInMillis = new AtomicLong(0); + private AtomicLong updateFailed = new AtomicLong(0); + private List persistenceStats = new ArrayList<>(); + + public ClusterStateStats() {} + + public long getUpdateSuccess() { + return updateSuccess.get(); + } + + public long getUpdateTotalTimeInMillis() { + return updateTotalTimeInMillis.get(); + } + + public long getUpdateFailed() { + return updateFailed.get(); + } + + public List getPersistenceStats() { + return persistenceStats; + } + + public void stateUpdated() { + updateSuccess.incrementAndGet(); + } + + public void stateUpdateFailed() { + updateFailed.incrementAndGet(); + } + + public void stateUpdateTook(long stateUpdateTime) { + updateTotalTimeInMillis.addAndGet(stateUpdateTime); + } + + public ClusterStateStats setPersistenceStats(List persistenceStats) { + this.persistenceStats = persistenceStats; + return this; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVLong(updateSuccess.get()); + out.writeVLong(updateTotalTimeInMillis.get()); + out.writeVLong(updateFailed.get()); + out.writeVInt(persistenceStats.size()); + for (PersistedStateStats stats : persistenceStats) { + stats.writeTo(out); + } + } + + public ClusterStateStats(StreamInput in) throws IOException { + this.updateSuccess = new AtomicLong(in.readVLong()); + this.updateTotalTimeInMillis = new AtomicLong(in.readVLong()); + this.updateFailed = new AtomicLong(in.readVLong()); + int persistedStatsSize = in.readVInt(); + this.persistenceStats = new ArrayList<>(); + for (int statsNumber = 0; statsNumber < persistedStatsSize; statsNumber++) { + PersistedStateStats stats = new PersistedStateStats(in); + this.persistenceStats.add(stats); + } + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(Fields.CLUSTER_STATE_STATS); + builder.startObject(Fields.OVERALL); + builder.field(Fields.UPDATE_COUNT, getUpdateSuccess()); + builder.field(Fields.TOTAL_TIME_IN_MILLIS, getUpdateTotalTimeInMillis()); + builder.field(Fields.FAILED_COUNT, getUpdateFailed()); + builder.endObject(); + for (PersistedStateStats stats : persistenceStats) { + stats.toXContent(builder, params); + } + builder.endObject(); + return builder; + } + + /** + * Fields for parsing and toXContent + * + * @opensearch.internal + */ + static final class Fields { + static final String CLUSTER_STATE_STATS = "cluster_state_stats"; + static final String OVERALL = "overall"; + static final String UPDATE_COUNT = "update_count"; + static final String TOTAL_TIME_IN_MILLIS = "total_time_in_millis"; + static final String FAILED_COUNT = "failed_count"; + } +} diff --git a/server/src/main/java/org/opensearch/cluster/service/MasterService.java b/server/src/main/java/org/opensearch/cluster/service/MasterService.java index 563b69dfd0e2a..07c3f93ae6486 100644 --- a/server/src/main/java/org/opensearch/cluster/service/MasterService.java +++ b/server/src/main/java/org/opensearch/cluster/service/MasterService.java @@ -112,7 +112,9 @@ public class MasterService extends AbstractLifecycleComponent { static final String CLUSTER_MANAGER_UPDATE_THREAD_NAME = "clusterManagerService#updateTask"; - /** @deprecated As of 2.2, because supporting inclusive language, replaced by {@link #CLUSTER_MANAGER_UPDATE_THREAD_NAME} */ + /** + * @deprecated As of 2.2, because supporting inclusive language, replaced by {@link #CLUSTER_MANAGER_UPDATE_THREAD_NAME} + */ @Deprecated static final String MASTER_UPDATE_THREAD_NAME = "masterService#updateTask"; @@ -130,6 +132,7 @@ public class MasterService extends AbstractLifecycleComponent { private volatile Batcher taskBatcher; protected final ClusterManagerTaskThrottler clusterManagerTaskThrottler; private final ClusterManagerThrottlingStats throttlingStats; + private final ClusterStateStats stateStats; public MasterService(Settings settings, ClusterSettings clusterSettings, ThreadPool threadPool) { this.nodeName = Objects.requireNonNull(Node.NODE_NAME_SETTING.get(settings)); @@ -147,6 +150,7 @@ public MasterService(Settings settings, ClusterSettings clusterSettings, ThreadP this::getMinNodeVersion, throttlingStats ); + this.stateStats = new ClusterStateStats(); this.threadPool = threadPool; } @@ -339,7 +343,7 @@ private TimeValue getTimeSince(long startTimeNanos) { return TimeValue.timeValueMillis(TimeValue.nsecToMSec(threadPool.preciseRelativeTimeInNanos() - startTimeNanos)); } - protected void publish(ClusterChangedEvent clusterChangedEvent, TaskOutputs taskOutputs, long startTimeMillis) { + protected void publish(ClusterChangedEvent clusterChangedEvent, TaskOutputs taskOutputs, long startTimeNanos) { final PlainActionFuture fut = new PlainActionFuture() { @Override protected boolean blockingAllowed() { @@ -352,8 +356,12 @@ protected boolean blockingAllowed() { try { FutureUtils.get(fut); onPublicationSuccess(clusterChangedEvent, taskOutputs); + final long durationMillis = getTimeSince(startTimeNanos).millis(); + stateStats.stateUpdateTook(durationMillis); + stateStats.stateUpdated(); } catch (Exception e) { - onPublicationFailed(clusterChangedEvent, taskOutputs, startTimeMillis, e); + stateStats.stateUpdateFailed(); + onPublicationFailed(clusterChangedEvent, taskOutputs, startTimeNanos, e); } } @@ -464,7 +472,6 @@ public Builder incrementVersion(ClusterState clusterState) { * @param source the source of the cluster state update task * @param updateTask the full context for the cluster state update * task - * */ public & ClusterStateTaskListener> void submitStateUpdateTask( String source, @@ -490,7 +497,6 @@ public & Cluster * @param listener callback after the cluster state update task * completes * @param the type of the cluster state update task state - * */ public void submitStateUpdateTask( String source, @@ -947,7 +953,7 @@ void onNoLongerClusterManager() { /** * Functionality for register task key to cluster manager node. * - * @param taskKey - task key of task + * @param taskKey - task key of task * @param throttlingEnabled - throttling is enabled for task or not i.e does data node perform retries on it or not * @return throttling task key which needs to be passed while submitting task to cluster manager */ @@ -966,7 +972,6 @@ public ClusterManagerTaskThrottler.ThrottlingKey registerClusterManagerTask(Stri * that share the same executor will be executed * batches on this executor * @param the type of the cluster state update task state - * */ public void submitStateUpdateTasks( final String source, @@ -996,4 +1001,8 @@ public void submitStateUpdateTasks( } } + public ClusterStateStats getClusterStateStats() { + return stateStats; + } + } diff --git a/server/src/main/java/org/opensearch/common/blobstore/stream/write/WritePriority.java b/server/src/main/java/org/opensearch/common/blobstore/stream/write/WritePriority.java index b8c0b52f93a3c..3f341c878c3c7 100644 --- a/server/src/main/java/org/opensearch/common/blobstore/stream/write/WritePriority.java +++ b/server/src/main/java/org/opensearch/common/blobstore/stream/write/WritePriority.java @@ -15,5 +15,6 @@ */ public enum WritePriority { NORMAL, - HIGH + HIGH, + URGENT } diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 7ac7da819b215..c2c6effc3336f 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -682,13 +682,16 @@ public void apply(Settings value, Settings current, Settings previous) { // Remote cluster state settings RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING, + RemoteClusterStateService.INDEX_METADATA_UPLOAD_TIMEOUT_SETTING, + RemoteClusterStateService.GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING, RemoteStoreNodeService.REMOTE_STORE_COMPATIBILITY_MODE_SETTING, IndicesService.CLUSTER_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING, IndicesService.CLUSTER_REMOTE_INDEX_RESTRICT_ASYNC_DURABILITY_SETTING, AdmissionControlSettings.ADMISSION_CONTROL_TRANSPORT_LAYER_MODE, CPUBasedAdmissionControllerSettings.CPU_BASED_ADMISSION_CONTROLLER_TRANSPORT_LAYER_MODE, CPUBasedAdmissionControllerSettings.INDEXING_CPU_USAGE_LIMIT, - CPUBasedAdmissionControllerSettings.SEARCH_CPU_USAGE_LIMIT + CPUBasedAdmissionControllerSettings.SEARCH_CPU_USAGE_LIMIT, + IndicesService.CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING ) ) ); diff --git a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java index 83bf8c82ee3dd..62e8faf33e1fa 100644 --- a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java @@ -221,6 +221,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { // Settings for remote translog IndexSettings.INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING, + IndexSettings.INDEX_REMOTE_TRANSLOG_KEEP_EXTRA_GEN_SETTING, // Settings for remote store enablement IndexMetadata.INDEX_REMOTE_STORE_ENABLED_SETTING, diff --git a/server/src/main/java/org/opensearch/discovery/DiscoveryStats.java b/server/src/main/java/org/opensearch/discovery/DiscoveryStats.java index 665ecf77d7aa7..ea93ccd09ed39 100644 --- a/server/src/main/java/org/opensearch/discovery/DiscoveryStats.java +++ b/server/src/main/java/org/opensearch/discovery/DiscoveryStats.java @@ -32,8 +32,10 @@ package org.opensearch.discovery; +import org.opensearch.Version; import org.opensearch.cluster.coordination.PendingClusterStateStats; import org.opensearch.cluster.coordination.PublishClusterStateStats; +import org.opensearch.cluster.service.ClusterStateStats; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; @@ -51,21 +53,31 @@ public class DiscoveryStats implements Writeable, ToXContentFragment { private final PendingClusterStateStats queueStats; private final PublishClusterStateStats publishStats; + private final ClusterStateStats clusterStateStats; - public DiscoveryStats(PendingClusterStateStats queueStats, PublishClusterStateStats publishStats) { + public DiscoveryStats(PendingClusterStateStats queueStats, PublishClusterStateStats publishStats, ClusterStateStats clusterStateStats) { this.queueStats = queueStats; this.publishStats = publishStats; + this.clusterStateStats = clusterStateStats; } public DiscoveryStats(StreamInput in) throws IOException { queueStats = in.readOptionalWriteable(PendingClusterStateStats::new); publishStats = in.readOptionalWriteable(PublishClusterStateStats::new); + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + clusterStateStats = in.readOptionalWriteable(ClusterStateStats::new); + } else { + clusterStateStats = null; + } } @Override public void writeTo(StreamOutput out) throws IOException { out.writeOptionalWriteable(queueStats); out.writeOptionalWriteable(publishStats); + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeOptionalWriteable(clusterStateStats); + } } @Override @@ -77,6 +89,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (publishStats != null) { publishStats.toXContent(builder, params); } + if (clusterStateStats != null) { + clusterStateStats.toXContent(builder, params); + } builder.endObject(); return builder; } @@ -92,4 +107,8 @@ public PendingClusterStateStats getQueueStats() { public PublishClusterStateStats getPublishStats() { return publishStats; } + + public ClusterStateStats getClusterStateStats() { + return clusterStateStats; + } } diff --git a/server/src/main/java/org/opensearch/gateway/ClusterStateUpdaters.java b/server/src/main/java/org/opensearch/gateway/ClusterStateUpdaters.java index 4c562b348f141..1563ac84bdd1c 100644 --- a/server/src/main/java/org/opensearch/gateway/ClusterStateUpdaters.java +++ b/server/src/main/java/org/opensearch/gateway/ClusterStateUpdaters.java @@ -41,7 +41,6 @@ import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; -import org.opensearch.cluster.routing.RecoverySource; import org.opensearch.cluster.routing.RoutingTable; import org.opensearch.common.settings.ClusterSettings; @@ -121,21 +120,7 @@ static ClusterState updateRoutingTable(final ClusterState state) { // initialize all index routing tables as empty final RoutingTable.Builder routingTableBuilder = RoutingTable.builder(state.routingTable()); for (final IndexMetadata cursor : state.metadata().indices().values()) { - // Whether IndexMetadata is recovered from local disk or remote it doesn't matter to us at this point. - // We are only concerned about index data recovery here. Which is why we only check for remote store enabled and not for remote - // cluster state enabled. - if (cursor.getSettings().getAsBoolean(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, false) == false - || state.routingTable().hasIndex(cursor.getIndex()) == false - || state.routingTable() - .index(cursor.getIndex()) - .shardsMatchingPredicateCount( - shardRouting -> shardRouting.primary() - // We need to ensure atleast one of the primaries is being recovered from remote. - // This ensures we have gone through the RemoteStoreRestoreService and routing table is updated - && shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) == 0) { - routingTableBuilder.addAsRecovery(cursor); - } + routingTableBuilder.addAsRecovery(cursor); } // start with 0 based versions for routing table routingTableBuilder.version(0); diff --git a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java index 9eb7fb0ca04d0..350a361a49a62 100644 --- a/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java +++ b/server/src/main/java/org/opensearch/gateway/GatewayMetaState.java @@ -47,6 +47,7 @@ import org.opensearch.cluster.coordination.InMemoryPersistedState; import org.opensearch.cluster.coordination.PersistedStateRegistry; import org.opensearch.cluster.coordination.PersistedStateRegistry.PersistedStateType; +import org.opensearch.cluster.coordination.PersistedStateStats; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.IndexTemplateMetadata; import org.opensearch.cluster.metadata.Manifest; @@ -67,7 +68,6 @@ import org.opensearch.index.recovery.RemoteStoreRestoreService.RemoteRestoreResult; import org.opensearch.node.Node; import org.opensearch.plugins.MetadataUpgrader; -import org.opensearch.repositories.RepositoryMissingException; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportService; @@ -175,7 +175,9 @@ public void start( if (ClusterState.UNKNOWN_UUID.equals(lastKnownClusterUUID) == false) { // Load state from remote final RemoteRestoreResult remoteRestoreResult = remoteStoreRestoreService.restore( - clusterState, + // Remote Metadata should always override local disk Metadata + // if local disk Metadata's cluster uuid is UNKNOWN_UUID + ClusterState.builder(clusterState).metadata(Metadata.EMPTY_METADATA).build(), lastKnownClusterUUID, false, new String[] {} @@ -550,6 +552,9 @@ static class LucenePersistedState implements PersistedState { // out by this version of OpenSearch. TODO TBD should we avoid indexing when possible? final PersistedClusterStateService.Writer writer = persistedClusterStateService.createWriter(); try { + // During remote state restore, there will be non empty metadata getting persisted with cluster UUID as + // ClusterState.UNKOWN_UUID . The valid UUID will be generated and persisted along with the first cluster state getting + // published. writer.writeFullStateAndCommit(currentTerm, lastAcceptedState); } catch (Exception e) { try { @@ -611,6 +616,12 @@ public void setLastAcceptedState(ClusterState clusterState) { lastAcceptedState = clusterState; } + @Override + public PersistedStateStats getStats() { + // Note: These stats are not published yet, will come in future + return null; + } + private PersistedClusterStateService.Writer getWriterSafe() { final PersistedClusterStateService.Writer writer = persistenceWriter.get(); if (writer == null) { @@ -712,17 +723,17 @@ assert verifyManifestAndClusterState(lastAcceptedManifest, lastAcceptedState) == assert verifyManifestAndClusterState(manifest, clusterState) == true : "Manifest and ClusterState are not in sync"; lastAcceptedManifest = manifest; lastAcceptedState = clusterState; - } catch (RepositoryMissingException e) { - // TODO This logic needs to be modified once PR for repo registration during bootstrap is pushed - // https://github.com/opensearch-project/OpenSearch/pull/9105/ - // After the above PR is pushed, we can remove this silent failure and throw the exception instead. - logger.error("Remote repository is not yet registered"); - lastAcceptedState = clusterState; } catch (Exception e) { + remoteClusterStateService.writeMetadataFailed(); handleExceptionOnWrite(e); } } + @Override + public PersistedStateStats getStats() { + return remoteClusterStateService.getStats(); + } + private boolean verifyManifestAndClusterState(ClusterMetadataManifest manifest, ClusterState clusterState) { assert manifest != null : "ClusterMetadataManifest is null"; assert clusterState != null : "ClusterState is null"; diff --git a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java index 97b37d9532f85..4725f40076ce2 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java +++ b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java @@ -262,7 +262,7 @@ public ClusterMetadataManifest(StreamInput in) throws IOException { this.indices = Collections.unmodifiableList(in.readList(UploadedIndexMetadata::new)); this.previousClusterUUID = in.readString(); this.clusterUUIDCommitted = in.readBoolean(); - if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { this.codecVersion = in.readInt(); this.globalMetadataFileName = in.readString(); } else { @@ -316,7 +316,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeCollection(indices); out.writeString(previousClusterUUID); out.writeBoolean(clusterUUIDCommitted); - if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { out.writeInt(codecVersion); out.writeString(globalMetadataFileName); } diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index 96ce2fc779ea0..329ebd0dcd2b8 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -83,9 +83,23 @@ public class RemoteClusterStateService implements Closeable { private static final Logger logger = LogManager.getLogger(RemoteClusterStateService.class); - // TODO make this two variable as dynamic setting [issue: #10688] - public static final int INDEX_METADATA_UPLOAD_WAIT_MILLIS = 20000; - public static final int GLOBAL_METADATA_UPLOAD_WAIT_MILLIS = 20000; + public static final TimeValue INDEX_METADATA_UPLOAD_TIMEOUT_DEFAULT = TimeValue.timeValueMillis(20000); + + public static final TimeValue GLOBAL_METADATA_UPLOAD_TIMEOUT_DEFAULT = TimeValue.timeValueMillis(20000); + + public static final Setting INDEX_METADATA_UPLOAD_TIMEOUT_SETTING = Setting.timeSetting( + "cluster.remote_store.state.index_metadata.upload_timeout", + INDEX_METADATA_UPLOAD_TIMEOUT_DEFAULT, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + public static final Setting GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING = Setting.timeSetting( + "cluster.remote_store.state.global_metadata.upload_timeout", + GLOBAL_METADATA_UPLOAD_TIMEOUT_DEFAULT, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); public static final ChecksumBlobStoreFormat INDEX_METADATA_FORMAT = new ChecksumBlobStoreFormat<>( "index-metadata", @@ -141,8 +155,11 @@ public class RemoteClusterStateService implements Closeable { private BlobStoreTransferService blobStoreTransferService; private volatile TimeValue slowWriteLoggingThreshold; - private final AtomicBoolean deleteStaleMetadataRunning = new AtomicBoolean(false); + private volatile TimeValue indexMetadataUploadTimeout; + private volatile TimeValue globalMetadataUploadTimeout; + private final AtomicBoolean deleteStaleMetadataRunning = new AtomicBoolean(false); + private final RemotePersistenceStats remoteStateStats; public static final int INDEX_METADATA_CURRENT_CODEC_VERSION = 1; public static final int MANIFEST_CURRENT_CODEC_VERSION = ClusterMetadataManifest.CODEC_V1; public static final int GLOBAL_METADATA_CURRENT_CODEC_VERSION = 1; @@ -171,7 +188,12 @@ public RemoteClusterStateService( this.relativeTimeNanosSupplier = relativeTimeNanosSupplier; this.threadpool = threadPool; this.slowWriteLoggingThreshold = clusterSettings.get(SLOW_WRITE_LOGGING_THRESHOLD); + this.indexMetadataUploadTimeout = clusterSettings.get(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING); + this.globalMetadataUploadTimeout = clusterSettings.get(GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING); clusterSettings.addSettingsUpdateConsumer(SLOW_WRITE_LOGGING_THRESHOLD, this::setSlowWriteLoggingThreshold); + clusterSettings.addSettingsUpdateConsumer(INDEX_METADATA_UPLOAD_TIMEOUT_SETTING, this::setIndexMetadataUploadTimeout); + clusterSettings.addSettingsUpdateConsumer(GLOBAL_METADATA_UPLOAD_TIMEOUT_SETTING, this::setGlobalMetadataUploadTimeout); + this.remoteStateStats = new RemotePersistenceStats(); } private BlobStoreTransferService getBlobStoreTransferService() { @@ -212,6 +234,8 @@ public ClusterMetadataManifest writeFullMetadata(ClusterState clusterState, Stri false ); final long durationMillis = TimeValue.nsecToMSec(relativeTimeNanosSupplier.getAsLong() - startTimeNanos); + remoteStateStats.stateSucceeded(); + remoteStateStats.stateTook(durationMillis); if (durationMillis >= slowWriteLoggingThreshold.getMillis()) { logger.warn( "writing cluster state took [{}ms] which is above the warn threshold of [{}]; " + "wrote full state with [{}] indices", @@ -313,6 +337,8 @@ public ClusterMetadataManifest writeIncrementalMetadata( deleteStaleClusterMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID(), RETAINED_MANIFESTS); final long durationMillis = TimeValue.nsecToMSec(relativeTimeNanosSupplier.getAsLong() - startTimeNanos); + remoteStateStats.stateSucceeded(); + remoteStateStats.stateTook(durationMillis); if (durationMillis >= slowWriteLoggingThreshold.getMillis()) { logger.warn( "writing cluster state took [{}ms] which is above the warn threshold of [{}]; " @@ -357,7 +383,7 @@ private String writeGlobalMetadata(ClusterState clusterState) throws IOException result.set(globalMetadataContainer.path().buildAsString() + globalMetadataFilename); }, ex -> { throw new GlobalMetadataTransferException(ex.getMessage(), ex); }), latch); - GLOBAL_METADATA_FORMAT.writeAsync( + GLOBAL_METADATA_FORMAT.writeAsyncWithUrgentPriority( clusterState.metadata(), globalMetadataContainer, globalMetadataFilename, @@ -367,7 +393,7 @@ private String writeGlobalMetadata(ClusterState clusterState) throws IOException ); try { - if (latch.await(GLOBAL_METADATA_UPLOAD_WAIT_MILLIS, TimeUnit.MILLISECONDS) == false) { + if (latch.await(getGlobalMetadataUploadTimeout().millis(), TimeUnit.MILLISECONDS) == false) { // TODO: We should add metrics where transfer is timing out. [Issue: #10687] GlobalMetadataTransferException ex = new GlobalMetadataTransferException( String.format(Locale.ROOT, "Timed out waiting for transfer of global metadata to complete") @@ -422,7 +448,7 @@ private List writeIndexMetadataParallel(ClusterState clus } try { - if (latch.await(INDEX_METADATA_UPLOAD_WAIT_MILLIS, TimeUnit.MILLISECONDS) == false) { + if (latch.await(getIndexMetadataUploadTimeout().millis(), TimeUnit.MILLISECONDS) == false) { IndexMetadataTransferException ex = new IndexMetadataTransferException( String.format( Locale.ROOT, @@ -489,7 +515,7 @@ private void writeIndexMetadataAsync( ex -> latchedActionListener.onFailure(new IndexMetadataTransferException(indexMetadata.getIndex().toString(), ex)) ); - INDEX_METADATA_FORMAT.writeAsync( + INDEX_METADATA_FORMAT.writeAsyncWithUrgentPriority( indexMetadata, indexMetadataContainer, indexMetadataFilename, @@ -568,7 +594,13 @@ private ClusterMetadataManifest uploadManifest( private void writeMetadataManifest(String clusterName, String clusterUUID, ClusterMetadataManifest uploadManifest, String fileName) throws IOException { final BlobContainer metadataManifestContainer = manifestContainer(clusterName, clusterUUID); - CLUSTER_METADATA_MANIFEST_FORMAT.write(uploadManifest, metadataManifestContainer, fileName, blobStoreRepository.getCompressor()); + CLUSTER_METADATA_MANIFEST_FORMAT.write( + uploadManifest, + metadataManifestContainer, + fileName, + blobStoreRepository.getCompressor(), + FORMAT_PARAMS + ); } private String fetchPreviousClusterUUID(String clusterName, String clusterUUID) { @@ -615,6 +647,22 @@ private void setSlowWriteLoggingThreshold(TimeValue slowWriteLoggingThreshold) { this.slowWriteLoggingThreshold = slowWriteLoggingThreshold; } + private void setIndexMetadataUploadTimeout(TimeValue newIndexMetadataUploadTimeout) { + this.indexMetadataUploadTimeout = newIndexMetadataUploadTimeout; + } + + private void setGlobalMetadataUploadTimeout(TimeValue newGlobalMetadataUploadTimeout) { + this.globalMetadataUploadTimeout = newGlobalMetadataUploadTimeout; + } + + public TimeValue getIndexMetadataUploadTimeout() { + return this.indexMetadataUploadTimeout; + } + + public TimeValue getGlobalMetadataUploadTimeout() { + return this.globalMetadataUploadTimeout; + } + static String getManifestFileName(long term, long version, boolean committed) { // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/manifest/manifest______C/P____ return String.join( @@ -689,10 +737,11 @@ private Map getIndexMetadataMap( * @return {@link IndexMetadata} */ private IndexMetadata getIndexMetadata(String clusterName, String clusterUUID, UploadedIndexMetadata uploadedIndexMetadata) { + BlobContainer blobContainer = indexMetadataContainer(clusterName, clusterUUID, uploadedIndexMetadata.getIndexUUID()); try { String[] splitPath = uploadedIndexMetadata.getUploadedFilename().split("/"); return INDEX_METADATA_FORMAT.read( - indexMetadataContainer(clusterName, clusterUUID, uploadedIndexMetadata.getIndexUUID()), + blobContainer, splitPath[splitPath.length - 1], blobStoreRepository.getNamedXContentRegistry() ); @@ -725,7 +774,10 @@ public Metadata getLatestMetadata(String clusterName, String clusterUUID) { // Fetch Index Metadata Map indices = getIndexMetadataMap(clusterName, clusterUUID, clusterMetadataManifest.get()); - return Metadata.builder(globalMetadata).indices(indices).build(); + Map indexMetadataMap = new HashMap<>(); + indices.values().forEach(indexMetadata -> { indexMetadataMap.put(indexMetadata.getIndex().getName(), indexMetadata); }); + + return Metadata.builder(globalMetadata).indices(indexMetadataMap).build(); } private Metadata getGlobalMetadata(String clusterName, String clusterUUID, ClusterMetadataManifest clusterMetadataManifest) { @@ -882,7 +934,8 @@ private Map trimClusterUUIDs( } } else { ClusterMetadataManifest previousManifest = trimmedUUIDs.get(currentManifest.getPreviousClusterUUID()); - if (isMetadataEqual(currentManifest, previousManifest, clusterName)) { + if (isMetadataEqual(currentManifest, previousManifest, clusterName) + && isGlobalMetadataEqual(currentManifest, previousManifest, clusterName)) { trimmedUUIDs.remove(clusterUUID); } } @@ -912,6 +965,12 @@ private boolean isMetadataEqual(ClusterMetadataManifest first, ClusterMetadataMa return true; } + private boolean isGlobalMetadataEqual(ClusterMetadataManifest first, ClusterMetadataManifest second, String clusterName) { + Metadata secondGlobalMetadata = getGlobalMetadata(clusterName, second.getClusterUUID(), second); + Metadata firstGlobalMetadata = getGlobalMetadata(clusterName, first.getClusterUUID(), first); + return Metadata.isGlobalResourcesMetadataEquals(firstGlobalMetadata, secondGlobalMetadata); + } + private boolean isInvalidClusterUUID(ClusterMetadataManifest manifest) { return !manifest.isClusterUUIDCommitted(); } @@ -1005,6 +1064,10 @@ public static String encodeString(String content) { return Base64.getUrlEncoder().withoutPadding().encodeToString(content.getBytes(StandardCharsets.UTF_8)); } + public void writeMetadataFailed() { + getStats().stateFailed(); + } + /** * Exception for IndexMetadata transfer failures to remote */ @@ -1039,7 +1102,7 @@ public GlobalMetadataTransferException(String errorDesc, Throwable cause) { * @param clusterName name of the cluster * @param clusterUUIDs clusteUUIDs for which the remote state needs to be purged */ - private void deleteStaleUUIDsClusterMetadata(String clusterName, List clusterUUIDs) { + void deleteStaleUUIDsClusterMetadata(String clusterName, List clusterUUIDs) { clusterUUIDs.forEach(clusterUUID -> { getBlobStoreTransferService().deleteAsync( ThreadPool.Names.REMOTE_PURGE, @@ -1059,6 +1122,7 @@ public void onFailure(Exception e) { ), e ); + remoteStateStats.cleanUpAttemptFailed(); } } ); @@ -1174,8 +1238,10 @@ private void deleteClusterMetadata( logger.error("Error while fetching Remote Cluster Metadata manifests", e); } catch (IOException e) { logger.error("Error while deleting stale Remote Cluster Metadata files", e); + remoteStateStats.cleanUpAttemptFailed(); } catch (Exception e) { logger.error("Unexpected error while deleting stale Remote Cluster Metadata files", e); + remoteStateStats.cleanUpAttemptFailed(); } } @@ -1206,4 +1272,8 @@ public void deleteStaleClusterUUIDs(ClusterState clusterState, ClusterMetadataMa deleteStaleUUIDsClusterMetadata(clusterName, new ArrayList<>(allClustersUUIDsInRemote)); }); } + + public RemotePersistenceStats getStats() { + return remoteStateStats; + } } diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java b/server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java new file mode 100644 index 0000000000000..f2330846fa23e --- /dev/null +++ b/server/src/main/java/org/opensearch/gateway/remote/RemotePersistenceStats.java @@ -0,0 +1,37 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.gateway.remote; + +import org.opensearch.cluster.coordination.PersistedStateStats; + +import java.util.concurrent.atomic.AtomicLong; + +/** + * Remote state related extended stats. + * + * @opensearch.internal + */ +public class RemotePersistenceStats extends PersistedStateStats { + static final String CLEANUP_ATTEMPT_FAILED_COUNT = "cleanup_attempt_failed_count"; + static final String REMOTE_UPLOAD = "remote_upload"; + private AtomicLong cleanupAttemptFailedCount = new AtomicLong(0); + + public RemotePersistenceStats() { + super(REMOTE_UPLOAD); + addToExtendedFields(CLEANUP_ATTEMPT_FAILED_COUNT, cleanupAttemptFailedCount); + } + + public void cleanUpAttemptFailed() { + cleanupAttemptFailedCount.incrementAndGet(); + } + + public long getCleanupAttemptFailedCount() { + return cleanupAttemptFailedCount.get(); + } +} diff --git a/server/src/main/java/org/opensearch/index/IndexSettings.java b/server/src/main/java/org/opensearch/index/IndexSettings.java index e90e9259f6a5c..00e765d73f77f 100644 --- a/server/src/main/java/org/opensearch/index/IndexSettings.java +++ b/server/src/main/java/org/opensearch/index/IndexSettings.java @@ -668,6 +668,14 @@ public static IndexMergePolicy fromString(String text) { Property.IndexScope ); + public static final Setting INDEX_REMOTE_TRANSLOG_KEEP_EXTRA_GEN_SETTING = Setting.intSetting( + "index.remote_store.translog.keep_extra_gen", + 100, + 0, + Property.Dynamic, + Property.IndexScope + ); + private final Index index; private final Version version; private final Logger logger; @@ -680,6 +688,7 @@ public static IndexMergePolicy fromString(String text) { private final String remoteStoreTranslogRepository; private final String remoteStoreRepository; private final boolean isRemoteSnapshot; + private int remoteTranslogKeepExtraGen; private Version extendedCompatibilitySnapshotVersion; // volatile fields are updated via #updateIndexMetadata(IndexMetadata) under lock @@ -850,6 +859,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti remoteStoreTranslogRepository = settings.get(IndexMetadata.SETTING_REMOTE_TRANSLOG_STORE_REPOSITORY); remoteTranslogUploadBufferInterval = INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING.get(settings); remoteStoreRepository = settings.get(IndexMetadata.SETTING_REMOTE_SEGMENT_STORE_REPOSITORY); + this.remoteTranslogKeepExtraGen = INDEX_REMOTE_TRANSLOG_KEEP_EXTRA_GEN_SETTING.get(settings); isRemoteSnapshot = IndexModule.Type.REMOTE_SNAPSHOT.match(this.settings); if (isRemoteSnapshot && FeatureFlags.isEnabled(SEARCHABLE_SNAPSHOT_EXTENDED_COMPATIBILITY)) { @@ -1021,9 +1031,13 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti INDEX_REMOTE_TRANSLOG_BUFFER_INTERVAL_SETTING, this::setRemoteTranslogUploadBufferInterval ); + scopedSettings.addSettingsUpdateConsumer(INDEX_REMOTE_TRANSLOG_KEEP_EXTRA_GEN_SETTING, this::setRemoteTranslogKeepExtraGen); } private void setSearchIdleAfter(TimeValue searchIdleAfter) { + if (this.isRemoteStoreEnabled) { + logger.warn("Search idle is not supported for remote backed indices"); + } if (this.replicationType == ReplicationType.SEGMENT && this.getNumberOfReplicas() > 0) { logger.warn("Search idle is not supported for indices with replicas using 'replication.type: SEGMENT'"); } @@ -1297,6 +1311,10 @@ public TimeValue getRemoteTranslogUploadBufferInterval() { return remoteTranslogUploadBufferInterval; } + public int getRemoteTranslogExtraKeep() { + return remoteTranslogKeepExtraGen; + } + /** * Returns true iff the remote translog buffer interval setting exists or in other words is explicitly set. */ @@ -1308,6 +1326,10 @@ public void setRemoteTranslogUploadBufferInterval(TimeValue remoteTranslogUpload this.remoteTranslogUploadBufferInterval = remoteTranslogUploadBufferInterval; } + public void setRemoteTranslogKeepExtraGen(int extraGen) { + this.remoteTranslogKeepExtraGen = extraGen; + } + /** * Returns this interval in which the shards of this index are asynchronously refreshed. {@code -1} means async refresh is disabled. */ diff --git a/server/src/main/java/org/opensearch/index/ReplicationStats.java b/server/src/main/java/org/opensearch/index/ReplicationStats.java index 9cc6685c75f80..0ae4526365bf1 100644 --- a/server/src/main/java/org/opensearch/index/ReplicationStats.java +++ b/server/src/main/java/org/opensearch/index/ReplicationStats.java @@ -8,11 +8,9 @@ package org.opensearch.index; -import org.opensearch.common.unit.TimeValue; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; -import org.opensearch.core.common.unit.ByteSizeValue; import org.opensearch.core.xcontent.ToXContentFragment; import org.opensearch.core.xcontent.XContentBuilder; @@ -76,9 +74,9 @@ public void writeTo(StreamOutput out) throws IOException { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(Fields.SEGMENT_REPLICATION); - builder.field(Fields.MAX_BYTES_BEHIND, new ByteSizeValue(maxBytesBehind).toString()); - builder.field(Fields.TOTAL_BYTES_BEHIND, new ByteSizeValue(totalBytesBehind).toString()); - builder.field(Fields.MAX_REPLICATION_LAG, new TimeValue(maxReplicationLag)); + builder.field(Fields.MAX_BYTES_BEHIND, maxBytesBehind); + builder.field(Fields.TOTAL_BYTES_BEHIND, totalBytesBehind); + builder.field(Fields.MAX_REPLICATION_LAG, maxReplicationLag); builder.endObject(); return builder; } diff --git a/server/src/main/java/org/opensearch/index/SegmentReplicationPressureService.java b/server/src/main/java/org/opensearch/index/SegmentReplicationPressureService.java index 4284daf9ffef4..d9d480e7b2b27 100644 --- a/server/src/main/java/org/opensearch/index/SegmentReplicationPressureService.java +++ b/server/src/main/java/org/opensearch/index/SegmentReplicationPressureService.java @@ -106,10 +106,11 @@ public SegmentReplicationPressureService( ClusterService clusterService, IndicesService indicesService, ShardStateAction shardStateAction, + SegmentReplicationStatsTracker tracker, ThreadPool threadPool ) { this.indicesService = indicesService; - this.tracker = new SegmentReplicationStatsTracker(this.indicesService); + this.tracker = tracker; this.shardStateAction = shardStateAction; this.threadPool = threadPool; diff --git a/server/src/main/java/org/opensearch/index/SegmentReplicationRejectionStats.java b/server/src/main/java/org/opensearch/index/SegmentReplicationRejectionStats.java new file mode 100644 index 0000000000000..492f253bbcb7c --- /dev/null +++ b/server/src/main/java/org/opensearch/index/SegmentReplicationRejectionStats.java @@ -0,0 +1,65 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index; + +import org.opensearch.Version; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContentFragment; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; + +/** + * Segment replication rejection stats. + * + * @opensearch.internal + */ +public class SegmentReplicationRejectionStats implements Writeable, ToXContentFragment { + + /** + * Total rejections due to segment replication backpressure + */ + private long totalRejectionCount; + + public SegmentReplicationRejectionStats(final long totalRejectionCount) { + this.totalRejectionCount = totalRejectionCount; + } + + public SegmentReplicationRejectionStats(StreamInput in) throws IOException { + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { + this.totalRejectionCount = in.readVLong(); + } + } + + public long getTotalRejectionCount() { + return totalRejectionCount; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject("segment_replication_backpressure"); + builder.field("total_rejected_requests", totalRejectionCount); + return builder.endObject(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { + out.writeVLong(totalRejectionCount); + } + } + + @Override + public String toString() { + return "SegmentReplicationRejectionStats{ totalRejectedRequestCount=" + totalRejectionCount + '}'; + } + +} diff --git a/server/src/main/java/org/opensearch/index/SegmentReplicationStatsTracker.java b/server/src/main/java/org/opensearch/index/SegmentReplicationStatsTracker.java index 6d5c00c08caff..f5fc8aa1c1eea 100644 --- a/server/src/main/java/org/opensearch/index/SegmentReplicationStatsTracker.java +++ b/server/src/main/java/org/opensearch/index/SegmentReplicationStatsTracker.java @@ -33,6 +33,14 @@ public SegmentReplicationStatsTracker(IndicesService indicesService) { rejectionCount = ConcurrentCollections.newConcurrentMap(); } + public SegmentReplicationRejectionStats getTotalRejectionStats() { + return new SegmentReplicationRejectionStats(this.rejectionCount.values().stream().mapToInt(AtomicInteger::get).sum()); + } + + protected Map getRejectionCount() { + return rejectionCount; + } + public SegmentReplicationStats getStats() { Map stats = new HashMap<>(); for (IndexService indexService : indicesService) { diff --git a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java index ac9cf35d1d8e5..9541d13421e27 100644 --- a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java +++ b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java @@ -40,12 +40,10 @@ import java.util.ArrayList; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Optional; -import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; @@ -146,6 +144,11 @@ public RemoteRestoreResult restore( || restoreClusterUUID.isBlank()) == false; if (metadataFromRemoteStore) { try { + // Restore with current cluster UUID will fail as same indices would be present in the cluster which we are trying to + // restore + if (currentState.metadata().clusterUUID().equals(restoreClusterUUID)) { + throw new IllegalArgumentException("clusterUUID to restore from should be different from current cluster UUID"); + } remoteMetadata = remoteClusterStateService.getLatestMetadata(currentState.getClusterName().value(), restoreClusterUUID); remoteMetadata.getIndices().values().forEach(indexMetadata -> { indexMetadataMap.put(indexMetadata.getIndex().getName(), new Tuple<>(true, indexMetadata)); @@ -158,12 +161,21 @@ public RemoteRestoreResult restore( IndexMetadata indexMetadata = currentState.metadata().index(indexName); if (indexMetadata == null) { logger.warn("Index restore is not supported for non-existent index. Skipping: {}", indexName); + } else if (indexMetadata.getSettings().getAsBoolean(SETTING_REMOTE_STORE_ENABLED, false) == false) { + logger.warn("Remote store is not enabled for index: {}", indexName); + } else if (restoreAllShards && IndexMetadata.State.CLOSE.equals(indexMetadata.getState()) == false) { + throw new IllegalStateException( + String.format( + Locale.ROOT, + "cannot restore index [%s] because an open index with same name/uuid already exists in the cluster.", + indexName + ) + " Close the existing index." + ); } else { indexMetadataMap.put(indexName, new Tuple<>(false, indexMetadata)); } } } - validate(currentState, indexMetadataMap, restoreClusterUUID, restoreAllShards); return executeRestore(currentState, indexMetadataMap, restoreAllShards, remoteMetadata); } @@ -183,6 +195,7 @@ private RemoteRestoreResult executeRestore( final String restoreUUID = UUIDs.randomBase64UUID(); List indicesToBeRestored = new ArrayList<>(); int totalShards = 0; + boolean metadataFromRemoteStore = false; ClusterState.Builder builder = ClusterState.builder(currentState); Metadata.Builder mdBuilder = Metadata.builder(currentState.metadata()); ClusterBlocks.Builder blocks = ClusterBlocks.builder().blocks(currentState.blocks()); @@ -190,7 +203,7 @@ private RemoteRestoreResult executeRestore( for (Map.Entry> indexMetadataEntry : indexMetadataMap.entrySet()) { String indexName = indexMetadataEntry.getKey(); IndexMetadata indexMetadata = indexMetadataEntry.getValue().v2(); - boolean metadataFromRemoteStore = indexMetadataEntry.getValue().v1(); + metadataFromRemoteStore = indexMetadataEntry.getValue().v1(); IndexMetadata updatedIndexMetadata = indexMetadata; if (metadataFromRemoteStore == false && restoreAllShards) { updatedIndexMetadata = IndexMetadata.builder(indexMetadata) @@ -204,27 +217,23 @@ private RemoteRestoreResult executeRestore( IndexId indexId = new IndexId(indexName, updatedIndexMetadata.getIndexUUID()); - Map indexShardRoutingTableMap = new HashMap<>(); if (metadataFromRemoteStore == false) { - indexShardRoutingTableMap = currentState.routingTable() + Map indexShardRoutingTableMap = currentState.routingTable() .index(indexName) .shards() .values() .stream() .collect(Collectors.toMap(IndexShardRoutingTable::shardId, Function.identity())); + + RecoverySource.RemoteStoreRecoverySource recoverySource = new RecoverySource.RemoteStoreRecoverySource( + restoreUUID, + updatedIndexMetadata.getCreationVersion(), + indexId + ); + + rtBuilder.addAsRemoteStoreRestore(updatedIndexMetadata, recoverySource, indexShardRoutingTableMap, restoreAllShards); } - RecoverySource.RemoteStoreRecoverySource recoverySource = new RecoverySource.RemoteStoreRecoverySource( - restoreUUID, - updatedIndexMetadata.getCreationVersion(), - indexId - ); - rtBuilder.addAsRemoteStoreRestore( - updatedIndexMetadata, - recoverySource, - indexShardRoutingTableMap, - restoreAllShards || metadataFromRemoteStore - ); blocks.updateBlocks(updatedIndexMetadata); mdBuilder.put(updatedIndexMetadata, true); indicesToBeRestored.add(indexName); @@ -239,7 +248,10 @@ private RemoteRestoreResult executeRestore( RoutingTable rt = rtBuilder.build(); ClusterState updatedState = builder.metadata(mdBuilder).blocks(blocks).routingTable(rt).build(); - return RemoteRestoreResult.build(restoreUUID, restoreInfo, allocationService.reroute(updatedState, "restored from remote store")); + if (metadataFromRemoteStore == false) { + updatedState = allocationService.reroute(updatedState, "restored from remote store"); + } + return RemoteRestoreResult.build(restoreUUID, restoreInfo, updatedState); } private void restoreGlobalMetadata(Metadata.Builder mdBuilder, Metadata remoteMetadata) { @@ -272,83 +284,6 @@ private void restoreGlobalMetadata(Metadata.Builder mdBuilder, Metadata remoteMe repositoriesMetadata.ifPresent(metadata -> mdBuilder.putCustom(RepositoriesMetadata.TYPE, metadata)); } - /** - * Performs various validations needed before executing restore - * @param currentState current cluster state - * @param indexMetadataMap map of index metadata to restore - * @param restoreClusterUUID cluster UUID used to restore IndexMetadata - * @param restoreAllShards indicates if all shards of the index needs to be restored. This flat is ignored if remoteClusterUUID is provided - */ - private void validate( - ClusterState currentState, - Map> indexMetadataMap, - @Nullable String restoreClusterUUID, - boolean restoreAllShards - ) throws IllegalStateException, IllegalArgumentException { - String errorMsg = "cannot restore index [%s] because an open index with same name/uuid already exists in the cluster."; - - // Restore with current cluster UUID will fail as same indices would be present in the cluster which we are trying to - // restore - if (currentState.metadata().clusterUUID().equals(restoreClusterUUID)) { - throw new IllegalArgumentException("clusterUUID to restore from should be different from current cluster UUID"); - } - for (Map.Entry> indexMetadataEntry : indexMetadataMap.entrySet()) { - String indexName = indexMetadataEntry.getKey(); - IndexMetadata indexMetadata = indexMetadataEntry.getValue().v2(); - String indexUUID = indexMetadata.getIndexUUID(); - boolean metadataFromRemoteStore = indexMetadataEntry.getValue().v1(); - if (indexMetadata.getSettings().getAsBoolean(SETTING_REMOTE_STORE_ENABLED, false)) { - if (metadataFromRemoteStore) { - Set graveyardIndexNames = new HashSet<>(); - Set graveyardIndexUUID = new HashSet<>(); - Set liveClusterIndexUUIDs = currentState.metadata() - .indices() - .values() - .stream() - .map(IndexMetadata::getIndexUUID) - .collect(Collectors.toSet()); - - currentState.metadata().indexGraveyard().getTombstones().forEach(tombstone -> { - graveyardIndexNames.add(tombstone.getIndex().getName()); - graveyardIndexUUID.add(tombstone.getIndex().getUUID()); - }); - - // Since updates to graveyard are synced to remote we should neven land in a situation where remote contain index - // metadata for graveyard index. - assert graveyardIndexNames.contains(indexName) == false : String.format( - Locale.ROOT, - "Index name [%s] exists in graveyard!", - indexName - ); - assert graveyardIndexUUID.contains(indexUUID) == false : String.format( - Locale.ROOT, - "Index UUID [%s] exists in graveyard!", - indexUUID - ); - - // Any indices being restored from remote cluster state should not already be part of the cluster as this causes - // conflict - boolean sameNameIndexExists = currentState.metadata().hasIndex(indexName); - boolean sameUUIDIndexExists = liveClusterIndexUUIDs.contains(indexUUID); - if (sameNameIndexExists || sameUUIDIndexExists) { - String finalErrorMsg = String.format(Locale.ROOT, errorMsg, indexName); - logger.info(finalErrorMsg); - throw new IllegalStateException(finalErrorMsg); - } - - boolean isHidden = IndexMetadata.INDEX_HIDDEN_SETTING.get(indexMetadata.getSettings()); - createIndexService.validateIndexName(indexName, currentState); - createIndexService.validateDotIndex(indexName, isHidden); - shardLimitValidator.validateShardLimit(indexName, indexMetadata.getSettings(), currentState); - } else if (restoreAllShards && IndexMetadata.State.CLOSE.equals(indexMetadata.getState()) == false) { - throw new IllegalStateException(String.format(Locale.ROOT, errorMsg, indexName) + " Close the existing index."); - } - } else { - logger.warn("Remote store is not enabled for index: {}", indexName); - } - } - } - /** * Result of a remote restore operation. */ diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java b/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java index 2a703f17aa953..fb65d9ef83be2 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java @@ -27,6 +27,7 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; @@ -66,6 +67,12 @@ public class RemoteSegmentTransferTracker extends RemoteTransferTracker { */ private volatile long remoteRefreshTimeMs; + /** + * This is the time of first local refresh after the last successful remote refresh. When the remote store is in + * sync with local refresh, this will be reset to -1. + */ + private volatile long remoteRefreshStartTimeMs = -1; + /** * The refresh time(clock) of most recent remote refresh. */ @@ -76,11 +83,6 @@ public class RemoteSegmentTransferTracker extends RemoteTransferTracker { */ private volatile long refreshSeqNoLag; - /** - * Keeps the time (ms) lag computed so that we do not compute it for every request. - */ - private volatile long timeMsLag; - /** * Keeps track of the total bytes of segment files which were uploaded to remote store during last successful remote refresh */ @@ -132,14 +134,19 @@ public RemoteSegmentTransferTracker( logger = Loggers.getLogger(getClass(), shardId); // Both the local refresh time and remote refresh time are set with current time to give consistent view of time lag when it arises. long currentClockTimeMs = System.currentTimeMillis(); - long currentTimeMs = System.nanoTime() / 1_000_000L; + long currentTimeMs = currentTimeMsUsingSystemNanos(); localRefreshTimeMs = currentTimeMs; remoteRefreshTimeMs = currentTimeMs; + remoteRefreshStartTimeMs = currentTimeMs; localRefreshClockTimeMs = currentClockTimeMs; remoteRefreshClockTimeMs = currentClockTimeMs; this.directoryFileTransferTracker = directoryFileTransferTracker; } + public static long currentTimeMsUsingSystemNanos() { + return TimeUnit.NANOSECONDS.toMillis(System.nanoTime()); + } + @Override public void incrementTotalUploadsFailed() { super.incrementTotalUploadsFailed(); @@ -180,19 +187,22 @@ public long getLocalRefreshClockTimeMs() { */ public void updateLocalRefreshTimeAndSeqNo() { updateLocalRefreshClockTimeMs(System.currentTimeMillis()); - updateLocalRefreshTimeMs(System.nanoTime() / 1_000_000L); + updateLocalRefreshTimeMs(currentTimeMsUsingSystemNanos()); updateLocalRefreshSeqNo(getLocalRefreshSeqNo() + 1); } // Visible for testing - void updateLocalRefreshTimeMs(long localRefreshTimeMs) { + synchronized void updateLocalRefreshTimeMs(long localRefreshTimeMs) { assert localRefreshTimeMs >= this.localRefreshTimeMs : "newLocalRefreshTimeMs=" + localRefreshTimeMs + " < " + "currentLocalRefreshTimeMs=" + this.localRefreshTimeMs; + boolean isRemoteInSyncBeforeLocalRefresh = this.localRefreshTimeMs == this.remoteRefreshTimeMs; this.localRefreshTimeMs = localRefreshTimeMs; - computeTimeMsLag(); + if (isRemoteInSyncBeforeLocalRefresh) { + this.remoteRefreshStartTimeMs = localRefreshTimeMs; + } } private void updateLocalRefreshClockTimeMs(long localRefreshClockTimeMs) { @@ -221,14 +231,18 @@ long getRemoteRefreshClockTimeMs() { return remoteRefreshClockTimeMs; } - public void updateRemoteRefreshTimeMs(long remoteRefreshTimeMs) { - assert remoteRefreshTimeMs >= this.remoteRefreshTimeMs : "newRemoteRefreshTimeMs=" - + remoteRefreshTimeMs + public synchronized void updateRemoteRefreshTimeMs(long refreshTimeMs) { + assert refreshTimeMs >= this.remoteRefreshTimeMs : "newRemoteRefreshTimeMs=" + + refreshTimeMs + " < " + "currentRemoteRefreshTimeMs=" + this.remoteRefreshTimeMs; - this.remoteRefreshTimeMs = remoteRefreshTimeMs; - computeTimeMsLag(); + this.remoteRefreshTimeMs = refreshTimeMs; + // When multiple refreshes have failed, there is a possibility that retry is ongoing while another refresh gets + // triggered. After the segments have been uploaded and before the below code runs, the updateLocalRefreshTimeAndSeqNo + // method is triggered, which will update the local localRefreshTimeMs. Now, the lag would basically become the + // time since the last refresh happened locally. + this.remoteRefreshStartTimeMs = refreshTimeMs == this.localRefreshTimeMs ? -1 : this.localRefreshTimeMs; } public void updateRemoteRefreshClockTimeMs(long remoteRefreshClockTimeMs) { @@ -243,12 +257,11 @@ public long getRefreshSeqNoLag() { return refreshSeqNoLag; } - private void computeTimeMsLag() { - timeMsLag = localRefreshTimeMs - remoteRefreshTimeMs; - } - public long getTimeMsLag() { - return timeMsLag; + if (remoteRefreshTimeMs == localRefreshTimeMs) { + return 0; + } + return currentTimeMsUsingSystemNanos() - remoteRefreshStartTimeMs; } public long getBytesLag() { @@ -354,7 +367,7 @@ public RemoteSegmentTransferTracker.Stats stats() { shardId, localRefreshClockTimeMs, remoteRefreshClockTimeMs, - timeMsLag, + getTimeMsLag(), localRefreshSeqNo, remoteRefreshSeqNo, uploadBytesStarted.get(), diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteStorePressureService.java b/server/src/main/java/org/opensearch/index/remote/RemoteStorePressureService.java index 2920b33921869..33cd40f802d43 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteStorePressureService.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteStorePressureService.java @@ -180,7 +180,6 @@ public boolean validate(RemoteSegmentTransferTracker pressureTracker, ShardId sh return true; } if (pressureTracker.isUploadTimeMovingAverageReady() == false) { - logger.trace("upload time moving average is not ready"); return true; } long timeLag = pressureTracker.getTimeMsLag(); diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteTranslogTransferTracker.java b/server/src/main/java/org/opensearch/index/remote/RemoteTranslogTransferTracker.java index 1a9896540212e..4214a87049350 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteTranslogTransferTracker.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteTranslogTransferTracker.java @@ -232,6 +232,63 @@ public RemoteTranslogTransferTracker.Stats stats() { ); } + @Override + public String toString() { + return "RemoteTranslogTransferStats{" + + "lastSuccessfulUploadTimestamp=" + + lastSuccessfulUploadTimestamp.get() + + "," + + "totalUploadsStarted=" + + totalUploadsStarted.get() + + "," + + "totalUploadsSucceeded=" + + totalUploadsSucceeded.get() + + "," + + "totalUploadsFailed=" + + totalUploadsFailed.get() + + "," + + "uploadBytesStarted=" + + uploadBytesStarted.get() + + "," + + "uploadBytesFailed=" + + uploadBytesFailed.get() + + "," + + "totalUploadTimeInMillis=" + + totalUploadTimeInMillis.get() + + "," + + "uploadBytesMovingAverage=" + + uploadBytesMovingAverageReference.get().getAverage() + + "," + + "uploadBytesPerSecMovingAverage=" + + uploadBytesPerSecMovingAverageReference.get().getAverage() + + "," + + "uploadTimeMovingAverage=" + + uploadTimeMsMovingAverageReference.get().getAverage() + + "," + + "lastSuccessfulDownloadTimestamp=" + + lastSuccessfulDownloadTimestamp.get() + + "," + + "totalDownloadsSucceeded=" + + totalDownloadsSucceeded.get() + + "," + + "downloadBytesSucceeded=" + + downloadBytesSucceeded.get() + + "," + + "totalDownloadTimeInMillis=" + + totalDownloadTimeInMillis.get() + + "," + + "downloadBytesMovingAverage=" + + downloadBytesMovingAverageReference.get().getAverage() + + "," + + "downloadBytesPerSecMovingAverage=" + + downloadBytesPerSecMovingAverageReference.get().getAverage() + + "," + + "downloadTimeMovingAverage=" + + downloadTimeMsMovingAverageReference.get().getAverage() + + "," + + "}"; + } + /** * Represents the tracker's state as seen in the stats API. * diff --git a/server/src/main/java/org/opensearch/index/seqno/RetentionLeaseSyncAction.java b/server/src/main/java/org/opensearch/index/seqno/RetentionLeaseSyncAction.java index f74fc7eefe65c..ca3c7e1d49700 100644 --- a/server/src/main/java/org/opensearch/index/seqno/RetentionLeaseSyncAction.java +++ b/server/src/main/java/org/opensearch/index/seqno/RetentionLeaseSyncAction.java @@ -62,6 +62,7 @@ import org.opensearch.indices.IndicesService; import org.opensearch.indices.SystemIndices; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.Tracer; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportException; import org.opensearch.transport.TransportResponseHandler; @@ -99,7 +100,8 @@ public RetentionLeaseSyncAction( final ShardStateAction shardStateAction, final ActionFilters actionFilters, final IndexingPressureService indexingPressureService, - final SystemIndices systemIndices + final SystemIndices systemIndices, + final Tracer tracer ) { super( settings, @@ -115,7 +117,8 @@ public RetentionLeaseSyncAction( ignore -> ThreadPool.Names.MANAGEMENT, false, indexingPressureService, - systemIndices + systemIndices, + tracer ); } diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 5ebfd3863a6cf..3c348035ebbdd 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -640,7 +640,7 @@ public void updateShardState( if (currentRouting.initializing() && currentRouting.isRelocationTarget() == false && newRouting.active()) { // the cluster-manager started a recovering primary, activate primary mode. replicationTracker.activatePrimaryMode(getLocalCheckpoint()); - ensurePeerRecoveryRetentionLeasesExist(); + postActivatePrimaryMode(); } } else { assert currentRouting.primary() == false : "term is only increased as part of primary promotion"; @@ -711,8 +711,7 @@ public void updateShardState( // are brought up to date. checkpointPublisher.publish(this, getLatestReplicationCheckpoint()); } - - ensurePeerRecoveryRetentionLeasesExist(); + postActivatePrimaryMode(); /* * If this shard was serving as a replica shard when another shard was promoted to primary then * its Lucene index was reset during the primary term transition. In particular, the Lucene index @@ -3010,7 +3009,7 @@ public ReplicationStats getReplicationStats() { long maxBytesBehind = stats.stream().mapToLong(SegmentReplicationShardStats::getBytesBehindCount).max().orElse(0L); long totalBytesBehind = stats.stream().mapToLong(SegmentReplicationShardStats::getBytesBehindCount).sum(); long maxReplicationLag = stats.stream() - .mapToLong(SegmentReplicationShardStats::getCurrentReplicationTimeMillis) + .mapToLong(SegmentReplicationShardStats::getCurrentReplicationLagMillis) .max() .orElse(0L); return new ReplicationStats(maxBytesBehind, totalBytesBehind, maxReplicationLag); @@ -3393,6 +3392,20 @@ assert getLocalCheckpoint() == primaryContext.getCheckpointStates().get(routingE synchronized (mutex) { replicationTracker.activateWithPrimaryContext(primaryContext); // make changes to primaryMode flag only under mutex } + postActivatePrimaryMode(); + } + + private void postActivatePrimaryMode() { + if (indexSettings.isRemoteStoreEnabled()) { + // We make sure to upload translog (even if it does not contain any operations) to remote translog. + // This helps to get a consistent state in remote store where both remote segment store and remote + // translog contains data. + try { + getEngine().translogManager().syncTranslog(); + } catch (IOException e) { + logger.error("Failed to sync translog to remote from new primary", e); + } + } ensurePeerRecoveryRetentionLeasesExist(); } @@ -4425,7 +4438,6 @@ public final boolean isSearchIdle() { } /** - * * Returns true if this shard supports search idle. *

* Indices using Segment Replication will ignore search idle unless there are no replicas. @@ -4434,6 +4446,11 @@ public final boolean isSearchIdle() { * a new set of segments. */ public final boolean isSearchIdleSupported() { + // If the index is remote store backed, then search idle is not supported. This is to ensure that async refresh + // task continues to upload to remote store periodically. + if (isRemoteTranslogEnabled()) { + return false; + } return indexSettings.isSegRepEnabled() == false || indexSettings.getNumberOfReplicas() == 0; } @@ -4770,6 +4787,8 @@ public void syncSegmentsFromRemoteSegmentStore(boolean overrideLocal) throws IOE * @throws IOException if exception occurs while reading segments from remote store. */ public void syncSegmentsFromRemoteSegmentStore(boolean overrideLocal, final Runnable onFileSync) throws IOException { + boolean syncSegmentSuccess = false; + long startTimeMs = System.currentTimeMillis(); assert indexSettings.isRemoteStoreEnabled(); logger.trace("Downloading segments from remote segment store"); RemoteSegmentStoreDirectory remoteDirectory = getRemoteDirectory(); @@ -4819,9 +4838,15 @@ public void syncSegmentsFromRemoteSegmentStore(boolean overrideLocal, final Runn : "There should not be any segments file in the dir"; store.commitSegmentInfos(infosSnapshot, processedLocalCheckpoint, processedLocalCheckpoint); } + syncSegmentSuccess = true; } catch (IOException e) { throw new IndexShardRecoveryException(shardId, "Exception while copying segment files from remote segment store", e); } finally { + logger.trace( + "syncSegmentsFromRemoteSegmentStore success={} elapsedTime={}", + syncSegmentSuccess, + (System.currentTimeMillis() - startTimeMs) + ); store.decRef(); remoteStore.decRef(); } diff --git a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java index c650edc31da8d..3e97b07abfb5d 100644 --- a/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java +++ b/server/src/main/java/org/opensearch/index/shard/RemoteStoreRefreshListener.java @@ -123,14 +123,13 @@ public void beforeRefresh() throws IOException {} @Override protected void runAfterRefreshExactlyOnce(boolean didRefresh) { - if (shouldSync(didRefresh)) { + // We have 2 separate methods to check if sync needs to be done or not. This is required since we use the return boolean + // from isReadyForUpload to schedule refresh retries as the index shard or the primary mode are not in complete + // ready state. + if (shouldSync(didRefresh) && isReadyForUpload()) { segmentTracker.updateLocalRefreshTimeAndSeqNo(); try { - if (this.primaryTerm != indexShard.getOperationPrimaryTerm()) { - logger.debug("primaryTerm update from={} to={}", primaryTerm, indexShard.getOperationPrimaryTerm()); - this.primaryTerm = indexShard.getOperationPrimaryTerm(); - this.remoteDirectory.init(); - } + initializeRemoteDirectoryOnTermUpdate(); try (GatedCloseable segmentInfosGatedCloseable = indexShard.getSegmentInfosSnapshot()) { Collection localSegmentsPostRefresh = segmentInfosGatedCloseable.get().files(true); updateLocalSizeMapAndTracker(localSegmentsPostRefresh); @@ -160,20 +159,20 @@ protected boolean performAfterRefreshWithPermit(boolean didRefresh) { } private boolean shouldSync(boolean didRefresh) { - // The third condition exists for uploading the zero state segments where the refresh has not changed the reader reference, but it - // is important to upload the zero state segments so that the restore does not break. return this.primaryTerm != indexShard.getOperationPrimaryTerm() + // If the readers change, didRefresh is always true. || didRefresh - || remoteDirectory.getSegmentsUploadedToRemoteStore().isEmpty(); + // The third condition exists for uploading the zero state segments where the refresh has not changed the reader + // reference, but it is important to upload the zero state segments so that the restore does not break. + || remoteDirectory.getSegmentsUploadedToRemoteStore().isEmpty() + // When the shouldSync is called the first time, then 1st condition on primary term is true. But after that + // we update the primary term and the same condition would not evaluate to true again in syncSegments. + // Below check ensures that if there is commit, then that gets picked up by both 1st and 2nd shouldSync call. + || isRefreshAfterCommitSafe(); } private boolean syncSegments() { - if (indexShard.getReplicationTracker().isPrimaryMode() == false || indexShard.state() == IndexShardState.CLOSED) { - logger.debug( - "Skipped syncing segments with primaryMode={} indexShardState={}", - indexShard.getReplicationTracker().isPrimaryMode(), - indexShard.state() - ); + if (isReadyForUpload() == false) { // Following check is required to enable retry and make sure that we do not lose this refresh event // When primary shard is restored from remote store, the recovery happens first followed by changing // primaryMode to true. Due to this, the refresh that is triggered post replay of translog will not go through @@ -323,6 +322,19 @@ private boolean isRefreshAfterCommit() throws IOException { && !remoteDirectory.containsFile(lastCommittedLocalSegmentFileName, getChecksumOfLocalFile(lastCommittedLocalSegmentFileName))); } + /** + * Returns if the current refresh has happened after a commit. + * @return true if this refresh has happened on account of a commit. If otherwise or exception, returns false. + */ + private boolean isRefreshAfterCommitSafe() { + try { + return isRefreshAfterCommit(); + } catch (Exception e) { + logger.info("Exception occurred in isRefreshAfterCommitSafe", e); + } + return false; + } + void uploadMetadata(Collection localSegmentsPostRefresh, SegmentInfos segmentInfos, ReplicationCheckpoint replicationCheckpoint) throws IOException { final long maxSeqNo = ((InternalEngine) indexShard.getEngine()).currentOngoingRefreshCheckpoint(); @@ -439,6 +451,48 @@ private void updateFinalStatusInSegmentTracker(boolean uploadStatus, long bytesB } } + /** + * On primary term update, we (re)initialise the remote segment directory to reflect the latest metadata file that + * has been uploaded to remote store successfully. This method also updates the segment tracker about the latest + * uploaded segment files onto remote store. + */ + private void initializeRemoteDirectoryOnTermUpdate() throws IOException { + if (this.primaryTerm != indexShard.getOperationPrimaryTerm()) { + logger.trace("primaryTerm update from={} to={}", primaryTerm, indexShard.getOperationPrimaryTerm()); + this.primaryTerm = indexShard.getOperationPrimaryTerm(); + RemoteSegmentMetadata uploadedMetadata = this.remoteDirectory.init(); + + // During failover, the uploaded metadata would have names of files that have been uploaded to remote store. + // Here we update the tracker with latest remote uploaded files. + if (uploadedMetadata != null) { + segmentTracker.setLatestUploadedFiles(uploadedMetadata.getMetadata().keySet()); + } + } + } + + /** + * This checks for readiness of the index shard and primary mode. This has separated from shouldSync since we use the + * returned value of this method for scheduling retries in syncSegments method. + * @return true iff primaryMode is true and index shard is not in closed state. + */ + private boolean isReadyForUpload() { + boolean isReady = indexShard.getReplicationTracker().isPrimaryMode() && indexShard.state() != IndexShardState.CLOSED; + if (isReady == false) { + StringBuilder sb = new StringBuilder("Skipped syncing segments with"); + if (indexShard.getReplicationTracker() != null) { + sb.append(" primaryMode=").append(indexShard.getReplicationTracker().isPrimaryMode()); + } + if (indexShard.state() != null) { + sb.append(" indexShardState=").append(indexShard.state()); + } + if (indexShard.getEngineOrNull() != null) { + sb.append(" engineType=").append(indexShard.getEngine().getClass().getSimpleName()); + } + logger.trace(sb.toString()); + } + return isReady; + } + /** * Creates an {@link UploadListener} containing the stats population logic which would be triggered before and after segment upload events */ diff --git a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java index 6b43fed3d8930..be1f2341236ab 100644 --- a/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java +++ b/server/src/main/java/org/opensearch/index/store/RemoteSegmentStoreDirectory.java @@ -290,10 +290,6 @@ public void setWrittenByMajor(int writtenByMajor) { ); } } - - public int getWrittenByMajor() { - return writtenByMajor; - } } /** diff --git a/server/src/main/java/org/opensearch/index/store/Store.java b/server/src/main/java/org/opensearch/index/store/Store.java index d0cd2635ba672..b822742de6e97 100644 --- a/server/src/main/java/org/opensearch/index/store/Store.java +++ b/server/src/main/java/org/opensearch/index/store/Store.java @@ -105,7 +105,6 @@ import java.io.UncheckedIOException; import java.nio.file.NoSuchFileException; import java.nio.file.Path; -import java.text.ParseException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; @@ -121,7 +120,6 @@ import java.util.zip.CRC32; import java.util.zip.Checksum; -import static java.lang.Character.MAX_RADIX; import static java.util.Collections.emptyMap; import static java.util.Collections.unmodifiableMap; import static org.opensearch.index.seqno.SequenceNumbers.LOCAL_CHECKPOINT_KEY; @@ -977,11 +975,7 @@ public void copyFrom(Directory from, String src, String dest, IOContext context) boolean success = false; long startTime = System.currentTimeMillis(); try { - if (from instanceof RemoteSegmentStoreDirectory) { - copyFileAndValidateChecksum(from, src, dest, context, fileSize); - } else { - super.copyFrom(from, src, dest, context); - } + super.copyFrom(from, src, dest, context); success = true; afterDownload(fileSize, startTime); } finally { @@ -991,43 +985,6 @@ public void copyFrom(Directory from, String src, String dest, IOContext context) } } - private void copyFileAndValidateChecksum(Directory from, String src, String dest, IOContext context, long fileSize) - throws IOException { - RemoteSegmentStoreDirectory.UploadedSegmentMetadata metadata = ((RemoteSegmentStoreDirectory) from) - .getSegmentsUploadedToRemoteStore() - .get(dest); - boolean success = false; - try (IndexInput is = from.openInput(src, context); IndexOutput os = createOutput(dest, context)) { - // Here, we don't need the exact version as LuceneVerifyingIndexOutput does not verify version - // It is just used to emit logs when the entire metadata object is provided as parameter. Also, - // we can't provide null version as StoreFileMetadata has non-null check on writtenBy field. - Version luceneMajorVersion = Version.parse(metadata.getWrittenByMajor() + ".0.0"); - long checksum = Long.parseLong(metadata.getChecksum()); - StoreFileMetadata storeFileMetadata = new StoreFileMetadata( - dest, - fileSize, - Long.toString(checksum, MAX_RADIX), - luceneMajorVersion - ); - VerifyingIndexOutput verifyingIndexOutput = new LuceneVerifyingIndexOutput(storeFileMetadata, os); - verifyingIndexOutput.copyBytes(is, is.length()); - verifyingIndexOutput.verify(); - success = true; - } catch (ParseException e) { - throw new IOException("Exception while reading version info for segment file from remote store: " + dest, e); - } finally { - if (success == false) { - // If the exception is thrown after file is created, we clean up the file. - // We ignore the exception as the deletion is best-effort basis and can fail if file does not exist. - try { - deleteFile("Quietly deleting", dest); - } catch (Exception e) { - // Ignore - } - } - } - } - /** * Updates the amount of bytes attempted for download */ @@ -1519,7 +1476,7 @@ public static boolean isAutogenerated(String name) { * Produces a string representation of the given digest value. */ public static String digestToString(long digest) { - return Long.toString(digest, MAX_RADIX); + return Long.toString(digest, Character.MAX_RADIX); } /** diff --git a/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java b/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java index 85c52b907d326..4d0fc13d433c6 100644 --- a/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java +++ b/server/src/main/java/org/opensearch/index/translog/InternalTranslogManager.java @@ -430,10 +430,10 @@ public String getTranslogUUID() { * @return if the translog should be flushed */ public boolean shouldPeriodicallyFlush(long localCheckpointOfLastCommit, long flushThreshold) { - final long translogGenerationOfLastCommit = translog.getMinGenerationForSeqNo( - localCheckpointOfLastCommit + 1 - ).translogFileGeneration; - if (translog.sizeInBytesByMinGen(translogGenerationOfLastCommit) < flushThreshold) { + // This is the minimum seqNo that is referred in translog and considered for calculating translog size + long minTranslogRefSeqNo = translog.getMinUnreferencedSeqNoInSegments(localCheckpointOfLastCommit + 1); + final long minReferencedTranslogGeneration = translog.getMinGenerationForSeqNo(minTranslogRefSeqNo).translogFileGeneration; + if (translog.sizeInBytesByMinGen(minReferencedTranslogGeneration) < flushThreshold) { return false; } /* @@ -454,7 +454,7 @@ public boolean shouldPeriodicallyFlush(long localCheckpointOfLastCommit, long fl final long translogGenerationOfNewCommit = translog.getMinGenerationForSeqNo( localCheckpointTrackerSupplier.get().getProcessedCheckpoint() + 1 ).translogFileGeneration; - return translogGenerationOfLastCommit < translogGenerationOfNewCommit + return minReferencedTranslogGeneration < translogGenerationOfNewCommit || localCheckpointTrackerSupplier.get().getProcessedCheckpoint() == localCheckpointTrackerSupplier.get().getMaxSeqNo(); } diff --git a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java index 29c825fd383c5..a305a774f5854 100644 --- a/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java +++ b/server/src/main/java/org/opensearch/index/translog/RemoteFsTranslog.java @@ -161,6 +161,7 @@ public static void download(Repository repository, ShardId shardId, ThreadPool t remoteTranslogTransferTracker ); RemoteFsTranslog.download(translogTransferManager, location, logger); + logger.trace(remoteTranslogTransferTracker.toString()); } static void download(TranslogTransferManager translogTransferManager, Path location, Logger logger) throws IOException { @@ -173,15 +174,20 @@ static void download(TranslogTransferManager translogTransferManager, Path locat */ IOException ex = null; for (int i = 0; i <= DOWNLOAD_RETRIES; i++) { + boolean success = false; + long startTimeMs = System.currentTimeMillis(); try { downloadOnce(translogTransferManager, location, logger); + success = true; return; } catch (FileNotFoundException | NoSuchFileException e) { // continue till download retries ex = e; + } finally { + logger.trace("downloadOnce success={} timeElapsed={}", success, (System.currentTimeMillis() - startTimeMs)); } } - logger.debug("Exhausted all download retries during translog/checkpoint file download"); + logger.info("Exhausted all download retries during translog/checkpoint file download"); throw ex; } @@ -425,7 +431,7 @@ public void trimUnreferencedReaders() throws IOException { // cleans up remote translog files not referenced in latest uploaded metadata. // This enables us to restore translog from the metadata in case of failover or relocation. Set generationsToDelete = new HashSet<>(); - for (long generation = minRemoteGenReferenced - 1; generation >= 0; generation--) { + for (long generation = minRemoteGenReferenced - 1 - indexSettings().getRemoteTranslogExtraKeep(); generation >= 0; generation--) { if (fileTransferTracker.uploaded(Translog.getFilename(generation)) == false) { break; } @@ -544,4 +550,9 @@ public void onUploadFailed(TransferSnapshot transferSnapshot, Exception ex) thro } } } + + @Override + public long getMinUnreferencedSeqNoInSegments(long minUnrefCheckpointInLastCommit) { + return minSeqNoToKeep; + } } diff --git a/server/src/main/java/org/opensearch/index/translog/Translog.java b/server/src/main/java/org/opensearch/index/translog/Translog.java index cf7f18840a03e..b44aa6e059224 100644 --- a/server/src/main/java/org/opensearch/index/translog/Translog.java +++ b/server/src/main/java/org/opensearch/index/translog/Translog.java @@ -2034,4 +2034,8 @@ public static String createEmptyTranslog( writer.close(); return uuid; } + + public long getMinUnreferencedSeqNoInSegments(long minUnrefCheckpointInLastCommit) { + return minUnrefCheckpointInLastCommit; + } } diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index 50c551c2be29b..36abc77893d81 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -299,6 +299,17 @@ public class IndicesService extends AbstractLifecycleComponent Property.Final ); + /** + * This setting is used to restrict creation of index where the 'index.replication.type' index setting is set. + * If disabled, the replication type can be specified. + */ + public static final Setting CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING = Setting.boolSetting( + "cluster.restrict.index.replication_type", + false, + Property.NodeScope, + Property.Final + ); + /** * The node's settings. */ diff --git a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoverySourceService.java b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoverySourceService.java index 6c7632a8a408d..cb2bedf00de99 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/PeerRecoverySourceService.java +++ b/server/src/main/java/org/opensearch/indices/recovery/PeerRecoverySourceService.java @@ -376,7 +376,8 @@ private Tuple createRecovery transportService, request.targetNode(), recoverySettings, - throttleTime -> shard.recoveryStats().addThrottleTime(throttleTime) + throttleTime -> shard.recoveryStats().addThrottleTime(throttleTime), + shard.isRemoteTranslogEnabled() ); handler = RecoverySourceHandlerFactory.create(shard, recoveryTarget, request, recoverySettings); return Tuple.tuple(handler, recoveryTarget); diff --git a/server/src/main/java/org/opensearch/indices/recovery/RecoverySettings.java b/server/src/main/java/org/opensearch/indices/recovery/RecoverySettings.java index 44dfb2f4cb00a..0f3025369833d 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RecoverySettings.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RecoverySettings.java @@ -41,6 +41,7 @@ import org.opensearch.common.settings.Setting.Property; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.concurrent.OpenSearchExecutors; import org.opensearch.core.common.unit.ByteSizeUnit; import org.opensearch.core.common.unit.ByteSizeValue; @@ -87,10 +88,10 @@ public class RecoverySettings { /** * Controls the maximum number of streams that can be started concurrently per recovery when downloading from the remote store. */ - public static final Setting INDICES_RECOVERY_MAX_CONCURRENT_REMOTE_STORE_STREAMS_SETTING = Setting.intSetting( + public static final Setting INDICES_RECOVERY_MAX_CONCURRENT_REMOTE_STORE_STREAMS_SETTING = new Setting<>( "indices.recovery.max_concurrent_remote_store_streams", - 10, - 1, + (s) -> Integer.toString(Math.max(1, OpenSearchExecutors.allocatedProcessors(s) / 2)), + (s) -> Setting.parseInt(s, 1, "indices.recovery.max_concurrent_remote_store_streams"), Property.Dynamic, Property.NodeScope ); diff --git a/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java b/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java index 66f5b13449f05..37227596fdfe7 100644 --- a/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java +++ b/server/src/main/java/org/opensearch/indices/recovery/RemoteRecoveryTargetHandler.java @@ -75,6 +75,7 @@ public class RemoteRecoveryTargetHandler implements RecoveryTargetHandler { private final AtomicLong requestSeqNoGenerator = new AtomicLong(0); private final RetryableTransportClient retryableTransportClient; private final RemoteSegmentFileChunkWriter fileChunkWriter; + private final boolean remoteStoreEnabled; public RemoteRecoveryTargetHandler( long recoveryId, @@ -82,7 +83,8 @@ public RemoteRecoveryTargetHandler( TransportService transportService, DiscoveryNode targetNode, RecoverySettings recoverySettings, - Consumer onSourceThrottle + Consumer onSourceThrottle, + boolean remoteStoreEnabled ) { this.transportService = transportService; // It is safe to pass the retry timeout value here because RemoteRecoveryTargetHandler @@ -111,6 +113,7 @@ public RemoteRecoveryTargetHandler( requestSeqNoGenerator, onSourceThrottle ); + this.remoteStoreEnabled = remoteStoreEnabled; } public DiscoveryNode targetNode() { @@ -129,7 +132,13 @@ public void prepareForTranslogOperations(int totalTranslogOps, ActionListener reader = in -> TransportResponse.Empty.INSTANCE; final ActionListener responseListener = ActionListener.map(listener, r -> null); - retryableTransportClient.executeRetryableAction(action, request, responseListener, reader); + if (remoteStoreEnabled) { + // If remote store is enabled, during the prepare_translog phase, translog is also downloaded on the + // target host along with incremental segments download. + retryableTransportClient.executeRetryableAction(action, request, translogOpsRequestOptions, responseListener, reader); + } else { + retryableTransportClient.executeRetryableAction(action, request, responseListener, reader); + } } @Override diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSourceHandler.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSourceHandler.java index e2c47b0fb3159..674c09311c645 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSourceHandler.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSourceHandler.java @@ -12,8 +12,6 @@ import org.opensearch.OpenSearchException; import org.opensearch.action.StepListener; import org.opensearch.cluster.node.DiscoveryNode; -import org.opensearch.cluster.routing.IndexShardRoutingTable; -import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.common.logging.Loggers; import org.opensearch.common.util.CancellableThreads; import org.opensearch.common.util.concurrent.ListenableFuture; @@ -22,7 +20,6 @@ import org.opensearch.core.action.ActionListener; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.store.StoreFileMetadata; -import org.opensearch.indices.recovery.DelayRecoveryException; import org.opensearch.indices.recovery.FileChunkWriter; import org.opensearch.indices.recovery.MultiChunkTransfer; import org.opensearch.indices.replication.common.CopyState; @@ -146,12 +143,6 @@ public synchronized void sendFiles(GetSegmentFilesRequest request, ActionListene ); }; cancellableThreads.checkForCancel(); - final IndexShardRoutingTable routingTable = shard.getReplicationGroup().getRoutingTable(); - ShardRouting targetShardRouting = routingTable.getByAllocationId(request.getTargetAllocationId()); - if (targetShardRouting == null) { - logger.debug("delaying replication of {} as it is not listed as assigned to target node {}", shard.shardId(), targetNode); - throw new DelayRecoveryException("source node does not have the shard listed in its state as allocated on the node"); - } final StepListener sendFileStep = new StepListener<>(); Set storeFiles = new HashSet<>(Arrays.asList(shard.store().directory().listAll())); diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 2e78f7a14b6c2..e80b768074fc7 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -136,6 +136,7 @@ import org.opensearch.index.IndexModule; import org.opensearch.index.IndexSettings; import org.opensearch.index.IndexingPressureService; +import org.opensearch.index.SegmentReplicationStatsTracker; import org.opensearch.index.analysis.AnalysisRegistry; import org.opensearch.index.engine.EngineFactory; import org.opensearch.index.recovery.RemoteStoreRestoreService; @@ -993,6 +994,7 @@ protected Node( transportService.getTaskManager() ); + final SegmentReplicationStatsTracker segmentReplicationStatsTracker = new SegmentReplicationStatsTracker(indicesService); RepositoriesModule repositoriesModule = new RepositoriesModule( this.environment, pluginsService.filterPlugins(RepositoryPlugin.class), @@ -1132,6 +1134,7 @@ protected Node( fileCache, taskCancellationMonitoringService, resourceUsageCollectorService, + segmentReplicationStatsTracker, repositoryService ); @@ -1263,6 +1266,7 @@ protected Node( b.bind(MetricsRegistry.class).toInstance(metricsRegistry); b.bind(RemoteClusterStateService.class).toProvider(() -> remoteClusterStateService); b.bind(PersistedStateRegistry.class).toInstance(persistedStateRegistry); + b.bind(SegmentReplicationStatsTracker.class).toInstance(segmentReplicationStatsTracker); }); injector = modules.createInjector(); diff --git a/server/src/main/java/org/opensearch/node/NodeService.java b/server/src/main/java/org/opensearch/node/NodeService.java index e2d7bc2c86ba3..49dde0b81cac7 100644 --- a/server/src/main/java/org/opensearch/node/NodeService.java +++ b/server/src/main/java/org/opensearch/node/NodeService.java @@ -48,6 +48,7 @@ import org.opensearch.discovery.Discovery; import org.opensearch.http.HttpServerTransport; import org.opensearch.index.IndexingPressureService; +import org.opensearch.index.SegmentReplicationStatsTracker; import org.opensearch.index.store.remote.filecache.FileCache; import org.opensearch.indices.IndicesService; import org.opensearch.ingest.IngestService; @@ -96,6 +97,8 @@ public class NodeService implements Closeable { private final TaskCancellationMonitoringService taskCancellationMonitoringService; private final RepositoriesService repositoriesService; + private final SegmentReplicationStatsTracker segmentReplicationStatsTracker; + NodeService( Settings settings, ThreadPool threadPool, @@ -119,6 +122,7 @@ public class NodeService implements Closeable { FileCache fileCache, TaskCancellationMonitoringService taskCancellationMonitoringService, ResourceUsageCollectorService resourceUsageCollectorService, + SegmentReplicationStatsTracker segmentReplicationStatsTracker, RepositoriesService repositoriesService ) { this.settings = settings; @@ -146,6 +150,7 @@ public class NodeService implements Closeable { this.repositoriesService = repositoriesService; clusterService.addStateApplier(ingestService); clusterService.addStateApplier(searchPipelineService); + this.segmentReplicationStatsTracker = segmentReplicationStatsTracker; } public NodeInfo info( @@ -226,6 +231,7 @@ public NodeStats stats( boolean taskCancellation, boolean searchPipelineStats, boolean resourceUsageStats, + boolean segmentReplicationTrackerStats, boolean repositoriesStats ) { // for indices stats we want to include previous allocated shards stats as well (it will @@ -256,6 +262,7 @@ public NodeStats stats( fileCacheStats && fileCache != null ? fileCache.fileCacheStats() : null, taskCancellation ? this.taskCancellationMonitoringService.stats() : null, searchPipelineStats ? this.searchPipelineService.stats() : null, + segmentReplicationTrackerStats ? this.segmentReplicationStatsTracker.getTotalRejectionStats() : null, repositoriesStats ? this.repositoriesService.getRepositoriesStats() : null ); } diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java index 17cb68f798094..3e6052a5ef820 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java @@ -170,38 +170,94 @@ public T deserialize(String blobName, NamedXContentRegistry namedXContentRegistr * @param compressor whether to use compression */ public void write(final T obj, final BlobContainer blobContainer, final String name, final Compressor compressor) throws IOException { + write(obj, blobContainer, name, compressor, SNAPSHOT_ONLY_FORMAT_PARAMS); + } + + /** + * Writes blob with resolving the blob name using {@link #blobName} method. + *

+ * The blob will optionally by compressed. + * + * @param obj object to be serialized + * @param blobContainer blob container + * @param name blob name + * @param compressor whether to use compression + * @param params ToXContent params + */ + public void write( + final T obj, + final BlobContainer blobContainer, + final String name, + final Compressor compressor, + final ToXContent.Params params + ) throws IOException { final String blobName = blobName(name); - final BytesReference bytes = serialize(obj, blobName, compressor, SNAPSHOT_ONLY_FORMAT_PARAMS); + final BytesReference bytes = serialize(obj, blobName, compressor, params); blobContainer.writeBlob(blobName, bytes.streamInput(), bytes.length(), false); } /** - * Writes blob with resolving the blob name using {@link #blobName} method. - * Leverages the multipart upload if supported by the blobContainer. + * Internally calls {@link #writeAsyncWithPriority} with {@link WritePriority#NORMAL} + */ + public void writeAsync( + final T obj, + final BlobContainer blobContainer, + final String name, + final Compressor compressor, + ActionListener listener, + final ToXContent.Params params + ) throws IOException { + // use NORMAL priority by default + this.writeAsyncWithPriority(obj, blobContainer, name, compressor, WritePriority.NORMAL, listener, params); + } + + /** + * Internally calls {@link #writeAsyncWithPriority} with {@link WritePriority#URGENT} + *

+ * NOTE: We use this method to upload urgent priority objects like cluster state to remote stores. + * Use {@link #writeAsync(ToXContent, BlobContainer, String, Compressor, ActionListener, ToXContent.Params)} for + * other use cases. + */ + public void writeAsyncWithUrgentPriority( + final T obj, + final BlobContainer blobContainer, + final String name, + final Compressor compressor, + ActionListener listener, + final ToXContent.Params params + ) throws IOException { + this.writeAsyncWithPriority(obj, blobContainer, name, compressor, WritePriority.URGENT, listener, params); + } + + /** + * Method to writes blob with resolving the blob name using {@link #blobName} method with specified + * {@link WritePriority}. Leverages the multipart upload if supported by the blobContainer. * * @param obj object to be serialized * @param blobContainer blob container * @param name blob name * @param compressor whether to use compression + * @param priority write priority to be used * @param listener listener to listen to write result * @param params ToXContent params */ - public void writeAsync( + private void writeAsyncWithPriority( final T obj, final BlobContainer blobContainer, final String name, final Compressor compressor, + final WritePriority priority, ActionListener listener, final ToXContent.Params params ) throws IOException { if (blobContainer instanceof AsyncMultiStreamBlobContainer == false) { - write(obj, blobContainer, name, compressor); + write(obj, blobContainer, name, compressor, params); listener.onResponse(null); return; } final String blobName = blobName(name); final BytesReference bytes = serialize(obj, blobName, compressor, params); - final String resourceDescription = "ChecksumBlobStoreFormat.writeAsync(blob=\"" + blobName + "\")"; + final String resourceDescription = "ChecksumBlobStoreFormat.writeAsyncWithPriority(blob=\"" + blobName + "\")"; try (IndexInput input = new ByteArrayIndexInput(resourceDescription, BytesReference.toBytes(bytes))) { long expectedChecksum; try { @@ -221,7 +277,7 @@ public void writeAsync( blobName, bytes.length(), true, - WritePriority.HIGH, + priority, (size, position) -> new OffsetRangeIndexInputStream(input, size, position), expectedChecksum, ((AsyncMultiStreamBlobContainer) blobContainer).remoteIntegrityCheckSupported() diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java b/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java index a9514c298ef88..b6b2cf360d1c5 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/AttributeNames.java @@ -69,4 +69,29 @@ private AttributeNames() { * Action Name. */ public static final String TRANSPORT_ACTION = "action"; + + /** + * Index Name + */ + public static final String INDEX = "index"; + + /** + * Shard ID + */ + public static final String SHARD_ID = "shard_id"; + + /** + * Number of request items in bulk request + */ + public static final String BULK_REQUEST_ITEMS = "bulk_request_items"; + + /** + * Node ID + */ + public static final String NODE_ID = "node_id"; + + /** + * Refresh Policy + */ + public static final String REFRESH_POLICY = "refresh_policy"; } diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java b/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java index d97fbd371ab2a..1dce422943b7a 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/SpanBuilder.java @@ -8,6 +8,8 @@ package org.opensearch.telemetry.tracing; +import org.opensearch.action.bulk.BulkShardRequest; +import org.opensearch.action.support.replication.ReplicatedWriteRequest; import org.opensearch.common.annotation.InternalApi; import org.opensearch.core.common.Strings; import org.opensearch.http.HttpRequest; @@ -68,6 +70,10 @@ public static SpanCreationContext from(String action, Transport.Connection conne return SpanCreationContext.server().name(createSpanName(action, connection)).attributes(buildSpanAttributes(action, connection)); } + public static SpanCreationContext from(String spanName, String nodeId, ReplicatedWriteRequest request) { + return SpanCreationContext.server().name(spanName).attributes(buildSpanAttributes(nodeId, request)); + } + private static String createSpanName(HttpRequest httpRequest) { return httpRequest.method().name() + SEPARATOR + httpRequest.uri(); } @@ -150,4 +156,18 @@ private static Attributes buildSpanAttributes(String action, TcpChannel tcpChann return attributes; } + private static Attributes buildSpanAttributes(String nodeId, ReplicatedWriteRequest request) { + Attributes attributes = Attributes.create() + .addAttribute(AttributeNames.NODE_ID, nodeId) + .addAttribute(AttributeNames.REFRESH_POLICY, request.getRefreshPolicy().getValue()); + if (request.shardId() != null) { + attributes.addAttribute(AttributeNames.INDEX, request.shardId().getIndexName()) + .addAttribute(AttributeNames.SHARD_ID, request.shardId().getId()); + } + if (request instanceof BulkShardRequest) { + attributes.addAttribute(AttributeNames.BULK_REQUEST_ITEMS, ((BulkShardRequest) request).items().length); + } + return attributes; + } + } diff --git a/server/src/main/java/org/opensearch/threadpool/ThreadPool.java b/server/src/main/java/org/opensearch/threadpool/ThreadPool.java index fab7620292dd2..5f10986239300 100644 --- a/server/src/main/java/org/opensearch/threadpool/ThreadPool.java +++ b/server/src/main/java/org/opensearch/threadpool/ThreadPool.java @@ -273,7 +273,12 @@ public ThreadPool( ); builders.put( Names.REMOTE_RECOVERY, - new ScalingExecutorBuilder(Names.REMOTE_RECOVERY, 1, halfProcMaxAt10, TimeValue.timeValueMinutes(5)) + new ScalingExecutorBuilder( + Names.REMOTE_RECOVERY, + 1, + twiceAllocatedProcessors(allocatedProcessors), + TimeValue.timeValueMinutes(5) + ) ); if (FeatureFlags.isEnabled(FeatureFlags.CONCURRENT_SEGMENT_SEARCH)) { builders.put( diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java index 3491f18da9550..3050d1674a95b 100644 --- a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -36,10 +36,12 @@ import org.opensearch.action.admin.indices.stats.CommonStatsFlags; import org.opensearch.action.search.SearchRequestStats; import org.opensearch.cluster.coordination.PendingClusterStateStats; +import org.opensearch.cluster.coordination.PersistedStateStats; import org.opensearch.cluster.coordination.PublishClusterStateStats; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.routing.WeightedRoutingStats; import org.opensearch.cluster.service.ClusterManagerThrottlingStats; +import org.opensearch.cluster.service.ClusterStateStats; import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.metrics.OperationStats; import org.opensearch.core.common.io.stream.StreamInput; @@ -47,8 +49,10 @@ import org.opensearch.core.indices.breaker.AllCircuitBreakerStats; import org.opensearch.core.indices.breaker.CircuitBreakerStats; import org.opensearch.discovery.DiscoveryStats; +import org.opensearch.gateway.remote.RemotePersistenceStats; import org.opensearch.http.HttpStats; import org.opensearch.index.ReplicationStats; +import org.opensearch.index.SegmentReplicationRejectionStats; import org.opensearch.index.remote.RemoteSegmentStats; import org.opensearch.index.remote.RemoteTranslogTransferTracker; import org.opensearch.index.translog.RemoteTranslogStats; @@ -71,6 +75,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; @@ -348,6 +353,25 @@ public void testSerialization() throws IOException { assertEquals(queueStats.getTotal(), deserializedDiscoveryStats.getQueueStats().getTotal()); assertEquals(queueStats.getPending(), deserializedDiscoveryStats.getQueueStats().getPending()); } + ClusterStateStats stateStats = discoveryStats.getClusterStateStats(); + if (stateStats == null) { + assertNull(deserializedDiscoveryStats.getClusterStateStats()); + } else { + assertEquals(stateStats.getUpdateFailed(), deserializedDiscoveryStats.getClusterStateStats().getUpdateFailed()); + assertEquals(stateStats.getUpdateSuccess(), deserializedDiscoveryStats.getClusterStateStats().getUpdateSuccess()); + assertEquals( + stateStats.getUpdateTotalTimeInMillis(), + deserializedDiscoveryStats.getClusterStateStats().getUpdateTotalTimeInMillis() + ); + assertEquals(1, deserializedDiscoveryStats.getClusterStateStats().getPersistenceStats().size()); + PersistedStateStats deserializedRemoteStateStats = deserializedDiscoveryStats.getClusterStateStats() + .getPersistenceStats() + .get(0); + PersistedStateStats remoteStateStats = stateStats.getPersistenceStats().get(0); + assertEquals(remoteStateStats.getFailedCount(), deserializedRemoteStateStats.getFailedCount()); + assertEquals(remoteStateStats.getSuccessCount(), deserializedRemoteStateStats.getSuccessCount()); + assertEquals(remoteStateStats.getTotalTimeInMillis(), deserializedRemoteStateStats.getTotalTimeInMillis()); + } } IngestStats ingestStats = nodeStats.getIngestStats(); IngestStats deserializedIngestStats = deserializedNodeStats.getIngestStats(); @@ -417,6 +441,17 @@ public void testSerialization() throws IOException { assertEquals(aResourceUsageStats.getTimestamp(), bResourceUsageStats.getTimestamp()); }); } + SegmentReplicationRejectionStats segmentReplicationRejectionStats = nodeStats.getSegmentReplicationRejectionStats(); + SegmentReplicationRejectionStats deserializedSegmentReplicationRejectionStats = deserializedNodeStats + .getSegmentReplicationRejectionStats(); + if (segmentReplicationRejectionStats == null) { + assertNull(deserializedSegmentReplicationRejectionStats); + } else { + assertEquals( + segmentReplicationRejectionStats.getTotalRejectionCount(), + deserializedSegmentReplicationRejectionStats.getTotalRejectionCount() + ); + } ScriptCacheStats scriptCacheStats = nodeStats.getScriptCacheStats(); ScriptCacheStats deserializedScriptCacheStats = deserializedNodeStats.getScriptCacheStats(); if (scriptCacheStats == null) { @@ -713,12 +748,16 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { ScriptStats scriptStats = frequently() ? new ScriptStats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()) : null; + ClusterStateStats stateStats = new ClusterStateStats(); + RemotePersistenceStats remoteStateStats = new RemotePersistenceStats(); + stateStats.setPersistenceStats(Arrays.asList(remoteStateStats)); DiscoveryStats discoveryStats = frequently() ? new DiscoveryStats( randomBoolean() ? new PendingClusterStateStats(randomInt(), randomInt(), randomInt()) : null, randomBoolean() ? new PublishClusterStateStats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()) - : null + : null, + randomBoolean() ? stateStats : null ) : null; IngestStats ingestStats = null; @@ -812,6 +851,11 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { } nodesResourceUsageStats = new NodesResourceUsageStats(resourceUsageStatsMap); } + SegmentReplicationRejectionStats segmentReplicationRejectionStats = null; + if (frequently()) { + segmentReplicationRejectionStats = new SegmentReplicationRejectionStats(randomNonNegativeLong()); + } + ClusterManagerThrottlingStats clusterManagerThrottlingStats = null; if (frequently()) { clusterManagerThrottlingStats = new ClusterManagerThrottlingStats(); @@ -853,6 +897,7 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { null, null, null, + segmentReplicationRejectionStats, null ); } diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java index 0f67eff26cbde..cf7080ab2fc06 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java @@ -54,6 +54,7 @@ import org.opensearch.index.VersionType; import org.opensearch.indices.SystemIndices; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.VersionUtils; import org.opensearch.threadpool.ThreadPool; @@ -155,7 +156,8 @@ private void indicesThatCannotBeCreatedTestCase( new ClusterService(Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null) ), null, - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ) { @Override void executeBulk( diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java index 515f6eae28a34..141c630b94020 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java @@ -70,6 +70,7 @@ import org.opensearch.indices.SystemIndices; import org.opensearch.ingest.IngestService; import org.opensearch.tasks.Task; +import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.VersionUtils; import org.opensearch.threadpool.ThreadPool; @@ -172,7 +173,8 @@ class TestTransportBulkAction extends TransportBulkAction { new ClusterService(SETTINGS, new ClusterSettings(SETTINGS, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null) ), null, - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTests.java index 10cad6fb147a2..6bbd740df7f9c 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTests.java @@ -118,7 +118,8 @@ class TestTransportBulkAction extends TransportBulkAction { new AutoCreateIndex(Settings.EMPTY, clusterService.getClusterSettings(), new Resolver(), new SystemIndices(emptyMap())), new IndexingPressureService(Settings.EMPTY, clusterService), mock(IndicesService.class), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTookTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTookTests.java index 852e3837e1e7a..9d5b4430ea395 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTookTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionTookTests.java @@ -282,7 +282,8 @@ static class TestTransportBulkAction extends TransportBulkAction { new IndexingPressureService(Settings.EMPTY, clusterService), null, new SystemIndices(emptyMap()), - relativeTimeProvider + relativeTimeProvider, + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java index fe0fdd07025d9..b325cfa197933 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportShardBulkActionTests.java @@ -88,6 +88,7 @@ import org.opensearch.index.translog.Translog; import org.opensearch.indices.IndicesService; import org.opensearch.indices.SystemIndices; +import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; import org.opensearch.threadpool.ThreadPool.Names; @@ -1074,7 +1075,8 @@ public void testHandlePrimaryTermValidationRequestWithDifferentAllocationId() { mock(IndexingPressureService.class), mock(SegmentReplicationPressureService.class), mock(RemoteStorePressureService.class), - mock(SystemIndices.class) + mock(SystemIndices.class), + NoopTracer.INSTANCE ); action.handlePrimaryTermValidationRequest( new TransportShardBulkAction.PrimaryTermValidationRequest(aId + "-1", 1, shardId), @@ -1105,7 +1107,8 @@ public void testHandlePrimaryTermValidationRequestWithOlderPrimaryTerm() { mock(IndexingPressureService.class), mock(SegmentReplicationPressureService.class), mock(RemoteStorePressureService.class), - mock(SystemIndices.class) + mock(SystemIndices.class), + NoopTracer.INSTANCE ); action.handlePrimaryTermValidationRequest( new TransportShardBulkAction.PrimaryTermValidationRequest(aId, 1, shardId), @@ -1136,7 +1139,8 @@ public void testHandlePrimaryTermValidationRequestSuccess() { mock(IndexingPressureService.class), mock(SegmentReplicationPressureService.class), mock(RemoteStorePressureService.class), - mock(SystemIndices.class) + mock(SystemIndices.class), + NoopTracer.INSTANCE ); action.handlePrimaryTermValidationRequest( new TransportShardBulkAction.PrimaryTermValidationRequest(aId, 1, shardId), @@ -1178,7 +1182,8 @@ private TransportShardBulkAction createAction() { mock(IndexingPressureService.class), mock(SegmentReplicationPressureService.class), mock(RemoteStorePressureService.class), - mock(SystemIndices.class) + mock(SystemIndices.class), + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/action/resync/TransportResyncReplicationActionTests.java b/server/src/test/java/org/opensearch/action/resync/TransportResyncReplicationActionTests.java index 3bd8930064563..da87a0a967f53 100644 --- a/server/src/test/java/org/opensearch/action/resync/TransportResyncReplicationActionTests.java +++ b/server/src/test/java/org/opensearch/action/resync/TransportResyncReplicationActionTests.java @@ -203,7 +203,8 @@ public void testResyncDoesNotBlockOnPrimaryAction() throws Exception { shardStateAction, new ActionFilters(new HashSet<>()), new IndexingPressureService(Settings.EMPTY, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); assertThat(action.globalBlockLevel(), nullValue()); @@ -256,7 +257,8 @@ private TransportResyncReplicationAction createAction() { mock(ShardStateAction.class), new ActionFilters(new HashSet<>()), mock(IndexingPressureService.class), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); } } diff --git a/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionForIndexingPressureTests.java b/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionForIndexingPressureTests.java index 4a2185d1558f7..7212b1f5efe13 100644 --- a/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionForIndexingPressureTests.java +++ b/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionForIndexingPressureTests.java @@ -392,7 +392,8 @@ protected TestAction( ignore -> ThreadPool.Names.SAME, false, TransportWriteActionForIndexingPressureTests.this.indexingPressureService, - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionTests.java b/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionTests.java index 9d2069ac16190..b4549f82230bf 100644 --- a/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionTests.java +++ b/server/src/test/java/org/opensearch/action/support/replication/TransportWriteActionTests.java @@ -477,7 +477,8 @@ protected TestAction(boolean withDocumentFailureOnPrimary, boolean withDocumentF ignore -> ThreadPool.Names.SAME, false, new IndexingPressureService(Settings.EMPTY, TransportWriteActionTests.this.clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); this.withDocumentFailureOnPrimary = withDocumentFailureOnPrimary; this.withDocumentFailureOnReplica = withDocumentFailureOnReplica; @@ -505,7 +506,8 @@ protected TestAction( ignore -> ThreadPool.Names.SAME, false, new IndexingPressureService(settings, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); this.withDocumentFailureOnPrimary = false; this.withDocumentFailureOnReplica = false; diff --git a/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java b/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java index 6f03e87bf5824..f037b75dc16a3 100644 --- a/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java +++ b/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java @@ -192,6 +192,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ), new NodeStats( @@ -220,6 +221,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ), new NodeStats( @@ -248,6 +250,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ) ); @@ -307,6 +310,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ), new NodeStats( @@ -335,6 +339,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ), new NodeStats( @@ -363,6 +368,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ) ); diff --git a/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java b/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java index e40826915c848..cace66d8c6d9e 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/MetadataCreateIndexServiceTests.java @@ -139,6 +139,7 @@ import static org.opensearch.indices.IndicesService.CLUSTER_MINIMUM_INDEX_REFRESH_INTERVAL_SETTING; import static org.opensearch.indices.IndicesService.CLUSTER_REMOTE_INDEX_RESTRICT_ASYNC_DURABILITY_SETTING; import static org.opensearch.indices.IndicesService.CLUSTER_REPLICATION_TYPE_SETTING; +import static org.opensearch.indices.IndicesService.CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING; import static org.opensearch.indices.ShardLimitValidatorTests.createTestShardLimitService; import static org.opensearch.node.Node.NODE_ATTRIBUTES; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY; @@ -1177,6 +1178,8 @@ public void testvalidateIndexSettings() { .put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING.getKey() + "zone.values", "a, b") .put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING.getKey() + "rack.values", "c, d, e") .put(AwarenessReplicaBalance.CLUSTER_ROUTING_ALLOCATION_AWARENESS_BALANCE_SETTING.getKey(), true) + .put(CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING.getKey(), true) + .put(SETTING_REPLICATION_TYPE, randomFrom(ReplicationType.values())) .build(); ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); when(clusterService.getSettings()).thenReturn(settings); @@ -1200,8 +1203,12 @@ public void testvalidateIndexSettings() { ); List validationErrors = checkerService.getIndexSettingsValidationErrors(settings, false, Optional.empty()); - assertThat(validationErrors.size(), is(1)); - assertThat(validationErrors.get(0), is("expected total copies needs to be a multiple of total awareness attributes [3]")); + assertThat(validationErrors.size(), is(2)); + assertThat( + validationErrors.get(0), + is("index setting [index.replication.type] is not allowed to be set as [cluster.restrict.index.replication_type=true]") + ); + assertThat(validationErrors.get(1), is("expected total copies needs to be a multiple of total awareness attributes [3]")); settings = Settings.builder() .put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.getKey(), "zone, rack") @@ -1209,8 +1216,13 @@ public void testvalidateIndexSettings() { .put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP_SETTING.getKey() + "rack.values", "c, d, e") .put(AwarenessReplicaBalance.CLUSTER_ROUTING_ALLOCATION_AWARENESS_BALANCE_SETTING.getKey(), true) .put(SETTING_NUMBER_OF_REPLICAS, 2) + .put(CLUSTER_RESTRICT_INDEX_REPLICATION_TYPE_SETTING.getKey(), false) + .put(SETTING_REPLICATION_TYPE, randomFrom(ReplicationType.values())) .build(); + clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + when(clusterService.getClusterSettings()).thenReturn(clusterSettings); + validationErrors = checkerService.getIndexSettingsValidationErrors(settings, false, Optional.empty()); assertThat(validationErrors.size(), is(0)); diff --git a/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java b/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java index 40eefa6cdbf03..618fcb923bc60 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java @@ -627,6 +627,39 @@ public void testGlobalStateEqualsCoordinationMetadata() { assertFalse(Metadata.isGlobalStateEquals(metadata1, metadata2)); } + public void testGlobalResourcesStateEqualsCoordinationMetadata() { + CoordinationMetadata coordinationMetadata1 = new CoordinationMetadata( + randomNonNegativeLong(), + randomVotingConfig(), + randomVotingConfig(), + randomVotingConfigExclusions() + ); + Metadata metadata1 = Metadata.builder() + .coordinationMetadata(coordinationMetadata1) + .clusterUUID(randomAlphaOfLength(10)) + .clusterUUIDCommitted(false) + .hashesOfConsistentSettings(Map.of("a", "b")) + .persistentSettings(Settings.builder().put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), true).build()) + .build(); + CoordinationMetadata coordinationMetadata2 = new CoordinationMetadata( + randomNonNegativeLong(), + randomVotingConfig(), + randomVotingConfig(), + randomVotingConfigExclusions() + ); + Metadata metadata2 = Metadata.builder() + .coordinationMetadata(coordinationMetadata2) + .clusterUUIDCommitted(true) + .clusterUUID(randomAlphaOfLength(11)) + .hashesOfConsistentSettings(Map.of("b", "a")) + .persistentSettings(Settings.builder().put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), true).build()) + .build(); + + assertTrue(Metadata.isGlobalStateEquals(metadata1, metadata1)); + assertFalse(Metadata.isGlobalStateEquals(metadata1, metadata2)); + assertTrue(Metadata.isGlobalResourcesMetadataEquals(metadata1, metadata2)); + } + public void testSerializationWithIndexGraveyard() throws IOException { final IndexGraveyard graveyard = IndexGraveyardTests.createRandom(); final Metadata originalMeta = Metadata.builder().indexGraveyard(graveyard).build(); diff --git a/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodeTests.java b/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodeTests.java index 8b61e8f6d724d..c8a6fc76ce820 100644 --- a/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodeTests.java +++ b/server/src/test/java/org/opensearch/cluster/node/DiscoveryNodeTests.java @@ -38,13 +38,16 @@ import org.opensearch.common.settings.Settings; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.transport.TransportAddress; +import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; import org.opensearch.test.NodeRoles; import org.opensearch.test.OpenSearchTestCase; import java.net.InetAddress; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.Locale; +import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -81,6 +84,22 @@ public void testRolesAreSorted() { } + public void testRemoteStoreRedactionInToString() { + final Set roles = new HashSet<>(randomSubsetOf(DiscoveryNodeRole.BUILT_IN_ROLES)); + Map attributes = new HashMap<>(); + attributes.put(RemoteStoreNodeAttribute.REMOTE_STORE_SEGMENT_REPOSITORY_NAME_ATTRIBUTE_KEY, "test-repo"); + attributes.put(RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY, "test-repo"); + final DiscoveryNode node = new DiscoveryNode( + "name", + "id", + new TransportAddress(TransportAddress.META_ADDRESS, 9200), + attributes, + roles, + Version.CURRENT + ); + assertFalse(node.toString().contains(RemoteStoreNodeAttribute.REMOTE_STORE_NODE_ATTRIBUTE_KEY_PREFIX)); + } + public void testDiscoveryNodeIsCreatedWithHostFromInetAddress() throws Exception { InetAddress inetAddress = randomBoolean() ? InetAddress.getByName("192.0.2.1") diff --git a/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java b/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java index 9cdbe04e0a0e4..4c0ca826f5dcc 100644 --- a/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java @@ -691,6 +691,9 @@ public void clusterStateProcessed(String source, ClusterState oldState, ClusterS submittedTasksPerThread.get(entry.getKey()).get() ); } + // verify stats values after state is published + assertEquals(1, clusterManagerService.getClusterStateStats().getUpdateSuccess()); + assertEquals(0, clusterManagerService.getClusterStateStats().getUpdateFailed()); } } diff --git a/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java b/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java index 9b3fd45245ef7..1c43bb565ef69 100644 --- a/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java +++ b/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java @@ -41,6 +41,7 @@ import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodeRole; import org.opensearch.cluster.routing.IndexRoutingTable; +import org.opensearch.cluster.routing.IndexShardRoutingTable; import org.opensearch.cluster.routing.RecoverySource; import org.opensearch.cluster.routing.RoutingTable; import org.opensearch.cluster.routing.UnassignedInfo; @@ -52,12 +53,14 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.util.set.Sets; import org.opensearch.core.index.Index; +import org.opensearch.core.index.shard.ShardId; import org.opensearch.repositories.IndexId; import org.opensearch.test.OpenSearchTestCase; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.Map; import java.util.Set; import java.util.function.BiConsumer; import java.util.function.Function; @@ -275,7 +278,7 @@ public void testUpdateRoutingTable() { } } - public void testSkipRoutingTableUpdateWhenRemoteRecovery() { + public void testRoutingTableUpdateWhenRemoteStateRecovery() { final int numOfShards = randomIntBetween(1, 10); final IndexMetadata remoteMetadata = createIndexMetadata( @@ -286,7 +289,7 @@ public void testSkipRoutingTableUpdateWhenRemoteRecovery() { .build() ); - // Test remote index routing table is generated with ExistingStoreRecoverySource if no routing table is present + // Test remote index routing table is generated with ExistingStoreRecoverySource { final Index index = remoteMetadata.getIndex(); final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) @@ -322,48 +325,14 @@ public void testSkipRoutingTableUpdateWhenRemoteRecovery() { } - // Test remote index routing table is overridden if recovery source is not RemoteStoreRecoverySource + // Test remote index routing table is overridden if recovery source is RemoteStoreRecoverySource { - IndexRoutingTable.Builder remoteBuilderWithoutRemoteRecovery = new IndexRoutingTable.Builder(remoteMetadata.getIndex()) - .initializeAsNew(remoteMetadata); final Index index = remoteMetadata.getIndex(); - final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) - .metadata(Metadata.builder().put(remoteMetadata, false).build()) - .routingTable(new RoutingTable.Builder().add(remoteBuilderWithoutRemoteRecovery.build()).build()) - .build(); - assertTrue(initialState.routingTable().hasIndex(index)); - final ClusterState newState = updateRoutingTable(initialState); - IndexRoutingTable newRemoteIndexRoutingTable = newState.routingTable().index(remoteMetadata.getIndex()); - assertTrue(newState.routingTable().hasIndex(index)); - assertEquals( - 0, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.INDEX_CREATED) - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) - ) - ); - assertEquals( - 0, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource - ) - ); - - } - - // Test routing table update is skipped for a remote index - { + Map routingTableMap = new HashMap<>(); + for (int shardNumber = 0; shardNumber < remoteMetadata.getNumberOfShards(); shardNumber++) { + ShardId shardId = new ShardId(index, shardNumber); + routingTableMap.put(shardId, new IndexShardRoutingTable.Builder(new ShardId(remoteMetadata.getIndex(), 1)).build()); + } IndexRoutingTable.Builder remoteBuilderWithRemoteRecovery = new IndexRoutingTable.Builder(remoteMetadata.getIndex()) .initializeAsRemoteStoreRestore( remoteMetadata, @@ -372,10 +341,9 @@ public void testSkipRoutingTableUpdateWhenRemoteRecovery() { remoteMetadata.getCreationVersion(), new IndexId(remoteMetadata.getIndex().getName(), remoteMetadata.getIndexUUID()) ), - new HashMap<>(), + routingTableMap, true ); - final Index index = remoteMetadata.getIndex(); final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) .metadata(Metadata.builder().put(remoteMetadata, false).build()) .routingTable(new RoutingTable.Builder().add(remoteBuilderWithRemoteRecovery.build()).build()) @@ -387,205 +355,28 @@ public void testSkipRoutingTableUpdateWhenRemoteRecovery() { assertEquals( 0, newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.EXISTING_INDEX_RESTORED) - ) - ); - assertEquals( - 0, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) - ); - - } - - // Test reset routing table for 2 indices - one remote and one non remote. - // Routing table for non remote index should be updated and remote index routing table should remain intact - { - final IndexMetadata nonRemoteMetadata = createIndexMetadata( - "test-nonremote", - Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numOfShards).build() - ); - IndexRoutingTable.Builder remoteBuilderWithRemoteRecovery = new IndexRoutingTable.Builder(remoteMetadata.getIndex()) - .initializeAsRemoteStoreRestore( - remoteMetadata, - new RecoverySource.RemoteStoreRecoverySource( - UUIDs.randomBase64UUID(), - remoteMetadata.getCreationVersion(), - new IndexId(remoteMetadata.getIndex().getName(), remoteMetadata.getIndexUUID()) - ), - new HashMap<>(), - true - ); - IndexRoutingTable.Builder nonRemoteBuilderWithoutRemoteRecovery = new IndexRoutingTable.Builder(nonRemoteMetadata.getIndex()) - .initializeAsNew(nonRemoteMetadata); - final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) - .metadata(Metadata.builder().put(remoteMetadata, false).build()) - .metadata(Metadata.builder().put(nonRemoteMetadata, false).build()) - .routingTable( - new RoutingTable.Builder().add(remoteBuilderWithRemoteRecovery.build()) - .add(nonRemoteBuilderWithoutRemoteRecovery.build()) - .build() - ) - .build(); - assertTrue(initialState.routingTable().hasIndex(remoteMetadata.getIndex())); - assertTrue(initialState.routingTable().hasIndex(nonRemoteMetadata.getIndex())); - final ClusterState newState = updateRoutingTable(initialState); - assertTrue(newState.routingTable().hasIndex(remoteMetadata.getIndex())); - assertTrue(newState.routingTable().hasIndex(nonRemoteMetadata.getIndex())); - IndexRoutingTable newRemoteIndexRoutingTable = newState.routingTable().index(remoteMetadata.getIndex()); - IndexRoutingTable newNonRemoteIndexRoutingTable = newState.routingTable().index(nonRemoteMetadata.getIndex()); - assertEquals( - 0, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.EXISTING_INDEX_RESTORED) - ) - ); - assertEquals( - 0, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) - ); - assertEquals( - 0, - newNonRemoteIndexRoutingTable.shardsMatchingPredicateCount( shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.INDEX_CREATED) ) ); assertEquals( numOfShards, - newNonRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) - ) - ); - assertEquals( - 0, - newNonRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) - ); - assertEquals( - numOfShards, - newNonRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource - ) - ); - } - - // Test reset routing table for 2 indices, both remote backed but only once index has RemoteStoreRecoverySource. - // Routing table for only remote index without RemoteStoreRecoverySource should be updated - { - final IndexMetadata remoteWithoutRemoteRecoveryMetadata = createIndexMetadata( - "test-remote-without-recovery", - Settings.builder() - .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numOfShards) - .put(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, true) - .build() - ); - IndexRoutingTable.Builder remoteBuilderWithRemoteRecovery = new IndexRoutingTable.Builder(remoteMetadata.getIndex()) - .initializeAsRemoteStoreRestore( - remoteMetadata, - new RecoverySource.RemoteStoreRecoverySource( - UUIDs.randomBase64UUID(), - remoteMetadata.getCreationVersion(), - new IndexId(remoteMetadata.getIndex().getName(), remoteMetadata.getIndexUUID()) - ), - new HashMap<>(), - true - ); - IndexRoutingTable.Builder remoteBuilderWithoutRemoteRecovery = new IndexRoutingTable.Builder( - remoteWithoutRemoteRecoveryMetadata.getIndex() - ).initializeAsNew(remoteWithoutRemoteRecoveryMetadata); - final ClusterState initialState = ClusterState.builder(ClusterState.EMPTY_STATE) - .metadata(Metadata.builder().put(remoteMetadata, false).build()) - .metadata(Metadata.builder().put(remoteWithoutRemoteRecoveryMetadata, false).build()) - .routingTable( - new RoutingTable.Builder().add(remoteBuilderWithRemoteRecovery.build()) - .add(remoteBuilderWithoutRemoteRecovery.build()) - .build() - ) - .build(); - assertTrue(initialState.routingTable().hasIndex(remoteMetadata.getIndex())); - assertTrue(initialState.routingTable().hasIndex(remoteWithoutRemoteRecoveryMetadata.getIndex())); - final ClusterState newState = updateRoutingTable(initialState); - assertTrue(newState.routingTable().hasIndex(remoteMetadata.getIndex())); - assertTrue(newState.routingTable().hasIndex(remoteWithoutRemoteRecoveryMetadata.getIndex())); - IndexRoutingTable newRemoteIndexRoutingTable = newState.routingTable().index(remoteMetadata.getIndex()); - IndexRoutingTable newRemoteWithoutRemoteRecoveryIndexRoutingTable = newState.routingTable() - .index(remoteWithoutRemoteRecoveryMetadata.getIndex()); - assertEquals( - 0, newRemoteIndexRoutingTable.shardsMatchingPredicateCount( shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) ) ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.EXISTING_INDEX_RESTORED) - ) - ); assertEquals( 0, newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource - ) - ); - assertEquals( - numOfShards, - newRemoteIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource - ) - ); - assertEquals( - 0, - newRemoteWithoutRemoteRecoveryIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.INDEX_CREATED) - ) - ); - assertEquals( - numOfShards, - newRemoteWithoutRemoteRecoveryIndexRoutingTable.shardsMatchingPredicateCount( - shardRouting -> shardRouting.unassignedInfo().getReason().equals(UnassignedInfo.Reason.CLUSTER_RECOVERED) - ) - ); - assertEquals( - 0, - newRemoteWithoutRemoteRecoveryIndexRoutingTable.shardsMatchingPredicateCount( shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.RemoteStoreRecoverySource ) ); assertEquals( numOfShards, - newRemoteWithoutRemoteRecoveryIndexRoutingTable.shardsMatchingPredicateCount( + newRemoteIndexRoutingTable.shardsMatchingPredicateCount( shardRouting -> shardRouting.recoverySource() instanceof RecoverySource.EmptyStoreRecoverySource ) ); + } } diff --git a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java index 1d5c2a0f01b5c..fd113ed4313d7 100644 --- a/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java +++ b/server/src/test/java/org/opensearch/gateway/GatewayMetaStatePersistedStateTests.java @@ -68,6 +68,7 @@ import org.opensearch.gateway.PersistedClusterStateService.Writer; import org.opensearch.gateway.remote.ClusterMetadataManifest; import org.opensearch.gateway.remote.RemoteClusterStateService; +import org.opensearch.gateway.remote.RemotePersistenceStats; import org.opensearch.index.recovery.RemoteStoreRestoreService; import org.opensearch.index.recovery.RemoteStoreRestoreService.RemoteRestoreResult; import org.opensearch.node.Node; @@ -104,6 +105,7 @@ import static org.hamcrest.Matchers.nullValue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; +import static org.mockito.Mockito.doCallRealMethod; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -779,6 +781,26 @@ public void testRemotePersistedStateExceptionOnFullStateUpload() throws IOExcept assertThrows(OpenSearchException.class, () -> remotePersistedState.setLastAcceptedState(clusterState)); } + public void testRemotePersistedStateFailureStats() throws IOException { + RemotePersistenceStats remoteStateStats = new RemotePersistenceStats(); + final RemoteClusterStateService remoteClusterStateService = Mockito.mock(RemoteClusterStateService.class); + final String previousClusterUUID = "prev-cluster-uuid"; + Mockito.doThrow(IOException.class).when(remoteClusterStateService).writeFullMetadata(Mockito.any(), Mockito.any()); + when(remoteClusterStateService.getStats()).thenReturn(remoteStateStats); + doCallRealMethod().when(remoteClusterStateService).writeMetadataFailed(); + CoordinationState.PersistedState remotePersistedState = new RemotePersistedState(remoteClusterStateService, previousClusterUUID); + + final long clusterTerm = randomNonNegativeLong(); + final ClusterState clusterState = createClusterState( + randomNonNegativeLong(), + Metadata.builder().coordinationMetadata(CoordinationMetadata.builder().term(clusterTerm).build()).build() + ); + + assertThrows(OpenSearchException.class, () -> remotePersistedState.setLastAcceptedState(clusterState)); + assertEquals(1, remoteClusterStateService.getStats().getFailedCount()); + assertEquals(0, remoteClusterStateService.getStats().getSuccessCount()); + } + public void testGatewayForRemoteState() throws IOException { MockGatewayMetaState gateway = null; try { diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 433eac63e9580..5a43864f40c0c 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -9,6 +9,7 @@ package org.opensearch.gateway.remote; import org.opensearch.Version; +import org.opensearch.cluster.ClusterModule; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.coordination.CoordinationMetadata; @@ -27,6 +28,7 @@ import org.opensearch.common.blobstore.transfer.RemoteTransferContainer; import org.opensearch.common.compress.DeflateCompressor; import org.opensearch.common.lucene.store.ByteArrayIndexInput; +import org.opensearch.common.network.NetworkModule; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.core.ParseField; @@ -37,6 +39,7 @@ import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; import org.opensearch.index.remote.RemoteStoreUtils; +import org.opensearch.indices.IndicesModule; import org.opensearch.repositories.FilterRepository; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.RepositoryMissingException; @@ -65,11 +68,14 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Function; import java.util.function.Supplier; +import java.util.stream.Stream; import org.mockito.ArgumentCaptor; import org.mockito.ArgumentMatchers; +import static java.util.stream.Collectors.toList; import static org.opensearch.gateway.remote.RemoteClusterStateService.DELIMITER; import static org.opensearch.gateway.remote.RemoteClusterStateService.FORMAT_PARAMS; import static org.opensearch.gateway.remote.RemoteClusterStateService.INDEX_METADATA_CURRENT_CODEC_VERSION; @@ -96,6 +102,7 @@ public class RemoteClusterStateServiceTests extends OpenSearchTestCase { private RemoteClusterStateService remoteClusterStateService; + private ClusterSettings clusterSettings; private Supplier repositoriesServiceSupplier; private RepositoriesService repositoriesService; private BlobStoreRepository blobStoreRepository; @@ -126,16 +133,25 @@ public void setup() { .put(RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), true) .build(); + clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + NamedXContentRegistry xContentRegistry = new NamedXContentRegistry( + Stream.of( + NetworkModule.getNamedXContents().stream(), + IndicesModule.getNamedXContents().stream(), + ClusterModule.getNamedXWriteables().stream() + ).flatMap(Function.identity()).collect(toList()) + ); + blobStoreRepository = mock(BlobStoreRepository.class); blobStore = mock(BlobStore.class); when(blobStoreRepository.blobStore()).thenReturn(blobStore); when(repositoriesService.repository("remote_store_repository")).thenReturn(blobStoreRepository); - when(blobStoreRepository.getNamedXContentRegistry()).thenReturn(new NamedXContentRegistry(new ArrayList<>())); + when(blobStoreRepository.getNamedXContentRegistry()).thenReturn(xContentRegistry); remoteClusterStateService = new RemoteClusterStateService( "test-node-id", repositoriesServiceSupplier, settings, - new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + clusterSettings, () -> 0L, threadPool ); @@ -257,7 +273,7 @@ public void testWriteFullMetadataInParallelSuccess() throws IOException { new BytesArray(writtenBytes) ); - assertEquals(capturedWriteContext.getWritePriority(), WritePriority.HIGH); + assertEquals(capturedWriteContext.getWritePriority(), WritePriority.URGENT); assertEquals(writtenIndexMetadata.getNumberOfShards(), 1); assertEquals(writtenIndexMetadata.getNumberOfReplicas(), 0); assertEquals(writtenIndexMetadata.getIndex().getName(), "test-index"); @@ -308,6 +324,7 @@ public void testWriteFullMetadataInParallelFailureForIndexMetadata() throws IOEx RemoteClusterStateService.IndexMetadataTransferException.class, () -> remoteClusterStateService.writeFullMetadata(clusterState, randomAlphaOfLength(10)) ); + assertEquals(0, remoteClusterStateService.getStats().getSuccessCount()); } public void testFailWriteIncrementalMetadataNonClusterManagerNode() throws IOException { @@ -315,6 +332,7 @@ public void testFailWriteIncrementalMetadataNonClusterManagerNode() throws IOExc remoteClusterStateService.start(); final ClusterMetadataManifest manifest = remoteClusterStateService.writeIncrementalMetadata(clusterState, clusterState, null); Assert.assertThat(manifest, nullValue()); + assertEquals(0, remoteClusterStateService.getStats().getSuccessCount()); } public void testFailWriteIncrementalMetadataWhenTermChanged() { @@ -458,7 +476,7 @@ public void testGlobalMetadataOnlyUpdated() throws IOException { mockBlobStoreObjects(); final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); final ClusterState initialClusterState = ClusterState.builder(ClusterName.DEFAULT) - .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata)) + .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata).version(randomNonNegativeLong())) .build(); final ClusterMetadataManifest initialManifest = ClusterMetadataManifest.builder() .codecVersion(2) @@ -479,6 +497,7 @@ public void testGlobalMetadataOnlyUpdated() throws IOException { // new cluster state where only global metadata is different Metadata newMetadata = Metadata.builder(clusterState.metadata()) .persistentSettings(Settings.builder().put("cluster.blocks.read_only", true).build()) + .version(randomNonNegativeLong()) .build(); ClusterState newClusterState = ClusterState.builder(clusterState).metadata(newMetadata).build(); @@ -811,9 +830,9 @@ public void testReadLatestIndexMetadataSuccess() throws IOException { ).getIndices(); assertEquals(indexMetadataMap.size(), 1); - assertEquals(indexMetadataMap.get(index.getUUID()).getIndex().getName(), index.getName()); - assertEquals(indexMetadataMap.get(index.getUUID()).getNumberOfShards(), indexMetadata.getNumberOfShards()); - assertEquals(indexMetadataMap.get(index.getUUID()).getNumberOfReplicas(), indexMetadata.getNumberOfReplicas()); + assertEquals(indexMetadataMap.get(index.getName()).getIndex().getName(), index.getName()); + assertEquals(indexMetadataMap.get(index.getName()).getNumberOfShards(), indexMetadata.getNumberOfShards()); + assertEquals(indexMetadataMap.get(index.getName()).getNumberOfReplicas(), indexMetadata.getNumberOfReplicas()); } public void testMarkLastStateAsCommittedSuccess() throws IOException { @@ -886,7 +905,7 @@ public void testGetValidPreviousClusterUUIDWithMultipleChains() throws IOExcepti "cluster-uuid3", "cluster-uuid1" ); - mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers); + mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, randomBoolean()); remoteClusterStateService.start(); String previousClusterUUID = remoteClusterStateService.getLastKnownUUIDFromRemote("test-cluster"); @@ -974,6 +993,38 @@ public void testDeleteStaleClusterUUIDs() throws IOException { } } + public void testRemoteStateStats() throws IOException { + final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); + mockBlobStoreObjects(); + remoteClusterStateService.start(); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeFullMetadata(clusterState, "prev-cluster-uuid"); + + assertTrue(remoteClusterStateService.getStats() != null); + assertEquals(1, remoteClusterStateService.getStats().getSuccessCount()); + assertEquals(0, remoteClusterStateService.getStats().getCleanupAttemptFailedCount()); + assertEquals(0, remoteClusterStateService.getStats().getFailedCount()); + } + + public void testRemoteStateCleanupFailureStats() throws IOException { + BlobContainer blobContainer = mock(BlobContainer.class); + doThrow(IOException.class).when(blobContainer).delete(); + when(blobStore.blobContainer(any())).thenReturn(blobContainer); + BlobPath blobPath = new BlobPath().add("random-path"); + when((blobStoreRepository.basePath())).thenReturn(blobPath); + remoteClusterStateService.start(); + remoteClusterStateService.deleteStaleUUIDsClusterMetadata("cluster1", Arrays.asList("cluster-uuid1")); + try { + assertBusy(() -> { + // wait for stats to get updated + assertTrue(remoteClusterStateService.getStats() != null); + assertEquals(0, remoteClusterStateService.getStats().getSuccessCount()); + assertEquals(1, remoteClusterStateService.getStats().getCleanupAttemptFailedCount()); + }); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + public void testFileNames() { final Index index = new Index("test-index", "index-uuid"); final Settings idxSettings = Settings.builder() @@ -1038,7 +1089,44 @@ public void testSingleConcurrentExecutionOfStaleManifestCleanup() throws Excepti assertBusy(() -> assertEquals(1, callCount.get())); } + public void testIndexMetadataUploadWaitTimeSetting() { + // verify default value + assertEquals( + RemoteClusterStateService.INDEX_METADATA_UPLOAD_TIMEOUT_DEFAULT, + remoteClusterStateService.getIndexMetadataUploadTimeout() + ); + + // verify update index metadata upload timeout + int indexMetadataUploadTimeout = randomIntBetween(1, 10); + Settings newSettings = Settings.builder() + .put("cluster.remote_store.state.index_metadata.upload_timeout", indexMetadataUploadTimeout + "s") + .build(); + clusterSettings.applySettings(newSettings); + assertEquals(indexMetadataUploadTimeout, remoteClusterStateService.getIndexMetadataUploadTimeout().seconds()); + } + + public void testGlobalMetadataUploadWaitTimeSetting() { + // verify default value + assertEquals( + RemoteClusterStateService.GLOBAL_METADATA_UPLOAD_TIMEOUT_DEFAULT, + remoteClusterStateService.getGlobalMetadataUploadTimeout() + ); + + // verify update global metadata upload timeout + int globalMetadataUploadTimeout = randomIntBetween(1, 10); + Settings newSettings = Settings.builder() + .put("cluster.remote_store.state.global_metadata.upload_timeout", globalMetadataUploadTimeout + "s") + .build(); + clusterSettings.applySettings(newSettings); + assertEquals(globalMetadataUploadTimeout, remoteClusterStateService.getGlobalMetadataUploadTimeout().seconds()); + } + private void mockObjectsForGettingPreviousClusterUUID(Map clusterUUIDsPointers) throws IOException { + mockObjectsForGettingPreviousClusterUUID(clusterUUIDsPointers, false); + } + + private void mockObjectsForGettingPreviousClusterUUID(Map clusterUUIDsPointers, boolean differGlobalMetadata) + throws IOException { final BlobPath blobPath = mock(BlobPath.class); when((blobStoreRepository.basePath())).thenReturn(blobPath); when(blobPath.add(anyString())).thenReturn(blobPath); @@ -1060,7 +1148,8 @@ private void mockObjectsForGettingPreviousClusterUUID(Map cluste "cluster-uuid1", clusterUUIDsPointers.get("cluster-uuid1"), randomAlphaOfLength(10), - uploadedIndexMetadataList1 + uploadedIndexMetadataList1, + "test-metadata1" ); Settings indexSettings = Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT).build(); IndexMetadata indexMetadata1 = IndexMetadata.builder("index1") @@ -1073,8 +1162,12 @@ private void mockObjectsForGettingPreviousClusterUUID(Map cluste .numberOfShards(1) .numberOfReplicas(1) .build(); + Metadata metadata1 = Metadata.builder() + .persistentSettings(Settings.builder().put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), true).build()) + .build(); Map indexMetadataMap1 = Map.of("index-uuid1", indexMetadata1, "index-uuid2", indexMetadata2); - mockBlobContainer(blobContainer1, clusterManifest1, indexMetadataMap1); + mockBlobContainerForGlobalMetadata(blobContainer1, clusterManifest1, metadata1); + mockBlobContainer(blobContainer1, clusterManifest1, indexMetadataMap1, ClusterMetadataManifest.CODEC_V1); List uploadedIndexMetadataList2 = List.of( new UploadedIndexMetadata("index1", "index-uuid1", "key1"), @@ -1084,7 +1177,8 @@ private void mockObjectsForGettingPreviousClusterUUID(Map cluste "cluster-uuid2", clusterUUIDsPointers.get("cluster-uuid2"), randomAlphaOfLength(10), - uploadedIndexMetadataList2 + uploadedIndexMetadataList2, + "test-metadata2" ); IndexMetadata indexMetadata3 = IndexMetadata.builder("index1") .settings(indexSettings) @@ -1096,37 +1190,59 @@ private void mockObjectsForGettingPreviousClusterUUID(Map cluste .numberOfShards(1) .numberOfReplicas(1) .build(); + Metadata metadata2 = Metadata.builder() + .persistentSettings(Settings.builder().put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), true).build()) + .build(); Map indexMetadataMap2 = Map.of("index-uuid1", indexMetadata3, "index-uuid2", indexMetadata4); - mockBlobContainer(blobContainer2, clusterManifest2, indexMetadataMap2); + mockBlobContainerForGlobalMetadata(blobContainer2, clusterManifest2, metadata2); + mockBlobContainer(blobContainer2, clusterManifest2, indexMetadataMap2, ClusterMetadataManifest.CODEC_V1); + + // differGlobalMetadata controls which one of IndexMetadata or Metadata object would be different + // when comparing cluster-uuid3 and cluster-uuid1 state. + // if set true, only Metadata will differ b/w cluster uuid1 and cluster uuid3. + // If set to false, only IndexMetadata would be different + // Adding difference in EXACTLY on of these randomly will help us test if our uuid trimming logic compares both + // IndexMetadata and Metadata when deciding if the remote state b/w two different cluster uuids is same. + List uploadedIndexMetadataList3 = differGlobalMetadata + ? new ArrayList<>(uploadedIndexMetadataList1) + : List.of(new UploadedIndexMetadata("index1", "index-uuid1", "key1")); + IndexMetadata indexMetadata5 = IndexMetadata.builder("index1") + .settings(indexSettings) + .numberOfShards(1) + .numberOfReplicas(1) + .build(); + Map indexMetadataMap3 = differGlobalMetadata + ? new HashMap<>(indexMetadataMap1) + : Map.of("index-uuid1", indexMetadata5); + Metadata metadata3 = Metadata.builder() + .persistentSettings(Settings.builder().put(Metadata.SETTING_READ_ONLY_SETTING.getKey(), !differGlobalMetadata).build()) + .build(); - List uploadedIndexMetadataList3 = List.of(new UploadedIndexMetadata("index1", "index-uuid1", "key1")); final ClusterMetadataManifest clusterManifest3 = generateClusterMetadataManifest( "cluster-uuid3", clusterUUIDsPointers.get("cluster-uuid3"), randomAlphaOfLength(10), - uploadedIndexMetadataList3 + uploadedIndexMetadataList3, + "test-metadata3" ); - IndexMetadata indexMetadata5 = IndexMetadata.builder("index1") - .settings(indexSettings) - .numberOfShards(1) - .numberOfReplicas(1) - .build(); - Map indexMetadataMap3 = Map.of("index-uuid1", indexMetadata5); - mockBlobContainer(blobContainer3, clusterManifest3, indexMetadataMap3); - - when(blobStore.blobContainer(ArgumentMatchers.any())).thenReturn( - uuidBlobContainer, - blobContainer1, - blobContainer1, - blobContainer3, - blobContainer3, - blobContainer2, - blobContainer2, - blobContainer1, - blobContainer2, - blobContainer1, - blobContainer2 + mockBlobContainerForGlobalMetadata(blobContainer3, clusterManifest3, metadata3); + mockBlobContainer(blobContainer3, clusterManifest3, indexMetadataMap3, ClusterMetadataManifest.CODEC_V1); + + ArrayList mockBlobContainerOrderedList = new ArrayList<>( + List.of(blobContainer1, blobContainer1, blobContainer3, blobContainer3, blobContainer2, blobContainer2) + ); + + if (differGlobalMetadata) { + mockBlobContainerOrderedList.addAll( + List.of(blobContainer3, blobContainer1, blobContainer3, blobContainer1, blobContainer1, blobContainer3) + ); + } + mockBlobContainerOrderedList.addAll( + List.of(blobContainer2, blobContainer1, blobContainer2, blobContainer1, blobContainer1, blobContainer2) ); + BlobContainer[] mockBlobContainerOrderedArray = new BlobContainer[mockBlobContainerOrderedList.size()]; + mockBlobContainerOrderedList.toArray(mockBlobContainerOrderedArray); + when(blobStore.blobContainer(ArgumentMatchers.any())).thenReturn(uuidBlobContainer, mockBlobContainerOrderedArray); when(blobStoreRepository.getCompressor()).thenReturn(new DeflateCompressor()); } @@ -1134,7 +1250,8 @@ private ClusterMetadataManifest generateClusterMetadataManifest( String clusterUUID, String previousClusterUUID, String stateUUID, - List uploadedIndexMetadata + List uploadedIndexMetadata, + String globalMetadataFileName ) { return ClusterMetadataManifest.builder() .indices(uploadedIndexMetadata) @@ -1147,7 +1264,8 @@ private ClusterMetadataManifest generateClusterMetadataManifest( .previousClusterUUID(previousClusterUUID) .committed(true) .clusterUUIDCommitted(true) - .globalMetadataFileName("test-global-metadata") + .globalMetadataFileName(globalMetadataFileName) + .codecVersion(ClusterMetadataManifest.CODEC_V1) .build(); } @@ -1180,17 +1298,29 @@ private void mockBlobContainer( ClusterMetadataManifest clusterMetadataManifest, Map indexMetadataMap ) throws IOException { - BlobMetadata blobMetadata = new PlainBlobMetadata("manifestFileName", 1); + mockBlobContainer(blobContainer, clusterMetadataManifest, indexMetadataMap, ClusterMetadataManifest.CODEC_V0); + } + + private void mockBlobContainer( + BlobContainer blobContainer, + ClusterMetadataManifest clusterMetadataManifest, + Map indexMetadataMap, + int codecVersion + ) throws IOException { + String manifestFileName = codecVersion >= ClusterMetadataManifest.CODEC_V1 + ? "manifest__manifestFileName__abcd__abcd__abcd__1" + : "manifestFileName"; + BlobMetadata blobMetadata = new PlainBlobMetadata(manifestFileName, 1); when(blobContainer.listBlobsByPrefixInSortedOrder("manifest" + DELIMITER, 1, BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC)) .thenReturn(Arrays.asList(blobMetadata)); BytesReference bytes = RemoteClusterStateService.CLUSTER_METADATA_MANIFEST_FORMAT.serialize( clusterMetadataManifest, - "manifestFileName", + manifestFileName, blobStoreRepository.getCompressor(), FORMAT_PARAMS ); - when(blobContainer.readBlob("manifestFileName")).thenReturn(new ByteArrayInputStream(bytes.streamInput().readAllBytes())); + when(blobContainer.readBlob(manifestFileName)).thenReturn(new ByteArrayInputStream(bytes.streamInput().readAllBytes())); clusterMetadataManifest.getIndices().forEach(uploadedIndexMetadata -> { try { @@ -1199,15 +1329,15 @@ private void mockBlobContainer( return; } String fileName = uploadedIndexMetadata.getUploadedFilename(); - BytesReference bytesIndexMetadata = RemoteClusterStateService.INDEX_METADATA_FORMAT.serialize( - indexMetadata, - fileName, - blobStoreRepository.getCompressor(), - FORMAT_PARAMS - ); - when(blobContainer.readBlob(fileName + ".dat")).thenReturn( - new ByteArrayInputStream(bytesIndexMetadata.streamInput().readAllBytes()) - ); + when(blobContainer.readBlob(fileName + ".dat")).thenAnswer((invocationOnMock) -> { + BytesReference bytesIndexMetadata = RemoteClusterStateService.INDEX_METADATA_FORMAT.serialize( + indexMetadata, + fileName, + blobStoreRepository.getCompressor(), + FORMAT_PARAMS + ); + return new ByteArrayInputStream(bytesIndexMetadata.streamInput().readAllBytes()); + }); } catch (IOException e) { throw new RuntimeException(e); } @@ -1237,15 +1367,17 @@ private void mockBlobContainerForGlobalMetadata( ); when(blobContainer.readBlob(mockManifestFileName)).thenReturn(new ByteArrayInputStream(bytes.streamInput().readAllBytes())); - BytesReference bytesGlobalMetadata = RemoteClusterStateService.GLOBAL_METADATA_FORMAT.serialize( - metadata, - "global-metadata-file", - blobStoreRepository.getCompressor(), - FORMAT_PARAMS - ); String[] splitPath = clusterMetadataManifest.getGlobalMetadataFileName().split("/"); - when(blobContainer.readBlob(RemoteClusterStateService.GLOBAL_METADATA_FORMAT.blobName(splitPath[splitPath.length - 1]))).thenReturn( - new ByteArrayInputStream(bytesGlobalMetadata.streamInput().readAllBytes()) + when(blobContainer.readBlob(RemoteClusterStateService.GLOBAL_METADATA_FORMAT.blobName(splitPath[splitPath.length - 1]))).thenAnswer( + (invocationOnMock) -> { + BytesReference bytesGlobalMetadata = RemoteClusterStateService.GLOBAL_METADATA_FORMAT.serialize( + metadata, + "global-metadata-file", + blobStoreRepository.getCompressor(), + FORMAT_PARAMS + ); + return new ByteArrayInputStream(bytesGlobalMetadata.streamInput().readAllBytes()); + } ); } @@ -1281,7 +1413,12 @@ private static ClusterState.Builder generateClusterStateWithOneIndex() { .version(1L) .stateUUID("state-uuid") .metadata( - Metadata.builder().put(indexMetadata, true).clusterUUID("cluster-uuid").coordinationMetadata(coordinationMetadata).build() + Metadata.builder() + .version(randomNonNegativeLong()) + .put(indexMetadata, true) + .clusterUUID("cluster-uuid") + .coordinationMetadata(coordinationMetadata) + .build() ); } diff --git a/server/src/test/java/org/opensearch/index/SegmentReplicationPressureServiceTests.java b/server/src/test/java/org/opensearch/index/SegmentReplicationPressureServiceTests.java index 34fa13f0ba62c..478fdcb24f76a 100644 --- a/server/src/test/java/org/opensearch/index/SegmentReplicationPressureServiceTests.java +++ b/server/src/test/java/org/opensearch/index/SegmentReplicationPressureServiceTests.java @@ -278,6 +278,13 @@ private SegmentReplicationPressureService buildPressureService(Settings settings ClusterService clusterService = mock(ClusterService.class); when(clusterService.getClusterSettings()).thenReturn(new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)); - return new SegmentReplicationPressureService(settings, clusterService, indicesService, shardStateAction, mock(ThreadPool.class)); + return new SegmentReplicationPressureService( + settings, + clusterService, + indicesService, + shardStateAction, + new SegmentReplicationStatsTracker(indicesService), + mock(ThreadPool.class) + ); } } diff --git a/server/src/test/java/org/opensearch/index/SegmentReplicationStatsTrackerTests.java b/server/src/test/java/org/opensearch/index/SegmentReplicationStatsTrackerTests.java new file mode 100644 index 0000000000000..04423d583e8f9 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/SegmentReplicationStatsTrackerTests.java @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index; + +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.indices.IndicesService; +import org.opensearch.test.OpenSearchTestCase; + +import org.mockito.Mockito; + +import static org.mockito.Mockito.mock; + +public class SegmentReplicationStatsTrackerTests extends OpenSearchTestCase { + + private IndicesService indicesService = mock(IndicesService.class); + + public void testRejectedCount() { + SegmentReplicationStatsTracker segmentReplicationStatsTracker = new SegmentReplicationStatsTracker(indicesService); + + // Verify that total rejection count is 0 on an empty rejectionCount map in statsTracker. + assertTrue(segmentReplicationStatsTracker.getRejectionCount().isEmpty()); + assertEquals(segmentReplicationStatsTracker.getTotalRejectionStats().getTotalRejectionCount(), 0L); + + // Verify that total rejection count is 1 after incrementing rejectionCount. + segmentReplicationStatsTracker.incrementRejectionCount(Mockito.mock(ShardId.class)); + assertEquals(segmentReplicationStatsTracker.getTotalRejectionStats().getTotalRejectionCount(), 1L); + } + +} diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteSegmentTransferTrackerTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteSegmentTransferTrackerTests.java index 0bf00f9e48137..c87cdfcc8f1a1 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteSegmentTransferTrackerTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteSegmentTransferTrackerTests.java @@ -23,6 +23,8 @@ import java.util.HashMap; import java.util.Map; +import static org.opensearch.index.remote.RemoteSegmentTransferTracker.currentTimeMsUsingSystemNanos; + public class RemoteSegmentTransferTrackerTests extends OpenSearchTestCase { private RemoteStoreStatsTrackerFactory remoteStoreStatsTrackerFactory; private ClusterService clusterService; @@ -92,7 +94,7 @@ public void testUpdateLocalRefreshTimeMs() { directoryFileTransferTracker, remoteStoreStatsTrackerFactory.getMovingAverageWindowSize() ); - long refreshTimeMs = System.nanoTime() / 1_000_000L + randomIntBetween(10, 100); + long refreshTimeMs = currentTimeMsUsingSystemNanos() + randomIntBetween(10, 100); transferTracker.updateLocalRefreshTimeMs(refreshTimeMs); assertEquals(refreshTimeMs, transferTracker.getLocalRefreshTimeMs()); } @@ -103,7 +105,7 @@ public void testUpdateRemoteRefreshTimeMs() { directoryFileTransferTracker, remoteStoreStatsTrackerFactory.getMovingAverageWindowSize() ); - long refreshTimeMs = System.nanoTime() / 1_000_000 + randomIntBetween(10, 100); + long refreshTimeMs = currentTimeMsUsingSystemNanos() + randomIntBetween(10, 100); transferTracker.updateRemoteRefreshTimeMs(refreshTimeMs); assertEquals(refreshTimeMs, transferTracker.getRemoteRefreshTimeMs()); } @@ -133,20 +135,29 @@ public void testComputeSeqNoLagOnUpdate() { assertEquals(localRefreshSeqNo - remoteRefreshSeqNo, transferTracker.getRefreshSeqNoLag()); } - public void testComputeTimeLagOnUpdate() { + public void testComputeTimeLagOnUpdate() throws InterruptedException { transferTracker = new RemoteSegmentTransferTracker( shardId, directoryFileTransferTracker, remoteStoreStatsTrackerFactory.getMovingAverageWindowSize() ); - long currentLocalRefreshTimeMs = transferTracker.getLocalRefreshTimeMs(); - long currentTimeMs = System.nanoTime() / 1_000_000L; - long localRefreshTimeMs = currentTimeMs + randomIntBetween(100, 500); - long remoteRefreshTimeMs = currentTimeMs + randomIntBetween(50, 99); - transferTracker.updateLocalRefreshTimeMs(localRefreshTimeMs); - assertEquals(localRefreshTimeMs - currentLocalRefreshTimeMs, transferTracker.getTimeMsLag()); - transferTracker.updateRemoteRefreshTimeMs(remoteRefreshTimeMs); - assertEquals(localRefreshTimeMs - remoteRefreshTimeMs, transferTracker.getTimeMsLag()); + + // No lag if there is a remote upload corresponding to a local refresh + assertEquals(0, transferTracker.getTimeMsLag()); + + // Set a local refresh time that is higher than remote refresh time + Thread.sleep(1); + transferTracker.updateLocalRefreshTimeMs(currentTimeMsUsingSystemNanos()); + + // Sleep for 100ms and then the lag should be within 100ms +/- 20ms + Thread.sleep(100); + assertTrue(Math.abs(transferTracker.getTimeMsLag() - 100) <= 20); + + transferTracker.updateRemoteRefreshTimeMs(transferTracker.getLocalRefreshTimeMs()); + transferTracker.updateLocalRefreshTimeMs(currentTimeMsUsingSystemNanos()); + long random = randomIntBetween(50, 200); + Thread.sleep(random); + assertTrue(Math.abs(transferTracker.getTimeMsLag() - random) <= 20); } public void testAddUploadBytesStarted() { @@ -519,7 +530,7 @@ public void testStatsObjectCreation() { transferTracker = constructTracker(); RemoteSegmentTransferTracker.Stats transferTrackerStats = transferTracker.stats(); assertEquals(transferTracker.getShardId(), transferTrackerStats.shardId); - assertEquals(transferTracker.getTimeMsLag(), (int) transferTrackerStats.refreshTimeLagMs); + assertTrue(Math.abs(transferTracker.getTimeMsLag() - transferTrackerStats.refreshTimeLagMs) <= 20); assertEquals(transferTracker.getLocalRefreshSeqNo(), (int) transferTrackerStats.localRefreshNumber); assertEquals(transferTracker.getRemoteRefreshSeqNo(), (int) transferTrackerStats.remoteRefreshNumber); assertEquals(transferTracker.getBytesLag(), (int) transferTrackerStats.bytesLag); @@ -591,9 +602,9 @@ private RemoteSegmentTransferTracker constructTracker() { ); transferTracker.incrementTotalUploadsStarted(); transferTracker.incrementTotalUploadsFailed(); - transferTracker.updateUploadTimeMovingAverage(System.nanoTime() / 1_000_000L + randomIntBetween(10, 100)); + transferTracker.updateUploadTimeMovingAverage(currentTimeMsUsingSystemNanos() + randomIntBetween(10, 100)); transferTracker.updateUploadBytesMovingAverage(99); - transferTracker.updateRemoteRefreshTimeMs(System.nanoTime() / 1_000_000L + randomIntBetween(10, 100)); + transferTracker.updateRemoteRefreshTimeMs(currentTimeMsUsingSystemNanos() + randomIntBetween(10, 100)); transferTracker.incrementRejectionCount(); transferTracker.getDirectoryFileTransferTracker().addTransferredBytesStarted(10); transferTracker.getDirectoryFileTransferTracker().addTransferredBytesSucceeded(10, System.currentTimeMillis()); diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureServiceTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureServiceTests.java index de610083f3327..cb77174e612fd 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureServiceTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureServiceTests.java @@ -21,8 +21,11 @@ import java.util.HashMap; import java.util.Map; import java.util.concurrent.atomic.AtomicLong; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.IntStream; +import static org.opensearch.index.remote.RemoteSegmentTransferTracker.currentTimeMsUsingSystemNanos; import static org.opensearch.index.remote.RemoteStoreTestsHelper.createIndexShard; public class RemoteStorePressureServiceTests extends OpenSearchTestCase { @@ -68,7 +71,7 @@ public void testIsSegmentsUploadBackpressureEnabled() { assertTrue(pressureService.isSegmentsUploadBackpressureEnabled()); } - public void testValidateSegmentUploadLag() { + public void testValidateSegmentUploadLag() throws InterruptedException { // Create the pressure tracker IndexShard indexShard = createIndexShard(shardId, true); remoteStoreStatsTrackerFactory = new RemoteStoreStatsTrackerFactory(clusterService, Settings.EMPTY); @@ -86,14 +89,27 @@ public void testValidateSegmentUploadLag() { sum.addAndGet(i); }); double avg = (double) sum.get() / 20; - long currentMs = System.nanoTime() / 1_000_000; - pressureTracker.updateLocalRefreshTimeMs((long) (currentMs + 12 * avg)); - pressureTracker.updateRemoteRefreshTimeMs(currentMs); - Exception e = assertThrows(OpenSearchRejectedExecutionException.class, () -> pressureService.validateSegmentsUploadLag(shardId)); - assertTrue(e.getMessage().contains("due to remote segments lagging behind local segments")); - assertTrue(e.getMessage().contains("time_lag:114 ms dynamic_time_lag_threshold:95.0 ms")); - pressureTracker.updateRemoteRefreshTimeMs((long) (currentMs + 2 * avg)); + // We run this to ensure that the local and remote refresh time are not same anymore + while (pressureTracker.getLocalRefreshTimeMs() == currentTimeMsUsingSystemNanos()) { + Thread.sleep(10); + } + long localRefreshTimeMs = currentTimeMsUsingSystemNanos(); + pressureTracker.updateLocalRefreshTimeMs(localRefreshTimeMs); + + while (currentTimeMsUsingSystemNanos() - localRefreshTimeMs <= 20 * avg) { + Thread.sleep((long) (4 * avg)); + } + Exception e = assertThrows(OpenSearchRejectedExecutionException.class, () -> pressureService.validateSegmentsUploadLag(shardId)); + String regex = "^rejected execution on primary shard:\\[index]\\[0] due to remote segments lagging behind " + + "local segments.time_lag:[0-9]{2,3} ms dynamic_time_lag_threshold:95\\.0 ms$"; + Pattern pattern = Pattern.compile(regex); + Matcher matcher = pattern.matcher(e.getMessage()); + assertTrue(matcher.matches()); + + pressureTracker.updateRemoteRefreshTimeMs(pressureTracker.getLocalRefreshTimeMs()); + pressureTracker.updateLocalRefreshTimeMs(currentTimeMsUsingSystemNanos()); + Thread.sleep((long) (2 * avg)); pressureService.validateSegmentsUploadLag(shardId); // 2. bytes lag more than dynamic threshold diff --git a/server/src/test/java/org/opensearch/index/seqno/RetentionLeaseSyncActionTests.java b/server/src/test/java/org/opensearch/index/seqno/RetentionLeaseSyncActionTests.java index d9bca55a208c2..63a9ac2f2e8ec 100644 --- a/server/src/test/java/org/opensearch/index/seqno/RetentionLeaseSyncActionTests.java +++ b/server/src/test/java/org/opensearch/index/seqno/RetentionLeaseSyncActionTests.java @@ -125,7 +125,8 @@ public void testRetentionLeaseSyncActionOnPrimary() { shardStateAction, new ActionFilters(Collections.emptySet()), new IndexingPressureService(Settings.EMPTY, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); final RetentionLeases retentionLeases = mock(RetentionLeases.class); final RetentionLeaseSyncAction.Request request = new RetentionLeaseSyncAction.Request(indexShard.shardId(), retentionLeases); @@ -162,7 +163,8 @@ public void testRetentionLeaseSyncActionOnReplica() throws Exception { shardStateAction, new ActionFilters(Collections.emptySet()), new IndexingPressureService(Settings.EMPTY, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); final RetentionLeases retentionLeases = mock(RetentionLeases.class); final RetentionLeaseSyncAction.Request request = new RetentionLeaseSyncAction.Request(indexShard.shardId(), retentionLeases); @@ -203,7 +205,8 @@ public void testBlocks() { shardStateAction, new ActionFilters(Collections.emptySet()), new IndexingPressureService(Settings.EMPTY, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); assertNull(action.indexBlockLevel()); @@ -233,7 +236,8 @@ private RetentionLeaseSyncAction createAction() { shardStateAction, new ActionFilters(Collections.emptySet()), new IndexingPressureService(Settings.EMPTY, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); } diff --git a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java index 9ef9bec01cb38..fa3cf7676f55c 100644 --- a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java @@ -2745,6 +2745,7 @@ public void testRelocatedForRemoteTranslogBackedIndexWithAsyncDurability() throw AllocationId.newRelocation(routing.allocationId()) ); IndexShardTestCase.updateRoutingEntry(indexShard, routing); + indexDoc(indexShard, "_doc", "0"); assertTrue(indexShard.isSyncNeeded()); try { indexShard.relocated(routing.getTargetRelocatingShard().allocationId().getId(), primaryContext -> {}, () -> {}); diff --git a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java index fe389e3b3fcb4..703a7d457d5b6 100644 --- a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java @@ -471,6 +471,15 @@ public void onReplicationFailure( } } + @Override + protected void validateShardIdleWithNoReplicas(IndexShard primary) { + // ensure search idle conditions are met. + assertFalse(primary.isSearchIdleSupported()); + assertTrue(primary.isSearchIdle()); + assertTrue(primary.scheduledRefresh()); + assertFalse(primary.hasRefreshPending()); + } + private void assertSingleSegmentFile(IndexShard shard, String fileName) throws IOException { final Set segmentsFileNames = Arrays.stream(shard.store().directory().listAll()) .filter(file -> file.startsWith(IndexFileNames.SEGMENTS)) diff --git a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java index eab38bfe5c64d..7caff3e5f5479 100644 --- a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java @@ -436,13 +436,17 @@ public void testShardIdleWithNoReplicas() throws Exception { shards.startAll(); final IndexShard primary = shards.getPrimary(); shards.indexDocs(randomIntBetween(1, 10)); - // ensure search idle conditions are met. - assertTrue(primary.isSearchIdle()); - assertFalse(primary.scheduledRefresh()); - assertTrue(primary.hasRefreshPending()); + validateShardIdleWithNoReplicas(primary); } } + protected void validateShardIdleWithNoReplicas(IndexShard primary) { + // ensure search idle conditions are met. + assertTrue(primary.isSearchIdle()); + assertFalse(primary.scheduledRefresh()); + assertTrue(primary.hasRefreshPending()); + } + /** * here we are starting a new primary shard in PrimaryMode and testing if the shard publishes checkpoint after refresh. */ diff --git a/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java b/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java index 42e0df2dc90c1..3cb65610fab58 100644 --- a/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java +++ b/server/src/test/java/org/opensearch/index/translog/RemoteFsTranslogTests.java @@ -97,6 +97,7 @@ import java.util.zip.CheckedInputStream; import static org.opensearch.common.util.BigArrays.NON_RECYCLING_INSTANCE; +import static org.opensearch.index.IndexSettings.INDEX_REMOTE_TRANSLOG_KEEP_EXTRA_GEN_SETTING; import static org.opensearch.index.translog.RemoteFsTranslog.TRANSLOG; import static org.opensearch.index.translog.SnapshotMatchers.containsOperationsInAnyOrder; import static org.opensearch.index.translog.TranslogDeletionPolicies.createTranslogDeletionPolicy; @@ -124,6 +125,8 @@ public class RemoteFsTranslogTests extends OpenSearchTestCase { private ThreadPool threadPool; private final static String METADATA_DIR = "metadata"; private final static String DATA_DIR = "data"; + + AtomicInteger writeCalls = new AtomicInteger(); BlobStoreRepository repository; BlobStoreTransferService blobStoreTransferService; @@ -163,13 +166,13 @@ public void tearDown() throws Exception { private RemoteFsTranslog create(Path path) throws IOException { final String translogUUID = Translog.createEmptyTranslog(path, SequenceNumbers.NO_OPS_PERFORMED, shardId, primaryTerm.get()); - return create(path, createRepository(), translogUUID); + return create(path, createRepository(), translogUUID, 0); } - private RemoteFsTranslog create(Path path, BlobStoreRepository repository, String translogUUID) throws IOException { + private RemoteFsTranslog create(Path path, BlobStoreRepository repository, String translogUUID, int extraGenToKeep) throws IOException { this.repository = repository; globalCheckpoint = new AtomicLong(SequenceNumbers.NO_OPS_PERFORMED); - final TranslogConfig translogConfig = getTranslogConfig(path); + final TranslogConfig translogConfig = getTranslogConfig(path, extraGenToKeep); final TranslogDeletionPolicy deletionPolicy = createTranslogDeletionPolicy(translogConfig.getIndexSettings()); threadPool = new TestThreadPool(getClass().getName()); blobStoreTransferService = new BlobStoreTransferService(repository.blobStore(), threadPool); @@ -185,10 +188,17 @@ private RemoteFsTranslog create(Path path, BlobStoreRepository repository, Strin primaryMode::get, new RemoteTranslogTransferTracker(shardId, 10) ); + } + private RemoteFsTranslog create(Path path, BlobStoreRepository repository, String translogUUID) throws IOException { + return create(path, repository, translogUUID, 0); } private TranslogConfig getTranslogConfig(final Path path) { + return getTranslogConfig(path, 0); + } + + private TranslogConfig getTranslogConfig(final Path path, int gensToKeep) { final Settings settings = Settings.builder() .put(IndexMetadata.SETTING_VERSION_CREATED, org.opensearch.Version.CURRENT) // only randomize between nog age retention and a long one, so failures will have a chance of reproducing @@ -196,6 +206,7 @@ private TranslogConfig getTranslogConfig(final Path path) { .put(IndexSettings.INDEX_TRANSLOG_RETENTION_SIZE_SETTING.getKey(), randomIntBetween(-1, 2048) + "b") .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) .put(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, true) + .put(INDEX_REMOTE_TRANSLOG_KEEP_EXTRA_GEN_SETTING.getKey(), gensToKeep) .build(); return getTranslogConfig(path, settings); } @@ -372,6 +383,111 @@ public void testSimpleOperations() throws IOException { } + private TranslogConfig getConfig(int gensToKeep) { + Path tempDir = createTempDir(); + final TranslogConfig temp = getTranslogConfig(tempDir, gensToKeep); + final TranslogConfig config = new TranslogConfig( + temp.getShardId(), + temp.getTranslogPath(), + temp.getIndexSettings(), + temp.getBigArrays(), + new ByteSizeValue(1, ByteSizeUnit.KB), + "" + ); + return config; + } + + private ChannelFactory getChannelFactory() { + writeCalls = new AtomicInteger(); + final ChannelFactory channelFactory = (file, openOption) -> { + FileChannel delegate = FileChannel.open(file, openOption); + boolean success = false; + try { + // don't do partial writes for checkpoints we rely on the fact that the bytes are written as an atomic operation + final boolean isCkpFile = file.getFileName().toString().endsWith(".ckp"); + + final FileChannel channel; + if (isCkpFile) { + channel = delegate; + } else { + channel = new FilterFileChannel(delegate) { + + @Override + public int write(ByteBuffer src) throws IOException { + writeCalls.incrementAndGet(); + return super.write(src); + } + }; + } + success = true; + return channel; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(delegate); + } + } + }; + return channelFactory; + } + + public void testExtraGenToKeep() throws Exception { + TranslogConfig config = getConfig(1); + ChannelFactory channelFactory = getChannelFactory(); + final Set persistedSeqNos = new HashSet<>(); + String translogUUID = Translog.createEmptyTranslog( + config.getTranslogPath(), + SequenceNumbers.NO_OPS_PERFORMED, + shardId, + channelFactory, + primaryTerm.get() + ); + TranslogDeletionPolicy deletionPolicy = createTranslogDeletionPolicy(config.getIndexSettings()); + ArrayList ops = new ArrayList<>(); + try ( + RemoteFsTranslog translog = new RemoteFsTranslog( + config, + translogUUID, + deletionPolicy, + () -> SequenceNumbers.NO_OPS_PERFORMED, + primaryTerm::get, + persistedSeqNos::add, + repository, + threadPool, + () -> Boolean.TRUE, + new RemoteTranslogTransferTracker(shardId, 10) + ) { + @Override + ChannelFactory getChannelFactory() { + return channelFactory; + } + } + ) { + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("1", 0, primaryTerm.get(), new byte[] { 1 })); + + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("2", 1, primaryTerm.get(), new byte[] { 1 })); + + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("3", 2, primaryTerm.get(), new byte[] { 1 })); + + // expose the new checkpoint (simulating a commit), before we trim the translog + translog.setMinSeqNoToKeep(2); + + // Trims from local + translog.trimUnreferencedReaders(); + assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable())); + + addToTranslogAndListAndUpload(translog, ops, new Translog.Index("4", 3, primaryTerm.get(), new byte[] { 1 })); + + // Trims from remote now + translog.trimUnreferencedReaders(); + assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable())); + assertEquals( + 6, + blobStoreTransferService.listAll(getTranslogDirectory().add(DATA_DIR).add(String.valueOf(primaryTerm.get()))).size() + ); + + } + } + public void testReadLocation() throws IOException { ArrayList ops = new ArrayList<>(); ArrayList locs = new ArrayList<>(); @@ -619,14 +735,22 @@ public void testSimpleOperationsUpload() throws Exception { // this should now trim as tlog-2 files from remote, but not tlog-3 and tlog-4 addToTranslogAndListAndUpload(translog, ops, new Translog.Index("2", 2, primaryTerm.get(), new byte[] { 1 })); assertEquals(2, translog.stats().estimatedNumberOfOperations()); + assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable())); translog.setMinSeqNoToKeep(2); - - assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable())); + // this should now trim as tlog-2 files from remote, but not tlog-3 and tlog-4 translog.trimUnreferencedReaders(); + assertBusy(() -> assertTrue(translog.isRemoteGenerationDeletionPermitsAvailable())); assertEquals(1, translog.readers.size()); assertEquals(1, translog.stats().estimatedNumberOfOperations()); - assertBusy(() -> assertEquals(4, translog.allUploaded().size())); + assertBusy(() -> { + assertEquals(4, translog.allUploaded().size()); + assertEquals( + 4, + blobStoreTransferService.listAll(getTranslogDirectory().add(DATA_DIR).add(String.valueOf(primaryTerm.get()))).size() + ); + }); + } public void testMetadataFileDeletion() throws Exception { @@ -1273,49 +1397,10 @@ public void testTranslogWriter() throws IOException { } public void testTranslogWriterCanFlushInAddOrReadCall() throws IOException { - Path tempDir = createTempDir(); - final TranslogConfig temp = getTranslogConfig(tempDir); - final TranslogConfig config = new TranslogConfig( - temp.getShardId(), - temp.getTranslogPath(), - temp.getIndexSettings(), - temp.getBigArrays(), - new ByteSizeValue(1, ByteSizeUnit.KB), - "" - ); - + final TranslogConfig config = getConfig(1); final Set persistedSeqNos = new HashSet<>(); - final AtomicInteger writeCalls = new AtomicInteger(); - - final ChannelFactory channelFactory = (file, openOption) -> { - FileChannel delegate = FileChannel.open(file, openOption); - boolean success = false; - try { - // don't do partial writes for checkpoints we rely on the fact that the bytes are written as an atomic operation - final boolean isCkpFile = file.getFileName().toString().endsWith(".ckp"); - - final FileChannel channel; - if (isCkpFile) { - channel = delegate; - } else { - channel = new FilterFileChannel(delegate) { - - @Override - public int write(ByteBuffer src) throws IOException { - writeCalls.incrementAndGet(); - return super.write(src); - } - }; - } - success = true; - return channel; - } finally { - if (success == false) { - IOUtils.closeWhileHandlingException(delegate); - } - } - }; - + writeCalls = new AtomicInteger(); + final ChannelFactory channelFactory = getChannelFactory(); String translogUUID = Translog.createEmptyTranslog( config.getTranslogPath(), SequenceNumbers.NO_OPS_PERFORMED, diff --git a/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java b/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java index c114b56bd0b39..c5f36fcc01983 100644 --- a/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java +++ b/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java @@ -43,6 +43,7 @@ import org.opensearch.common.blobstore.fs.FsBlobStore; import org.opensearch.common.blobstore.stream.read.ReadContext; import org.opensearch.common.blobstore.stream.write.WriteContext; +import org.opensearch.common.blobstore.stream.write.WritePriority; import org.opensearch.common.compress.DeflateCompressor; import org.opensearch.common.io.Streams; import org.opensearch.common.io.stream.BytesStreamOutput; @@ -65,8 +66,13 @@ import java.util.Map; import java.util.concurrent.CountDownLatch; +import org.mockito.ArgumentCaptor; + import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.greaterThan; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; public class BlobStoreFormatTests extends OpenSearchTestCase { @@ -128,44 +134,36 @@ public void testBlobStoreAsyncOperations() throws IOException, InterruptedExcept BlobPath.cleanPath(), null ); + MockFsVerifyingBlobContainer spyContainer = spy(mockBlobContainer); ChecksumBlobStoreFormat checksumSMILE = new ChecksumBlobStoreFormat<>(BLOB_CODEC, "%s", BlobObj::fromXContent); - + ArgumentCaptor> actionListenerArgumentCaptor = ArgumentCaptor.forClass(ActionListener.class); + ArgumentCaptor writeContextArgumentCaptor = ArgumentCaptor.forClass(WriteContext.class); CountDownLatch latch = new CountDownLatch(2); - ActionListener actionListener = new ActionListener<>() { - @Override - public void onResponse(Void unused) { - logger.info("---> Async write succeeded"); - latch.countDown(); - } - - @Override - public void onFailure(Exception e) { - logger.info("---> Failure in async write"); - throw new RuntimeException("async write should not fail"); - } - }; - // Write blobs in different formats checksumSMILE.writeAsync( new BlobObj("checksum smile"), - mockBlobContainer, + spyContainer, "check-smile", CompressorRegistry.none(), - actionListener, + getVoidActionListener(latch), ChecksumBlobStoreFormat.SNAPSHOT_ONLY_FORMAT_PARAMS ); checksumSMILE.writeAsync( new BlobObj("checksum smile compressed"), - mockBlobContainer, + spyContainer, "check-smile-comp", CompressorRegistry.getCompressor(DeflateCompressor.NAME), - actionListener, + getVoidActionListener(latch), ChecksumBlobStoreFormat.SNAPSHOT_ONLY_FORMAT_PARAMS ); latch.await(); + verify(spyContainer, times(2)).asyncBlobUpload(writeContextArgumentCaptor.capture(), actionListenerArgumentCaptor.capture()); + assertEquals(2, writeContextArgumentCaptor.getAllValues().size()); + writeContextArgumentCaptor.getAllValues() + .forEach(writeContext -> assertEquals(WritePriority.NORMAL, writeContext.getWritePriority())); // Assert that all checksum blobs can be read assertEquals(checksumSMILE.read(mockBlobContainer.getDelegate(), "check-smile", xContentRegistry()).getText(), "checksum smile"); assertEquals( @@ -174,6 +172,39 @@ public void onFailure(Exception e) { ); } + public void testBlobStorePriorityAsyncOperation() throws IOException, InterruptedException { + BlobStore blobStore = createTestBlobStore(); + MockFsVerifyingBlobContainer mockBlobContainer = new MockFsVerifyingBlobContainer( + (FsBlobStore) blobStore, + BlobPath.cleanPath(), + null + ); + MockFsVerifyingBlobContainer spyContainer = spy(mockBlobContainer); + ChecksumBlobStoreFormat checksumSMILE = new ChecksumBlobStoreFormat<>(BLOB_CODEC, "%s", BlobObj::fromXContent); + + ArgumentCaptor> actionListenerArgumentCaptor = ArgumentCaptor.forClass(ActionListener.class); + ArgumentCaptor writeContextArgumentCaptor = ArgumentCaptor.forClass(WriteContext.class); + CountDownLatch latch = new CountDownLatch(1); + + // Write blobs in different formats + checksumSMILE.writeAsyncWithUrgentPriority( + new BlobObj("cluster state diff"), + spyContainer, + "cluster-state-diff", + CompressorRegistry.none(), + getVoidActionListener(latch), + ChecksumBlobStoreFormat.SNAPSHOT_ONLY_FORMAT_PARAMS + ); + latch.await(); + + verify(spyContainer).asyncBlobUpload(writeContextArgumentCaptor.capture(), actionListenerArgumentCaptor.capture()); + assertEquals(WritePriority.URGENT, writeContextArgumentCaptor.getValue().getWritePriority()); + assertEquals( + checksumSMILE.read(mockBlobContainer.getDelegate(), "cluster-state-diff", xContentRegistry()).getText(), + "cluster state diff" + ); + } + public void testBlobStoreOperations() throws IOException { BlobStore blobStore = createTestBlobStore(); BlobContainer blobContainer = blobStore.blobContainer(BlobPath.cleanPath()); @@ -228,6 +259,24 @@ public void testBlobCorruption() throws IOException { } } + private ActionListener getVoidActionListener(CountDownLatch latch) { + ActionListener actionListener = new ActionListener<>() { + @Override + public void onResponse(Void unused) { + logger.info("---> Async write succeeded"); + latch.countDown(); + } + + @Override + public void onFailure(Exception e) { + logger.info("---> Failure in async write"); + throw new RuntimeException("async write should not fail"); + } + }; + + return actionListener; + } + protected BlobStore createTestBlobStore() throws IOException { return new FsBlobStore(randomIntBetween(1, 8) * 1024, createTempDir(), false); } diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index 2f9f38d18a064..b7a2baacba611 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -178,6 +178,7 @@ import org.opensearch.gateway.TransportNodesListGatewayStartedShards; import org.opensearch.index.IndexingPressureService; import org.opensearch.index.SegmentReplicationPressureService; +import org.opensearch.index.SegmentReplicationStatsTracker; import org.opensearch.index.analysis.AnalysisRegistry; import org.opensearch.index.remote.RemoteStorePressureService; import org.opensearch.index.remote.RemoteStoreStatsTrackerFactory; @@ -2124,7 +2125,8 @@ public void onFailure(final Exception e) { shardStateAction, actionFilters, new IndexingPressureService(settings, clusterService), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ) ), new GlobalCheckpointSyncAction( @@ -2187,10 +2189,12 @@ public void onFailure(final Exception e) { clusterService, mock(IndicesService.class), mock(ShardStateAction.class), + mock(SegmentReplicationStatsTracker.class), mock(ThreadPool.class) ), mock(RemoteStorePressureService.class), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ); actions.put( BulkAction.INSTANCE, @@ -2214,7 +2218,8 @@ public void onFailure(final Exception e) { new AutoCreateIndex(settings, clusterSettings, indexNameExpressionResolver, new SystemIndices(emptyMap())), new IndexingPressureService(settings, clusterService), mock(IndicesService.class), - new SystemIndices(emptyMap()) + new SystemIndices(emptyMap()), + NoopTracer.INSTANCE ) ); final RestoreService restoreService = new RestoreService( diff --git a/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java b/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java index ba2d4b8c247bb..19271bbf30e80 100644 --- a/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java +++ b/server/src/test/java/org/opensearch/threadpool/ScalingThreadPoolTests.java @@ -154,7 +154,7 @@ private int expectedSize(final String threadPoolName, final int numberOfProcesso sizes.put(ThreadPool.Names.TRANSLOG_SYNC, n -> 4 * n); sizes.put(ThreadPool.Names.REMOTE_PURGE, ThreadPool::halfAllocatedProcessorsMaxFive); sizes.put(ThreadPool.Names.REMOTE_REFRESH_RETRY, ThreadPool::halfAllocatedProcessorsMaxTen); - sizes.put(ThreadPool.Names.REMOTE_RECOVERY, ThreadPool::halfAllocatedProcessorsMaxTen); + sizes.put(ThreadPool.Names.REMOTE_RECOVERY, ThreadPool::twiceAllocatedProcessors); return sizes.get(threadPoolName).apply(numberOfProcessors); } diff --git a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java index 60a54110fd0b4..2ba4de5e54a67 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java +++ b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java @@ -122,6 +122,7 @@ List adjustNodesStats(List nodesStats) { nodeStats.getFileCacheStats(), nodeStats.getTaskCancellationStats(), nodeStats.getSearchPipelineStats(), + nodeStats.getSegmentReplicationRejectionStats(), nodeStats.getRepositoriesStats() ); }).collect(Collectors.toList()); diff --git a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java index d24cc24d28579..28d7706fb1493 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java @@ -1016,6 +1016,11 @@ public void setLastAcceptedState(ClusterState clusterState) { delegate.setLastAcceptedState(clusterState); } + @Override + public PersistedStateStats getStats() { + return null; + } + @Override public void close() { assertTrue(openPersistedStates.remove(this)); diff --git a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java index 898e125b94954..952cd6c085966 100644 --- a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java @@ -1871,6 +1871,18 @@ public void stopAllNodes() { } } + /** + * Replace all nodes by stopping all current node and starting new node. + * Used for remote store test cases, where remote state is restored. + */ + public void resetCluster() { + int totalClusterManagerNodes = numClusterManagerNodes(); + int totalDataNodes = numDataNodes(); + stopAllNodes(); + startClusterManagerOnlyNodes(totalClusterManagerNodes); + startDataOnlyNodes(totalDataNodes); + } + private synchronized void startAndPublishNodesAndClients(List nodeAndClients) { if (nodeAndClients.size() > 0) { final int newClusterManagers = (int) nodeAndClients.stream() @@ -2722,6 +2734,7 @@ public void ensureEstimatedStats() { false, false, false, + false, false ); assertThat( diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java index c16cc1d2a5fba..ad27d9834f159 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java @@ -789,6 +789,30 @@ protected Settings featureFlagSettings() { return featureSettings.build(); } + /** + * Represent if it needs to trigger remote state restore or not. + * For tests with remote store enabled domain, it will be overridden to true. + * + * @return if needs to perform remote state restore or not + */ + protected boolean triggerRemoteStateRestore() { + return false; + } + + /** + * For tests with remote cluster state, it will reset the cluster and cluster state will be + * restored from remote. + */ + protected void performRemoteStoreTestAction() { + if (triggerRemoteStateRestore()) { + String clusterUUIDBefore = clusterService().state().metadata().clusterUUID(); + internalCluster().resetCluster(); + String clusterUUIDAfter = clusterService().state().metadata().clusterUUID(); + // assertion that UUID is changed post restore. + assertFalse(clusterUUIDBefore.equals(clusterUUIDAfter)); + } + } + /** * Creates one or more indices and asserts that the indices are acknowledged. If one of the indices * already exists this method will fail and wipe all the indices created so far.