diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1f8c27fa4f3dd..85aa6dec2fa80 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,9 +10,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Allow mmap to use new JDK-19 preview APIs in Apache Lucene 9.4+ ([#5151](https://github.com/opensearch-project/OpenSearch/pull/5151))
 - Add events correlation engine plugin ([#6854](https://github.com/opensearch-project/OpenSearch/issues/6854))
 - Introduce new dynamic cluster setting to control slice computation for concurrent segment search ([#9107](https://github.com/opensearch-project/OpenSearch/pull/9107))
-- Implement on behalf of token passing for extensions ([#8679](https://github.com/opensearch-project/OpenSearch/pull/8679))
+- Implement on behalf of token passing for extensions ([#8679](https://github.com/opensearch-project/OpenSearch/pull/8679), [#10664](https://github.com/opensearch-project/OpenSearch/pull/10664))
 - Provide service accounts tokens to extensions ([#9618](https://github.com/opensearch-project/OpenSearch/pull/9618))
+- [Admission control] Add enhancements to FS stats to include read/write time, queue size and IO time ([#10541](https://github.com/opensearch-project/OpenSearch/pull/10541))
 - [Admission control] Add Resource usage collector service and resource usage tracker ([#9890](https://github.com/opensearch-project/OpenSearch/pull/9890))
+- [Remote cluster state] Change file names for remote cluster state ([#10557](https://github.com/opensearch-project/OpenSearch/pull/10557))
+- [Remote cluster state] Upload global metadata in cluster state to remote store ([#10404](https://github.com/opensearch-project/OpenSearch/pull/10404))
+- [Remote cluster state] Download functionality of global metadata from remote store ([#10535](https://github.com/opensearch-project/OpenSearch/pull/10535))
 
 ### Dependencies
 - Bump `log4j-core` from 2.18.0 to 2.19.0
@@ -86,6 +90,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 ### Added
 - Per request phase latency ([#10351](https://github.com/opensearch-project/OpenSearch/issues/10351))
 - Add search query categorizer ([#10255](https://github.com/opensearch-project/OpenSearch/pull/10255))
+- [Remote Store] Add repository stats for remote store ([#10567](https://github.com/opensearch-project/OpenSearch/pull/10567))
 
 ### Dependencies
 - Bump `com.google.api.grpc:proto-google-common-protos` from 2.10.0 to 2.25.1 ([#10208](https://github.com/opensearch-project/OpenSearch/pull/10208), [#10298](https://github.com/opensearch-project/OpenSearch/pull/10298))
@@ -96,18 +101,23 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Bump `de.thetaphi:forbiddenapis` from 3.5.1 to 3.6 ([#10508](https://github.com/opensearch-project/OpenSearch/pull/10508))
 - Bump `commons-io:commons-io` from 2.13.0 to 2.14.0 ([#10294](https://github.com/opensearch-project/OpenSearch/pull/10294))
 - Bump `org.codehaus.woodstox:stax2-api` from 4.2.1 to 4.2.2 ([#10639](https://github.com/opensearch-project/OpenSearch/pull/10639))
+- Bump `com.google.http-client:google-http-client` from 1.43.2 to 1.43.3 ([#10635](https://github.com/opensearch-project/OpenSearch/pull/10635))
+- Bump `com.squareup.okio:okio` from 3.5.0 to 3.6.0 ([#10637](https://github.com/opensearch-project/OpenSearch/pull/10637))
 
 ### Changed
 - Mute the query profile IT with concurrent execution ([#9840](https://github.com/opensearch-project/OpenSearch/pull/9840))
 - Force merge with `only_expunge_deletes` honors max segment size ([#10036](https://github.com/opensearch-project/OpenSearch/pull/10036))
 - Add the means to extract the contextual properties from HttpChannel, TcpChannel and TransportChannel without excessive typecasting ([#10562](https://github.com/opensearch-project/OpenSearch/pull/10562))
 - [Remote Store] Add Remote Store backpressure rejection stats to `_nodes/stats` ([#10524](https://github.com/opensearch-project/OpenSearch/pull/10524))
+- [BUG] Fix java.lang.SecurityException in repository-gcs plugin ([#10642](https://github.com/opensearch-project/OpenSearch/pull/10642))
+- Add telemetry tracer/metric enable flag and integ test. ([#10395](https://github.com/opensearch-project/OpenSearch/pull/10395))
 
 ### Deprecated
 
 ### Removed
 
 ### Fixed
+- Fix failure in dissect ingest processor parsing empty brackets ([#9225](https://github.com/opensearch-project/OpenSearch/pull/9255))
 - Fix class_cast_exception when passing int to _version and other metadata fields in ingest simulate API ([#10101](https://github.com/opensearch-project/OpenSearch/pull/10101))
 - Fix Segment Replication ShardLockObtainFailedException bug during index corruption ([#10370](https://github.com/opensearch-project/OpenSearch/pull/10370))
 - Fix some test methods in SimulatePipelineRequestParsingTests never run and fix test failure ([#10496](https://github.com/opensearch-project/OpenSearch/pull/10496))
diff --git a/libs/dissect/src/main/java/org/opensearch/dissect/DissectParser.java b/libs/dissect/src/main/java/org/opensearch/dissect/DissectParser.java
index b6dc0ceb1028f..828d4b7de450e 100644
--- a/libs/dissect/src/main/java/org/opensearch/dissect/DissectParser.java
+++ b/libs/dissect/src/main/java/org/opensearch/dissect/DissectParser.java
@@ -231,7 +231,10 @@ public Map<String, String> parse(String inputString) {
         int lookAheadMatches;
         // start walking the input string byte by byte, look ahead for matches where needed
         // if a match is found jump forward to the end of the match
-        for (; i < input.length; i++) {
+        while (i < input.length) {
+            // start is only used to record the value of i
+            int start = i;
+
             lookAheadMatches = 0;
             // potential match between delimiter and input string
             if (delimiter.length > 0 && input[i] == delimiter[0]) {
@@ -283,8 +286,14 @@ public Map<String, String> parse(String inputString) {
                     delimiter = dissectPair.getDelimiter().getBytes(StandardCharsets.UTF_8);
                     // i is always one byte after the last found delimiter, aka the start of the next value
                     valueStart = i;
+                } else {
+                    i++;
                 }
+            } else {
+                i++;
             }
+            // i should change anyway
+            assert (i != start);
         }
         // the last key, grab the rest of the input (unless consecutive delimiters already grabbed the last key)
         // and there is no trailing delimiter
diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/MetricsTelemetry.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/MetricsTelemetry.java
index 2f70c28efb1cd..fb3dec8152b4f 100644
--- a/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/MetricsTelemetry.java
+++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/MetricsTelemetry.java
@@ -10,14 +10,12 @@
 
 import org.opensearch.common.annotation.ExperimentalApi;
 
-import java.io.Closeable;
-
 /**
  * Interface for metrics telemetry providers
  *
  * @opensearch.experimental
 */
 @ExperimentalApi
-public interface MetricsTelemetry extends MetricsRegistry, Closeable {
+public interface MetricsTelemetry extends MetricsRegistry {
 
 }
diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/DefaultTracer.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/DefaultTracer.java
index 79b7e4aca6c2f..a3bb64ea392a9 100644
--- a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/DefaultTracer.java
+++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/DefaultTracer.java
@@ -85,6 +85,11 @@ public SpanScope withSpanInScope(Span span) {
         return DefaultSpanScope.create(span, tracerContextStorage).attach();
     }
 
+    @Override
+    public boolean isRecording() {
+        return true;
+    }
+
     private Span createSpan(SpanCreationContext spanCreationContext, Span parentSpan) {
         return tracingTelemetry.createSpan(spanCreationContext, parentSpan);
     }
diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/Tracer.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/Tracer.java
index e6d4878a5e833..8257d251e9560 100644
--- a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/Tracer.java
+++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/Tracer.java
@@ -53,4 +53,10 @@ public interface Tracer extends HttpTracer, Closeable {
      */
     SpanScope withSpanInScope(Span span);
 
+    /**
+     * Tells if the traces are being recorded or not
+     * @return boolean
+     */
+    boolean isRecording();
+
 }
diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/noop/NoopTracer.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/noop/NoopTracer.java
index c073e8d3e766f..50452ff5fe3b4 100644
--- a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/noop/NoopTracer.java
+++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/noop/NoopTracer.java
@@ -54,6 +54,11 @@ public SpanScope withSpanInScope(Span span) {
         return SpanScope.NO_OP;
     }
 
+    @Override
+    public boolean isRecording() {
+        return false;
+    }
+
     @Override
     public void close() {
diff --git a/libs/telemetry/src/test/java/org/opensearch/telemetry/tracing/DefaultTracerTests.java b/libs/telemetry/src/test/java/org/opensearch/telemetry/tracing/DefaultTracerTests.java
index 0a717e993cb81..2a791f1ae4164 100644
--- a/libs/telemetry/src/test/java/org/opensearch/telemetry/tracing/DefaultTracerTests.java
+++ b/libs/telemetry/src/test/java/org/opensearch/telemetry/tracing/DefaultTracerTests.java
@@ -62,6 +62,7 @@ public void testCreateSpan() {
         String spanName = defaultTracer.getCurrentSpan().getSpan().getSpanName();
         assertEquals("span_name", spanName);
+        assertTrue(defaultTracer.isRecording());
     }
 
     @SuppressWarnings("unchecked")
diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DissectProcessorTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DissectProcessorTests.java
index ca0c0df40f009..e42a1147825d1 100644
--- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DissectProcessorTests.java
+++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DissectProcessorTests.java
@@ -155,4 +155,28 @@ public void testNullValueWithOutIgnoreMissing() {
         IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
         expectThrows(IllegalArgumentException.class, () -> processor.execute(ingestDocument));
     }
+
+    public void testMatchEmptyBrackets() {
+        IngestDocument ingestDocument = new IngestDocument(
+            "_index",
+            "_id",
+            null,
+            null,
+            null,
+            Collections.singletonMap("message", "[foo],[bar],[]")
+        );
+        DissectProcessor dissectProcessor = new DissectProcessor("", null, "message", "[%{a}],[%{b}],[%{c}]", "", true);
+        dissectProcessor.execute(ingestDocument);
+        assertEquals("foo", ingestDocument.getFieldValue("a", String.class));
+        assertEquals("bar", ingestDocument.getFieldValue("b", String.class));
+        assertEquals("", ingestDocument.getFieldValue("c", String.class));
+
+        ingestDocument = new IngestDocument("_index", "_id", null, null, null, Collections.singletonMap("message", "{}{}{}{baz}"));
+        dissectProcessor = new DissectProcessor("", null, "message", "{%{a}}{%{b}}{%{c}}{%{d}}", "", true);
+        dissectProcessor.execute(ingestDocument);
+        assertEquals("", ingestDocument.getFieldValue("a", String.class));
+        assertEquals("", ingestDocument.getFieldValue("b", String.class));
+        assertEquals("", ingestDocument.getFieldValue("c", String.class));
+        assertEquals("baz", ingestDocument.getFieldValue("d", String.class));
+    }
 }
diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/200_dissect_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/200_dissect_processor.yml
index 916a7fe656cc2..d90e5fbf2362b 100644
--- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/200_dissect_processor.yml
+++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/200_dissect_processor.yml
@@ -84,3 +84,38 @@ teardown:
         }
       ]
     }
+
+---
+"Test dissect processor can match empty brackets":
+  - do:
+      ingest.put_pipeline:
+        id: "my_pipeline"
+        body: >
+          {
+            "description": "_description",
+            "processors": [
+              {
+                "dissect" : {
+                  "field" : "message",
+                  "pattern" : "[%{a}][%{b}][%{c}]"
+                }
+              }
+            ]
+          }
+  - match: { acknowledged: true }
+
+  - do:
+      index:
+        index: test
+        id: 1
+        pipeline: "my_pipeline"
+        body: {message: "[foo][bar][]"}
+
+  - do:
+      get:
+        index: test
+        id: 1
+  - match: { _source.message: "[foo][bar][]" }
+  - match: { _source.a: "foo" }
+  - match: { _source.b: "bar" }
+  - match: { _source.c: "" }
diff --git a/plugins/repository-gcs/build.gradle b/plugins/repository-gcs/build.gradle
index da4978608a12f..1bef5146f1db9 100644
--- a/plugins/repository-gcs/build.gradle
+++ b/plugins/repository-gcs/build.gradle
@@ -75,8 +75,8 @@ dependencies {
   runtimeOnly "com.google.guava:guava:${versions.guava}"
   api 'com.google.guava:failureaccess:1.0.1'
 
-  api 'com.google.http-client:google-http-client:1.43.2'
-  api 'com.google.http-client:google-http-client-appengine:1.43.2'
+  api 'com.google.http-client:google-http-client:1.43.3'
+  api 'com.google.http-client:google-http-client-appengine:1.43.3'
 
   api 'com.google.http-client:google-http-client-gson:1.43.3'
   api 'com.google.http-client:google-http-client-jackson2:1.43.3'
diff --git a/plugins/repository-gcs/licenses/google-http-client-1.43.2.jar.sha1 b/plugins/repository-gcs/licenses/google-http-client-1.43.2.jar.sha1
deleted file mode 100644
index a576a74c62542..0000000000000
--- a/plugins/repository-gcs/licenses/google-http-client-1.43.2.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-2520469ebd8c0675f0d2aeafd2da665228320fcf
\ No newline at end of file
diff --git a/plugins/repository-gcs/licenses/google-http-client-1.43.3.jar.sha1 b/plugins/repository-gcs/licenses/google-http-client-1.43.3.jar.sha1
new file mode 100644
index 0000000000000..800467de8bdf3
--- /dev/null
+++ b/plugins/repository-gcs/licenses/google-http-client-1.43.3.jar.sha1
@@ -0,0 +1 @@
+a758b82e55a2f5f681e289c5ed384d3dbda6f3cd
\ No newline at end of file
diff --git a/plugins/repository-gcs/licenses/google-http-client-appengine-1.43.2.jar.sha1 b/plugins/repository-gcs/licenses/google-http-client-appengine-1.43.2.jar.sha1
deleted file mode 100644
index d8a9dba20070b..0000000000000
--- a/plugins/repository-gcs/licenses/google-http-client-appengine-1.43.2.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-9fb548c5264227813fd83991b94a705b0841c15f
\ No newline at end of file
diff --git a/plugins/repository-gcs/licenses/google-http-client-appengine-1.43.3.jar.sha1 b/plugins/repository-gcs/licenses/google-http-client-appengine-1.43.3.jar.sha1
new file mode 100644
index 0000000000000..4adcca6a55902
--- /dev/null
+++ b/plugins/repository-gcs/licenses/google-http-client-appengine-1.43.3.jar.sha1
@@ -0,0 +1 @@
+09d6cbdde6ea3469a67601a811b4e83de3e68a79
\ No newline at end of file
diff --git a/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/GoogleCloudStorageService.java b/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/GoogleCloudStorageService.java
index 445e1d65f3d3e..c9ebb3acaf3e5 100644
--- a/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/GoogleCloudStorageService.java
+++ b/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/GoogleCloudStorageService.java
@@ -228,7 +228,7 @@ StorageOptions createStorageOptions(
             }
             storageOptionsBuilder.setCredentials(serviceAccountCredentials);
         }
-        return storageOptionsBuilder.build();
+        return SocketAccess.doPrivilegedException(() -> storageOptionsBuilder.build());
     }
 
     /**
diff --git a/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/SocketAccess.java b/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/SocketAccess.java
index 197e772df30d5..35127d6ea4060 100644
--- a/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/SocketAccess.java
+++ b/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/SocketAccess.java
@@ -32,6 +32,7 @@
 
 package org.opensearch.repositories.gcs;
 
+import org.apache.logging.log4j.core.util.Throwables;
 import org.opensearch.SpecialPermission;
 import org.opensearch.common.CheckedRunnable;
 
@@ -71,4 +72,16 @@ public static void doPrivilegedVoidIOException(CheckedRunnable<IOException> action) throws IOException {
             throw (IOException) e.getCause();
         }
     }
+
+    public static <T> T doPrivilegedException(PrivilegedExceptionAction<T> operation) {
+        SpecialPermission.check();
+        try {
+            return AccessController.doPrivileged(operation);
+        } catch (PrivilegedActionException e) {
+            Throwables.rethrow(e.getCause());
+            assert false : "always throws";
+            return null;
+        }
+    }
+
 }
diff --git a/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java b/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java
index 5f88ad7867513..1361f3165b653 100644
--- a/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java
+++ b/plugins/repository-s3/src/internalClusterTest/java/org/opensearch/repositories/s3/S3BlobStoreRepositoryTests.java
@@ -37,6 +37,8 @@
 
 import software.amazon.awssdk.core.internal.http.pipeline.stages.ApplyTransactionIdStage;
 
+import org.opensearch.action.admin.indices.forcemerge.ForceMergeResponse;
+import org.opensearch.cluster.metadata.IndexMetadata;
 import org.opensearch.cluster.metadata.RepositoryMetadata;
 import org.opensearch.cluster.service.ClusterService;
 import org.opensearch.common.SuppressForbidden;
@@ -51,10 +53,15 @@
 import org.opensearch.core.xcontent.NamedXContentRegistry;
 import org.opensearch.indices.recovery.RecoverySettings;
 import org.opensearch.plugins.Plugin;
+import org.opensearch.repositories.RepositoriesService;
+import org.opensearch.repositories.Repository;
+import org.opensearch.repositories.RepositoryMissingException;
+import org.opensearch.repositories.RepositoryStats;
 import org.opensearch.repositories.blobstore.BlobStoreRepository;
 import org.opensearch.repositories.blobstore.OpenSearchMockAPIBasedRepositoryIntegTestCase;
 import org.opensearch.repositories.s3.utils.AwsRequestSigner;
 import org.opensearch.snapshots.mockstore.BlobStoreWrapper;
+import org.opensearch.test.BackgroundIndexer;
 import org.opensearch.test.OpenSearchIntegTestCase;
 import org.opensearch.threadpool.ThreadPool;
 
@@ -63,12 +70,18 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
+import java.util.stream.StreamSupport;
 
 import fixture.s3.S3HttpHandler;
 
+import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
+import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount;
 import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
 
 @SuppressForbidden(reason = "this test uses a HttpServer to emulate an S3 endpoint")
 // Need to set up a new cluster for each test because cluster settings use randomized authentication settings
@@ -152,6 +165,66 @@ protected Settings nodeSettings(int nodeOrdinal) {
         return builder.build();
     }
 
+    @Override
+    public void testRequestStats() throws Exception {
+        final String repository = createRepository(randomName());
+        final String index = "index-no-merges";
+        createIndex(
+            index,
+            Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0).build()
+        );
+
+        final long nbDocs = randomLongBetween(10_000L, 20_000L);
+        try (BackgroundIndexer indexer = new BackgroundIndexer(index, "_doc", client(), (int) nbDocs)) {
+            waitForDocs(nbDocs, indexer);
+        }
+
+        flushAndRefresh(index);
+        ForceMergeResponse forceMerge = client().admin().indices().prepareForceMerge(index).setFlush(true).setMaxNumSegments(1).get();
+        assertThat(forceMerge.getSuccessfulShards(), equalTo(1));
+        assertHitCount(client().prepareSearch(index).setSize(0).setTrackTotalHits(true).get(), nbDocs);
+
+        final String snapshot = "snapshot";
+        assertSuccessfulSnapshot(
+            client().admin().cluster().prepareCreateSnapshot(repository, snapshot).setWaitForCompletion(true).setIndices(index)
+        );
+
+        assertAcked(client().admin().indices().prepareDelete(index));
+
+        assertSuccessfulRestore(client().admin().cluster().prepareRestoreSnapshot(repository, snapshot).setWaitForCompletion(true));
+        ensureGreen(index);
+        assertHitCount(client().prepareSearch(index).setSize(0).setTrackTotalHits(true).get(), nbDocs);
+
+        assertAcked(client().admin().cluster().prepareDeleteSnapshot(repository, snapshot).get());
+
+        final RepositoryStats repositoryStats = StreamSupport.stream(
+            internalCluster().getInstances(RepositoriesService.class).spliterator(),
+            false
+        ).map(repositoriesService -> {
+            try {
+                return repositoriesService.repository(repository);
+            } catch (RepositoryMissingException e) {
+                return null;
+            }
+        }).filter(Objects::nonNull).map(Repository::stats).reduce(RepositoryStats::merge).get();
+
+        Map<BlobStore.Metric, Map<String, Long>> extendedStats = repositoryStats.extendedStats;
+        Map<String, Long> aggregatedStats = new HashMap<>();
+        extendedStats.forEach((k, v) -> {
+            if (k == BlobStore.Metric.RETRY_COUNT || k == BlobStore.Metric.REQUEST_SUCCESS || k == BlobStore.Metric.REQUEST_FAILURE) {
+                for (Map.Entry<String, Long> entry : v.entrySet()) {
+                    aggregatedStats.merge(entry.getKey(), entry.getValue(), Math::addExact);
+                }
+            }
+
+        });
+        final Map<String, Long> mockCalls = getMockRequestCounts();
+
+        String assertionErrorMsg = String.format("SDK sent [%s] calls and handler measured [%s] calls", aggregatedStats, mockCalls);
+
+        assertEquals(assertionErrorMsg, mockCalls, aggregatedStats);
+    }
+
     /**
      * S3RepositoryPlugin that allows to disable chunked encoding and to set a low threshold between single upload and multipart upload.
     */
@@ -263,6 +336,8 @@ public void maybeTrack(final String request, Headers requestHeaders) {
             trackRequest("PutMultipartObject");
         } else if (Regex.simpleMatch("PUT /*/*", request)) {
             trackRequest("PutObject");
+        } else if (Regex.simpleMatch("POST /*?delete*", request)) {
+            trackRequest("DeleteObjects");
         }
     }
diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java
index 9777bd974d56c..24aee99242957 100644
--- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java
+++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobContainer.java
@@ -199,7 +199,7 @@ public void asyncBlobUpload(WriteContext writeContext, ActionListener<Void> completionListener)
             ? amazonS3Reference.get().priorityClient()
             : amazonS3Reference.get().client();
         CompletableFuture<Void> completableFuture = blobStore.getAsyncTransferManager()
-            .uploadObject(s3AsyncClient, uploadRequest, streamContext);
+            .uploadObject(s3AsyncClient, uploadRequest, streamContext, blobStore.getStatsMetricPublisher());
         completableFuture.whenComplete((response, throwable) -> {
             if (throwable == null) {
                 completionListener.onResponse(response);
@@ -384,7 +384,7 @@ private void doDeleteBlobs(List<String> blobNames, boolean relative) throws IOException {
         assert outstanding.isEmpty();
     }
 
-    private static DeleteObjectsRequest bulkDelete(String bucket, List<String> blobs) {
+    private DeleteObjectsRequest bulkDelete(String bucket, List<String> blobs) {
         return DeleteObjectsRequest.builder()
             .bucket(bucket)
             .delete(
@@ -393,6 +393,7 @@ private static DeleteObjectsRequest bulkDelete(String bucket, List<String> blobs) {
                 .quiet(true)
                 .build()
             )
+            .overrideConfiguration(o -> o.addMetricPublisher(blobStore.getStatsMetricPublisher().deleteObjectsMetricPublisher))
             .build();
     }
diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java
index 80005d92344a4..f568d871dd31a 100644
--- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java
+++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3BlobStore.java
@@ -47,6 +47,8 @@
 import org.opensearch.repositories.s3.async.AsyncTransferManager;
 
 import java.io.IOException;
+import java.util.Collections;
+import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
 
@@ -180,6 +182,16 @@ public Map<String, Long> stats() {
         return statsMetricPublisher.getStats().toMap();
     }
 
+    @Override
+    public Map<Metric, Map<String, Long>> extendedStats() {
+        if (statsMetricPublisher.getExtendedStats() == null || statsMetricPublisher.getExtendedStats().isEmpty()) {
+            return Collections.emptyMap();
+        }
+        Map<Metric, Map<String, Long>> extendedStats = new HashMap<>();
+        statsMetricPublisher.getExtendedStats().forEach((k, v) -> extendedStats.put(k, v.toMap()));
+        return extendedStats;
+    }
+
     public ObjectCannedACL getCannedACL() {
         return cannedACL;
     }
diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java
index a80ee0ca35fae..c6450e49d08e2 100644
--- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java
+++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java
@@ -38,6 +38,7 @@
 import org.opensearch.cluster.service.ClusterService;
 import org.opensearch.common.settings.Setting;
 import org.opensearch.common.settings.Settings;
+import org.opensearch.common.unit.TimeValue;
 import org.opensearch.common.util.concurrent.OpenSearchExecutors;
 import org.opensearch.core.common.io.stream.NamedWriteableRegistry;
 import org.opensearch.core.xcontent.NamedXContentRegistry;
@@ -55,6 +56,7 @@
 import org.opensearch.script.ScriptService;
 import org.opensearch.threadpool.ExecutorBuilder;
 import org.opensearch.threadpool.FixedExecutorBuilder;
+import org.opensearch.threadpool.ScalingExecutorBuilder;
 import org.opensearch.threadpool.ThreadPool;
 import org.opensearch.watcher.ResourceWatcherService;
 
@@ -93,17 +95,21 @@ public S3RepositoryPlugin(final Settings settings, final Path configPath) {
     @Override
     public List<ExecutorBuilder<?>> getExecutorBuilders(Settings settings) {
         List<ExecutorBuilder<?>> executorBuilders = new ArrayList<>();
+        int halfProcMaxAt5 = halfAllocatedProcessorsMaxFive(allocatedProcessors(settings));
         executorBuilders.add(
             new FixedExecutorBuilder(settings, PRIORITY_FUTURE_COMPLETION, priorityPoolCount(settings), 10_000, PRIORITY_FUTURE_COMPLETION)
         );
-        executorBuilders.add(
-            new FixedExecutorBuilder(settings, PRIORITY_STREAM_READER, priorityPoolCount(settings), 10_000, PRIORITY_STREAM_READER)
-        );
+        executorBuilders.add(new ScalingExecutorBuilder(PRIORITY_STREAM_READER, 1, halfProcMaxAt5, TimeValue.timeValueMinutes(5)));
+
         executorBuilders.add(new FixedExecutorBuilder(settings, FUTURE_COMPLETION, normalPoolCount(settings), 10_000, FUTURE_COMPLETION));
-        executorBuilders.add(new FixedExecutorBuilder(settings, STREAM_READER, normalPoolCount(settings), 10_000, STREAM_READER));
+        executorBuilders.add(new ScalingExecutorBuilder(STREAM_READER, 1, halfProcMaxAt5, TimeValue.timeValueMinutes(5)));
         return executorBuilders;
     }
 
+    static int halfAllocatedProcessorsMaxFive(final int allocatedProcessors) {
+        return boundedBy((allocatedProcessors + 1) / 2, 1, 5);
+    }
+
     S3RepositoryPlugin(final Settings settings, final Path configPath, final S3Service service, final S3AsyncService s3AsyncService) {
         this.service = Objects.requireNonNull(service, "S3 service must not be null");
         this.configPath = configPath;
diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/StatsMetricPublisher.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/StatsMetricPublisher.java
index cad0037f99249..0c63bfdb1ff97 100644
--- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/StatsMetricPublisher.java
+++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/StatsMetricPublisher.java
@@ -8,10 +8,13 @@
 
 package org.opensearch.repositories.s3;
 
-import software.amazon.awssdk.http.HttpMetric;
 import software.amazon.awssdk.metrics.MetricCollection;
 import software.amazon.awssdk.metrics.MetricPublisher;
+import software.amazon.awssdk.metrics.MetricRecord;
 
+import org.opensearch.common.blobstore.BlobStore;
+
+import java.time.Duration;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicLong;
@@ -20,18 +23,67 @@ public class StatsMetricPublisher {
 
     private final Stats stats = new Stats();
 
+    private final Map<BlobStore.Metric, Stats> extendedStats = new HashMap<>() {
+        {
+            put(BlobStore.Metric.REQUEST_LATENCY, new Stats());
+            put(BlobStore.Metric.REQUEST_SUCCESS, new Stats());
+            put(BlobStore.Metric.REQUEST_FAILURE, new Stats());
+            put(BlobStore.Metric.RETRY_COUNT, new Stats());
+        }
+    };
+
     public MetricPublisher listObjectsMetricPublisher = new MetricPublisher() {
         @Override
         public void publish(MetricCollection metricCollection) {
-            stats.listCount.addAndGet(
-                metricCollection.children()
-                    .stream()
-                    .filter(
-                        metricRecords -> metricRecords.name().equals("ApiCallAttempt")
-                            && !metricRecords.metricValues(HttpMetric.HTTP_STATUS_CODE).isEmpty()
-                    )
-                    .count()
-            );
+            for (MetricRecord<?> metricRecord : metricCollection) {
+                switch (metricRecord.metric().name()) {
+                    case "ApiCallDuration":
+                        extendedStats.get(BlobStore.Metric.REQUEST_LATENCY).listMetrics.addAndGet(
+                            ((Duration) metricRecord.value()).toMillis()
+                        );
+                        break;
+                    case "RetryCount":
+                        extendedStats.get(BlobStore.Metric.RETRY_COUNT).listMetrics.addAndGet(((Integer) metricRecord.value()));
+                        break;
+                    case "ApiCallSuccessful":
+                        if ((Boolean) metricRecord.value()) {
+                            extendedStats.get(BlobStore.Metric.REQUEST_SUCCESS).listMetrics.addAndGet(1);
+                        } else {
+                            extendedStats.get(BlobStore.Metric.REQUEST_FAILURE).listMetrics.addAndGet(1);
+                        }
+                        stats.listMetrics.addAndGet(1);
+                        break;
+                }
+            }
+        }
+
+        @Override
+        public void close() {}
+    };
+
+    public MetricPublisher deleteObjectsMetricPublisher = new MetricPublisher() {
+        @Override
+        public void publish(MetricCollection metricCollection) {
+            for (MetricRecord<?> metricRecord : metricCollection) {
+                switch (metricRecord.metric().name()) {
+                    case "ApiCallDuration":
+                        extendedStats.get(BlobStore.Metric.REQUEST_LATENCY).deleteMetrics.addAndGet(
+                            ((Duration) metricRecord.value()).toMillis()
+                        );
+                        break;
+                    case "RetryCount":
+                        extendedStats.get(BlobStore.Metric.RETRY_COUNT).deleteMetrics.addAndGet(((Integer) metricRecord.value()));
+                        break;
+                    case "ApiCallSuccessful":
+                        if ((Boolean) metricRecord.value()) {
+                            extendedStats.get(BlobStore.Metric.REQUEST_SUCCESS).deleteMetrics.addAndGet(1);
+                        } else {
+                            extendedStats.get(BlobStore.Metric.REQUEST_FAILURE).deleteMetrics.addAndGet(1);
+                        }
+                        stats.deleteMetrics.addAndGet(1);
+                        break;
+                }
+            }
         }
 
         @Override
@@ -41,15 +93,26 @@ public void close() {}
     public MetricPublisher getObjectMetricPublisher = new MetricPublisher() {
         @Override
         public void publish(MetricCollection metricCollection) {
-            stats.getCount.addAndGet(
-                metricCollection.children()
-                    .stream()
-                    .filter(
-                        metricRecords -> metricRecords.name().equals("ApiCallAttempt")
-                            && !metricRecords.metricValues(HttpMetric.HTTP_STATUS_CODE).isEmpty()
-                    )
-                    .count()
-            );
+            for (MetricRecord<?> metricRecord : metricCollection) {
+                switch (metricRecord.metric().name()) {
+                    case "ApiCallDuration":
+                        extendedStats.get(BlobStore.Metric.REQUEST_LATENCY).getMetrics.addAndGet(
+                            ((Duration) metricRecord.value()).toMillis()
+                        );
+                        break;
+                    case "RetryCount":
+                        extendedStats.get(BlobStore.Metric.RETRY_COUNT).getMetrics.addAndGet(((Integer) metricRecord.value()));
+                        break;
+                    case "ApiCallSuccessful":
+                        if ((Boolean) metricRecord.value()) {
+                            extendedStats.get(BlobStore.Metric.REQUEST_SUCCESS).getMetrics.addAndGet(1);
+                        } else {
+                            extendedStats.get(BlobStore.Metric.REQUEST_FAILURE).getMetrics.addAndGet(1);
+                        }
+                        stats.getMetrics.addAndGet(1);
+                        break;
+                }
+            }
         }
 
         @Override
@@ -59,15 +122,26 @@ public void close() {}
     public MetricPublisher putObjectMetricPublisher = new MetricPublisher() {
         @Override
         public void publish(MetricCollection metricCollection) {
-            stats.putCount.addAndGet(
-                metricCollection.children()
-                    .stream()
-                    .filter(
-                        metricRecords -> metricRecords.name().equals("ApiCallAttempt")
-                            && !metricRecords.metricValues(HttpMetric.HTTP_STATUS_CODE).isEmpty()
-                    )
-                    .count()
-            );
+            for (MetricRecord<?> metricRecord : metricCollection) {
+                switch (metricRecord.metric().name()) {
+                    case "ApiCallDuration":
+                        extendedStats.get(BlobStore.Metric.REQUEST_LATENCY).putMetrics.addAndGet(
+                            ((Duration) metricRecord.value()).toMillis()
+                        );
+                        break;
+                    case "RetryCount":
+                        extendedStats.get(BlobStore.Metric.RETRY_COUNT).putMetrics.addAndGet(((Integer) metricRecord.value()));
+                        break;
+                    case "ApiCallSuccessful":
+                        if ((Boolean) metricRecord.value()) {
+                            extendedStats.get(BlobStore.Metric.REQUEST_SUCCESS).putMetrics.addAndGet(1);
+                        } else {
+                            extendedStats.get(BlobStore.Metric.REQUEST_FAILURE).putMetrics.addAndGet(1);
+                        }
+                        stats.putMetrics.addAndGet(1);
+                        break;
+                }
+            }
         }
 
         @Override
@@ -77,15 +151,26 @@ public void close() {}
     public MetricPublisher multipartUploadMetricCollector = new MetricPublisher() {
         @Override
         public void publish(MetricCollection metricCollection) {
-            stats.postCount.addAndGet(
-                metricCollection.children()
-                    .stream()
-                    .filter(
-                        metricRecords -> metricRecords.name().equals("ApiCallAttempt")
-                            && !metricRecords.metricValues(HttpMetric.HTTP_STATUS_CODE).isEmpty()
-                    )
-                    .count()
-            );
+            for (MetricRecord<?> metricRecord : metricCollection) {
+                switch (metricRecord.metric().name()) {
+                    case "ApiCallDuration":
+                        extendedStats.get(BlobStore.Metric.REQUEST_LATENCY).multiPartPutMetrics.addAndGet(
+                            ((Duration) metricRecord.value()).toMillis()
+                        );
+                        break;
+                    case "RetryCount":
+                        extendedStats.get(BlobStore.Metric.RETRY_COUNT).multiPartPutMetrics.addAndGet(((Integer) metricRecord.value()));
+                        break;
+                    case "ApiCallSuccessful":
+                        if ((Boolean) metricRecord.value()) {
+                            extendedStats.get(BlobStore.Metric.REQUEST_SUCCESS).multiPartPutMetrics.addAndGet(1);
+                        } else {
+                            extendedStats.get(BlobStore.Metric.REQUEST_FAILURE).multiPartPutMetrics.addAndGet(1);
+                        }
+                        stats.multiPartPutMetrics.addAndGet(1);
+                        break;
+                }
+            }
         }
 
         @Override
@@ -96,22 +181,29 @@ public Stats getStats() {
         return stats;
     }
 
+    public Map<BlobStore.Metric, Stats> getExtendedStats() {
+        return extendedStats;
+    }
+
     static class Stats {
 
-        final AtomicLong listCount = new AtomicLong();
+        final AtomicLong listMetrics = new AtomicLong();
+
+        final AtomicLong getMetrics = new AtomicLong();
 
-        final AtomicLong getCount = new AtomicLong();
+        final AtomicLong putMetrics = new AtomicLong();
 
-        final AtomicLong putCount = new AtomicLong();
+        final AtomicLong deleteMetrics = new AtomicLong();
 
-        final AtomicLong postCount = new AtomicLong();
+        final AtomicLong multiPartPutMetrics = new AtomicLong();
 
         Map<String, Long> toMap() {
             final Map<String, Long> results = new HashMap<>();
-            results.put("GetObject", getCount.get());
-            results.put("ListObjects", listCount.get());
-            results.put("PutObject", putCount.get());
-            results.put("PutMultipartObject", postCount.get());
+            results.put("GetObject", getMetrics.get());
+            results.put("ListObjects", listMetrics.get());
+            results.put("PutObject", putMetrics.get());
+            results.put("DeleteObjects", deleteMetrics.get());
+            results.put("PutMultipartObject", multiPartPutMetrics.get());
             return results;
         }
     }
diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java
index ad6939ce299d6..86bb70e5a40a2 100644
--- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java
+++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java
@@ -23,9 +23,11 @@
 import org.opensearch.common.StreamContext;
 import org.opensearch.common.blobstore.stream.write.WritePriority;
 import org.opensearch.common.io.InputStreamContainer;
+import org.opensearch.core.common.unit.ByteSizeUnit;
 import org.opensearch.repositories.s3.SocketAccess;
 import org.opensearch.repositories.s3.io.CheckedContainer;
 
+import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
@@ -142,7 +144,9 @@ private static void uploadPart(
             () -> s3AsyncClient.uploadPart(
                 uploadPartRequest,
                 AsyncRequestBody.fromInputStream(
-                    inputStreamContainer.getInputStream(),
+                    // Buffered stream is needed to allow mark and reset ops during IO errors so that only buffered
+                    // data can be retried instead of retrying whole file by the application.
+                    new BufferedInputStream(inputStreamContainer.getInputStream(), (int) (ByteSizeUnit.MB.toBytes(1) + 1)),
                     inputStreamContainer.getContentLength(),
                     streamReadExecutor
                 )
diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java
index 8d45c2167a3d1..db04636b89d50 100644
--- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java
+++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java
@@ -35,8 +35,10 @@
 import org.opensearch.common.util.ByteUtils;
 import org.opensearch.core.common.unit.ByteSizeUnit;
 import org.opensearch.repositories.s3.SocketAccess;
+import org.opensearch.repositories.s3.StatsMetricPublisher;
 import org.opensearch.repositories.s3.io.CheckedContainer;
 
+import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Base64;
@@ -86,16 +88,21 @@ public AsyncTransferManager(long minimumPartSize, ExecutorService executorService,
      * @param streamContext The {@link StreamContext} to supply streams during upload
      * @return A {@link CompletableFuture} to listen for upload completion
      */
-    public CompletableFuture<Void> uploadObject(S3AsyncClient s3AsyncClient, UploadRequest uploadRequest, StreamContext streamContext) {
+    public CompletableFuture<Void> uploadObject(
+        S3AsyncClient s3AsyncClient,
+        UploadRequest uploadRequest,
+        StreamContext streamContext,
+        StatsMetricPublisher statsMetricPublisher
+    ) {
         CompletableFuture<Void> returnFuture = new CompletableFuture<>();
         try {
             if (streamContext.getNumberOfParts() == 1) {
                 log.debug(() -> "Starting the upload as a single upload part request");
-                uploadInOneChunk(s3AsyncClient, uploadRequest, streamContext.provideStream(0), returnFuture);
+                uploadInOneChunk(s3AsyncClient, uploadRequest, streamContext.provideStream(0), returnFuture, statsMetricPublisher);
             } else {
                 log.debug(() -> "Starting the upload as multipart upload request");
-                uploadInParts(s3AsyncClient, uploadRequest, streamContext, returnFuture);
+                uploadInParts(s3AsyncClient, uploadRequest, streamContext, returnFuture, statsMetricPublisher);
             }
         } catch (Throwable throwable) {
             returnFuture.completeExceptionally(throwable);
@@ -108,12 +115,14 @@ private void uploadInParts(
         S3AsyncClient s3AsyncClient,
         UploadRequest uploadRequest,
         StreamContext streamContext,
-        CompletableFuture<Void> returnFuture
+        CompletableFuture<Void> returnFuture,
+        StatsMetricPublisher statsMetricPublisher
     ) {
         CreateMultipartUploadRequest.Builder createMultipartUploadRequestBuilder = CreateMultipartUploadRequest.builder()
             .bucket(uploadRequest.getBucket())
-            .key(uploadRequest.getKey());
+            .key(uploadRequest.getKey())
+            .overrideConfiguration(o -> o.addMetricPublisher(statsMetricPublisher.multipartUploadMetricCollector));
         if (uploadRequest.doRemoteDataIntegrityCheck()) {
             createMultipartUploadRequestBuilder.checksumAlgorithm(ChecksumAlgorithm.CRC32);
         }
@@ -286,12 +295,14 @@ private void uploadInOneChunk(
         S3AsyncClient s3AsyncClient,
         UploadRequest uploadRequest,
         InputStreamContainer inputStreamContainer,
-        CompletableFuture<Void> returnFuture
+        CompletableFuture<Void> returnFuture,
+        StatsMetricPublisher statsMetricPublisher
     ) {
         PutObjectRequest.Builder putObjectRequestBuilder = PutObjectRequest.builder()
             .bucket(uploadRequest.getBucket())
             .key(uploadRequest.getKey())
-            .contentLength(uploadRequest.getContentLength());
+            .contentLength(uploadRequest.getContentLength())
+            .overrideConfiguration(o -> o.addMetricPublisher(statsMetricPublisher.putObjectMetricPublisher));
         if (uploadRequest.doRemoteDataIntegrityCheck()) {
             putObjectRequestBuilder.checksumAlgorithm(ChecksumAlgorithm.CRC32);
             putObjectRequestBuilder.checksumCRC32(base64StringFromLong(uploadRequest.getExpectedChecksum()));
@@ -303,7 +314,9 @@ private void uploadInOneChunk(
             () -> s3AsyncClient.putObject(
                 putObjectRequestBuilder.build(),
                 AsyncRequestBody.fromInputStream(
-                    inputStreamContainer.getInputStream(),
+                    // Buffered stream is needed to allow mark and reset ops during IO errors so that only buffered
+                    // data can be retried instead of retrying whole file by the application.
+                    new BufferedInputStream(inputStreamContainer.getInputStream(), (int) (ByteSizeUnit.MB.toBytes(1) + 1)),
                     inputStreamContainer.getContentLength(),
                     streamReadExecutor
                 )
diff --git a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java
index 9c07b929052bc..607453986ab16 100644
--- a/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java
+++ b/plugins/repository-s3/src/test/java/org/opensearch/repositories/s3/async/AsyncTransferManagerTests.java
@@ -33,6 +33,7 @@
 import org.opensearch.common.io.InputStreamContainer;
 import org.opensearch.core.common.unit.ByteSizeUnit;
 import org.opensearch.repositories.blobstore.ZeroInputStream;
+import org.opensearch.repositories.s3.StatsMetricPublisher;
 import org.opensearch.test.OpenSearchTestCase;
 import org.junit.Before;
 
@@ -80,7 +81,8 @@ public void testOneChunkUpload() {
                 ByteSizeUnit.MB.toBytes(1),
                 ByteSizeUnit.MB.toBytes(1),
                 1
-            )
+            ),
+            new StatsMetricPublisher()
         );
 
         try {
@@ -118,7 +120,8 @@ public void testOneChunkUploadCorruption() {
                 ByteSizeUnit.MB.toBytes(1),
                 ByteSizeUnit.MB.toBytes(1),
                 1
-            )
+            ),
+            new StatsMetricPublisher()
         );
 
         try {
@@ -169,7 +172,8 @@ public void testMultipartUpload() {
                 ByteSizeUnit.MB.toBytes(1),
                 ByteSizeUnit.MB.toBytes(1),
                 5
-            )
+            ),
+            new StatsMetricPublisher()
         );
 
         try {
@@ -219,7 +223,8 @@ public void testMultipartUploadCorruption() {
                 ByteSizeUnit.MB.toBytes(1),
                 ByteSizeUnit.MB.toBytes(1),
                 5
-            )
+            ),
+            new StatsMetricPublisher()
         );
 
         try {
diff --git a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/IntegrationTestOTelTelemetryPlugin.java b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/IntegrationTestOTelTelemetryPlugin.java
similarity index 85%
rename from plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/IntegrationTestOTelTelemetryPlugin.java
rename to plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/IntegrationTestOTelTelemetryPlugin.java
index ed4d13f3abb7d..45caf8bf5f60b 100644
--- a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/IntegrationTestOTelTelemetryPlugin.java
+++ b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/IntegrationTestOTelTelemetryPlugin.java
@@ -6,12 +6,9 @@
  * compatible open source license.
  */
 
-package org.opensearch.telemetry.tracing;
+package org.opensearch.telemetry;
 
 import org.opensearch.common.settings.Settings;
-import org.opensearch.telemetry.OTelTelemetryPlugin;
-import org.opensearch.telemetry.Telemetry;
-import org.opensearch.telemetry.TelemetrySettings;
 
 import java.util.Optional;
 
diff --git a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/InMemorySingletonMetricsExporter.java b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/InMemorySingletonMetricsExporter.java
new file mode 100644
index 0000000000000..74fc872cb30e3
--- /dev/null
+++ b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/InMemorySingletonMetricsExporter.java
@@ -0,0 +1,65 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.telemetry.metrics;
+
+import java.util.Collection;
+import java.util.List;
+
+import io.opentelemetry.sdk.common.CompletableResultCode;
+import io.opentelemetry.sdk.metrics.InstrumentType;
+import io.opentelemetry.sdk.metrics.data.AggregationTemporality;
+import io.opentelemetry.sdk.metrics.data.MetricData;
+import io.opentelemetry.sdk.metrics.export.MetricExporter;
+import io.opentelemetry.sdk.testing.exporter.InMemoryMetricExporter;
+
+public class InMemorySingletonMetricsExporter implements MetricExporter {
+
+    public static final InMemorySingletonMetricsExporter INSTANCE = new InMemorySingletonMetricsExporter(InMemoryMetricExporter.create());
+
+    private static InMemoryMetricExporter delegate;
+
+    public static InMemorySingletonMetricsExporter create() {
+        return INSTANCE;
+    }
+
+    private InMemorySingletonMetricsExporter(InMemoryMetricExporter delegate) {
+        InMemorySingletonMetricsExporter.delegate = delegate;
+    }
+
+    @Override
+    public CompletableResultCode export(Collection<MetricData> metrics) {
+        return delegate.export(metrics);
+    }
+
+    @Override
+    public CompletableResultCode flush() {
+        return delegate.flush();
+    }
+
+    @Override
+    public CompletableResultCode shutdown() {
+        return delegate.shutdown();
+    }
+
+    public List<MetricData> getFinishedMetricItems() {
+        return delegate.getFinishedMetricItems();
+    }
+
+    /**
+     * Clears the state.
+     */
+    public void reset() {
+        delegate.reset();
+    }
+
+    @Override
+    public AggregationTemporality getAggregationTemporality(InstrumentType instrumentType) {
+        return delegate.getAggregationTemporality(instrumentType);
+    }
+}
diff --git a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsDisabledSanityIT.java b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsDisabledSanityIT.java
new file mode 100644
index 0000000000000..bcdcb657c4f42
--- /dev/null
+++ b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsDisabledSanityIT.java
@@ -0,0 +1,62 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.telemetry.metrics;
+
+import org.opensearch.common.settings.Settings;
+import org.opensearch.common.unit.TimeValue;
+import org.opensearch.plugins.Plugin;
+import org.opensearch.telemetry.IntegrationTestOTelTelemetryPlugin;
+import org.opensearch.telemetry.OTelTelemetrySettings;
+import org.opensearch.telemetry.TelemetrySettings;
+import org.opensearch.telemetry.metrics.noop.NoopCounter;
+import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry;
+import org.opensearch.test.OpenSearchIntegTestCase;
+
+import java.util.Arrays;
+import java.util.Collection;
+
+@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, minNumDataNodes = 1)
+public class TelemetryMetricsDisabledSanityIT extends OpenSearchIntegTestCase {
+
+    @Override
+    protected Settings nodeSettings(int nodeOrdinal) {
+        return Settings.builder()
+            .put(super.nodeSettings(nodeOrdinal))
+            .put(TelemetrySettings.METRICS_FEATURE_ENABLED_SETTING.getKey(), false)
+            .put(
+                OTelTelemetrySettings.OTEL_METRICS_EXPORTER_CLASS_SETTING.getKey(),
+                "org.opensearch.telemetry.metrics.InMemorySingletonMetricsExporter"
+            )
+            .put(TelemetrySettings.METRICS_PUBLISH_INTERVAL_SETTING.getKey(), TimeValue.timeValueSeconds(1))
+            .build();
+    }
+
+    @Override
+    protected Collection<Class<? extends Plugin>> nodePlugins() {
+        return Arrays.asList(IntegrationTestOTelTelemetryPlugin.class);
+    }
+
+    @Override
+    protected boolean addMockTelemetryPlugin() {
+        return false;
+    }
+
+    public void testSanityChecksWhenMetricsDisabled() throws Exception {
+        MetricsRegistry metricsRegistry = internalCluster().getInstance(MetricsRegistry.class);
+
+        Counter counter = metricsRegistry.createCounter("test-counter", "test", "1");
+        counter.add(1.0);
+
+        Thread.sleep(2000);
+
+        assertTrue(metricsRegistry instanceof NoopMetricsRegistry);
+        assertTrue(counter instanceof NoopCounter);
+    }
+
+}
diff --git a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsEnabledSanityIT.java b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsEnabledSanityIT.java
new file mode 100644
index 0000000000000..ed341595d327d
--- /dev/null
+++ b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsEnabledSanityIT.java
@@ -0,0 +1,99 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.telemetry.metrics;
+
+import org.opensearch.common.settings.Settings;
+import org.opensearch.common.unit.TimeValue;
+import org.opensearch.plugins.Plugin;
+import org.opensearch.telemetry.IntegrationTestOTelTelemetryPlugin;
+import org.opensearch.telemetry.OTelTelemetrySettings;
+import org.opensearch.telemetry.TelemetrySettings;
+import org.opensearch.test.OpenSearchIntegTestCase;
+import org.junit.After;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.stream.Collectors;
+
+import io.opentelemetry.sdk.metrics.data.DoublePointData;
+
+@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.SUITE, minNumDataNodes = 1)
+public class TelemetryMetricsEnabledSanityIT extends OpenSearchIntegTestCase {
+
+    @Override
+    protected Settings nodeSettings(int nodeOrdinal) {
+        return Settings.builder()
+            .put(super.nodeSettings(nodeOrdinal))
+            .put(TelemetrySettings.METRICS_FEATURE_ENABLED_SETTING.getKey(), true)
+            .put(
+                OTelTelemetrySettings.OTEL_METRICS_EXPORTER_CLASS_SETTING.getKey(),
+                "org.opensearch.telemetry.metrics.InMemorySingletonMetricsExporter"
+            )
+            .put(TelemetrySettings.METRICS_PUBLISH_INTERVAL_SETTING.getKey(), TimeValue.timeValueSeconds(1))
+            .build();
+    }
+
+    @Override
+    protected Collection<Class<? extends Plugin>> nodePlugins() {
+        return Arrays.asList(IntegrationTestOTelTelemetryPlugin.class);
+    }
+
+    @Override
+    protected boolean addMockTelemetryPlugin() {
+        return false;
+    }
+
+    public void testCounter() throws Exception {
+        MetricsRegistry metricsRegistry = internalCluster().getInstance(MetricsRegistry.class);
+        InMemorySingletonMetricsExporter.INSTANCE.reset();
+
+        Counter counter = metricsRegistry.createCounter("test-counter", "test", "1");
+        counter.add(1.0);
+        // Sleep for about 2s to wait for metrics to be published.
+        Thread.sleep(2000);
+
+        InMemorySingletonMetricsExporter exporter = InMemorySingletonMetricsExporter.INSTANCE;
+        double value = ((DoublePointData) ((ArrayList) exporter.getFinishedMetricItems()
+            .stream()
+            .filter(a -> a.getName().equals("test-counter"))
+            .collect(Collectors.toList())
+            .get(0)
+            .getDoubleSumData()
+            .getPoints()).get(0)).getValue();
+        assertEquals(1.0, value, 0.0);
+    }
+
+    public void testUpDownCounter() throws Exception {
+
+        MetricsRegistry metricsRegistry = internalCluster().getInstance(MetricsRegistry.class);
+        InMemorySingletonMetricsExporter.INSTANCE.reset();
+
+        Counter counter = metricsRegistry.createUpDownCounter("test-up-down-counter", "test", "1");
+        counter.add(1.0);
+        counter.add(-2.0);
+        // Sleep for about 2s to wait for metrics to be published.
+        Thread.sleep(2000);
+
+        InMemorySingletonMetricsExporter exporter = InMemorySingletonMetricsExporter.INSTANCE;
+        double value = ((DoublePointData) ((ArrayList) exporter.getFinishedMetricItems()
+            .stream()
+            .filter(a -> a.getName().equals("test-up-down-counter"))
+            .collect(Collectors.toList())
+            .get(0)
+            .getDoubleSumData()
+            .getPoints()).get(0)).getValue();
+        assertEquals(-1.0, value, 0.0);
+    }
+
+    @After
+    public void reset() {
+        InMemorySingletonMetricsExporter.INSTANCE.reset();
+    }
+}
diff --git a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerDisabledSanityIT.java b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerDisabledSanityIT.java
index 949a58f6cab41..45ed140e1be94 100644
--- a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerDisabledSanityIT.java
+++ b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerDisabledSanityIT.java
@@ -12,6 +12,7 @@
 import org.opensearch.common.settings.Settings;
 import org.opensearch.common.unit.TimeValue;
 import org.opensearch.plugins.Plugin;
+import org.opensearch.telemetry.IntegrationTestOTelTelemetryPlugin;
 import org.opensearch.telemetry.OTelTelemetrySettings;
 import org.opensearch.telemetry.TelemetrySettings;
 import org.opensearch.test.OpenSearchIntegTestCase;
diff --git a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerEnabledSanityIT.java b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerEnabledSanityIT.java
index 8a49a0abf5512..f07f2b308e801 100644
--- a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerEnabledSanityIT.java
+++ b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerEnabledSanityIT.java
@@ -12,6 +12,7 @@
 import org.opensearch.common.settings.Settings;
 import org.opensearch.common.unit.TimeValue;
 import org.opensearch.plugins.Plugin;
+import org.opensearch.telemetry.IntegrationTestOTelTelemetryPlugin;
 import org.opensearch.telemetry.OTelTelemetrySettings;
 import org.opensearch.telemetry.TelemetrySettings;
 import org.opensearch.telemetry.tracing.attributes.Attributes;
@@ -88,9 +89,7 @@ public void testSanityChecksWhenTracingEnabled() throws Exception {
         );
 
         InMemorySingletonSpanExporter exporter = InMemorySingletonSpanExporter.INSTANCE;
-        if (!exporter.getFinishedSpanItems().isEmpty()) {
-            validators.validate(exporter.getFinishedSpanItems(), 6);
-        }
+        validators.validate(exporter.getFinishedSpanItems(), 6);
     }
 
     private static void updateTelemetrySetting(Client client, boolean value) {
diff --git a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/OTelTelemetryPlugin.java b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/OTelTelemetryPlugin.java
index b57876c9310f3..297ae8873636f 100644
--- a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/OTelTelemetryPlugin.java
+++ b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/OTelTelemetryPlugin.java
@@ -8,14 +8,13 @@
 
 package org.opensearch.telemetry;
 
+import org.opensearch.common.concurrent.RefCountedReleasable;
 import org.opensearch.common.settings.Setting;
 import org.opensearch.common.settings.Settings;
 import org.opensearch.plugins.Plugin;
 import org.opensearch.plugins.TelemetryPlugin;
-import org.opensearch.telemetry.metrics.OTelMetricsTelemetry;
 import org.opensearch.telemetry.tracing.OTelResourceProvider;
 import org.opensearch.telemetry.tracing.OTelTelemetry;
-import org.opensearch.telemetry.tracing.OTelTracingTelemetry;
 
 import java.util.Arrays;
 import java.util.List;
@@ -37,6 +36,8 @@ public class OTelTelemetryPlugin extends Plugin implements TelemetryPlugin {
 
     private final Settings settings;
 
+    private RefCountedReleasable<OpenTelemetrySdk> refCountedOpenTelemetry;
+
     /**
      * Creates Otel plugin
      * @param settings cluster settings
@@ -58,20 +59,32 @@ public List<Setting<?>> getSettings() {
 
     @Override
     public Optional<Telemetry> getTelemetry(TelemetrySettings telemetrySettings) {
+        initializeOpenTelemetrySdk(telemetrySettings);
         return Optional.of(telemetry(telemetrySettings));
     }
 
+    private void initializeOpenTelemetrySdk(TelemetrySettings telemetrySettings) {
+        if (refCountedOpenTelemetry != null) {
+            return;
+        }
+        OpenTelemetrySdk openTelemetrySdk = OTelResourceProvider.get(telemetrySettings, settings);
+        refCountedOpenTelemetry = new RefCountedReleasable<>("openTelemetry", openTelemetrySdk, openTelemetrySdk::close);
+    }
+
     @Override
     public String getName() {
         return OTEL_TRACER_NAME;
     }
 
     private Telemetry telemetry(TelemetrySettings telemetrySettings) {
-        final OpenTelemetrySdk openTelemetry = OTelResourceProvider.get(telemetrySettings, settings);
-        return new OTelTelemetry(
-            new OTelTracingTelemetry<>(openTelemetry, openTelemetry.getSdkTracerProvider()),
-            new OTelMetricsTelemetry<>(openTelemetry.getSdkMeterProvider())
-        );
+        return new OTelTelemetry(refCountedOpenTelemetry);
+    }
+
+    @Override
+    public void close() {
+        if (refCountedOpenTelemetry != null) {
+            refCountedOpenTelemetry.close();
+        }
     }
 }
diff --git a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetry.java b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetry.java
index 8598e5976d20d..6160e5106c041 100644
--- a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetry.java
+++ b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetry.java
@@ -8,6 +8,7 @@
 
 package org.opensearch.telemetry.metrics;
 
+import org.opensearch.common.concurrent.RefCountedReleasable;
 import org.opensearch.telemetry.OTelTelemetryPlugin;
 
 import java.io.Closeable;
@@ -19,19 +20,24 @@
 import io.opentelemetry.api.metrics.DoubleUpDownCounter;
 import io.opentelemetry.api.metrics.Meter;
 import io.opentelemetry.api.metrics.MeterProvider;
+import io.opentelemetry.sdk.OpenTelemetrySdk;
 
 /**
  * OTel implementation for {@link MetricsTelemetry}
 */
 public class OTelMetricsTelemetry<T extends MeterProvider & Closeable> implements MetricsTelemetry {
+    private final RefCountedReleasable<OpenTelemetrySdk> refCountedOpenTelemetry;
     private final Meter otelMeter;
     private final T meterProvider;
 
     /**
      * Creates OTel based {@link MetricsTelemetry}.
+     * @param openTelemetry open telemetry.
diff --git a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetry.java b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetry.java index 8598e5976d20d..6160e5106c041 100644 --- a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetry.java +++ b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetry.java @@ -8,6 +8,7 @@ package org.opensearch.telemetry.metrics; +import org.opensearch.common.concurrent.RefCountedReleasable; import org.opensearch.telemetry.OTelTelemetryPlugin; import java.io.Closeable; @@ -19,19 +20,24 @@ import io.opentelemetry.api.metrics.DoubleUpDownCounter; import io.opentelemetry.api.metrics.Meter; import io.opentelemetry.api.metrics.MeterProvider; +import io.opentelemetry.sdk.OpenTelemetrySdk; /** * OTel implementation for {@link MetricsTelemetry} */ public class OTelMetricsTelemetry<T extends MeterProvider & Closeable> implements MetricsTelemetry { + private final RefCountedReleasable<OpenTelemetrySdk> refCountedOpenTelemetry; private final Meter otelMeter; private final T meterProvider; /** * Creates OTel based {@link MetricsTelemetry}. + * @param openTelemetry open telemetry. * @param meterProvider {@link MeterProvider} instance */ - public OTelMetricsTelemetry(T meterProvider) { + public OTelMetricsTelemetry(RefCountedReleasable<OpenTelemetrySdk> openTelemetry, T meterProvider) { + this.refCountedOpenTelemetry = openTelemetry; + this.refCountedOpenTelemetry.incRef(); this.meterProvider = meterProvider; this.otelMeter = meterProvider.get(OTelTelemetryPlugin.INSTRUMENTATION_SCOPE_NAME); } @@ -63,5 +69,6 @@ public Counter createUpDownCounter(String name, String description, String unit) @Override public void close() throws IOException { meterProvider.close(); + refCountedOpenTelemetry.close(); } } diff --git a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTelemetry.java b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTelemetry.java index 282fabd43346b..0c697d2cc5e8c 100644 --- a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTelemetry.java +++ b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTelemetry.java @@ -8,34 +8,39 @@ package org.opensearch.telemetry.tracing; +import org.opensearch.common.concurrent.RefCountedReleasable; import org.opensearch.telemetry.Telemetry; import org.opensearch.telemetry.metrics.MetricsTelemetry; +import org.opensearch.telemetry.metrics.OTelMetricsTelemetry; + +import io.opentelemetry.sdk.OpenTelemetrySdk; /** * Otel implementation of Telemetry */ public class OTelTelemetry implements Telemetry { - private final TracingTelemetry tracingTelemetry; - private final MetricsTelemetry metricsTelemetry; + private final RefCountedReleasable<OpenTelemetrySdk> refCountedOpenTelemetry; /** * Creates Telemetry instance - * @param tracingTelemetry tracing telemetry - * @param metricsTelemetry metrics telemetry + * @param refCountedOpenTelemetry open telemetry.
*/ - public OTelTelemetry(TracingTelemetry tracingTelemetry, MetricsTelemetry metricsTelemetry) { - this.tracingTelemetry = tracingTelemetry; - this.metricsTelemetry = metricsTelemetry; + public OTelTelemetry(RefCountedReleasable<OpenTelemetrySdk> refCountedOpenTelemetry) { + this.refCountedOpenTelemetry = refCountedOpenTelemetry; } @Override public TracingTelemetry getTracingTelemetry() { - return tracingTelemetry; + return new OTelTracingTelemetry<>(refCountedOpenTelemetry, refCountedOpenTelemetry.get().getSdkTracerProvider()); } @Override public MetricsTelemetry getMetricsTelemetry() { - return metricsTelemetry; + return new OTelMetricsTelemetry<>(refCountedOpenTelemetry, refCountedOpenTelemetry.get().getSdkMeterProvider()); } } diff --git a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTracingTelemetry.java b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTracingTelemetry.java index f88afe623fd56..af39617a8c744 100644 --- a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTracingTelemetry.java +++ b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTracingTelemetry.java @@ -8,31 +8,33 @@ package org.opensearch.telemetry.tracing; +import org.opensearch.common.concurrent.RefCountedReleasable; import org.opensearch.telemetry.OTelAttributesConverter; import org.opensearch.telemetry.OTelTelemetryPlugin; import java.io.Closeable; import java.io.IOException; -import io.opentelemetry.api.OpenTelemetry; import io.opentelemetry.api.trace.TracerProvider; import io.opentelemetry.context.Context; +import io.opentelemetry.sdk.OpenTelemetrySdk; /** * OTel based Telemetry provider */ public class OTelTracingTelemetry<T extends TracerProvider & Closeable> implements TracingTelemetry { - private final OpenTelemetry openTelemetry; + private final RefCountedReleasable<OpenTelemetrySdk> refCountedOpenTelemetry; private final T tracerProvider; private final io.opentelemetry.api.trace.Tracer otelTracer; /** * Creates OTel based {@link TracingTelemetry} - * @param openTelemetry OpenTelemetry instance + * @param refCountedOpenTelemetry OpenTelemetry instance * @param tracerProvider {@link TracerProvider} instance.
*/ - public OTelTracingTelemetry(OpenTelemetry openTelemetry, T tracerProvider) { - this.openTelemetry = openTelemetry; + public OTelTracingTelemetry(RefCountedReleasable refCountedOpenTelemetry, T tracerProvider) { + this.refCountedOpenTelemetry = refCountedOpenTelemetry; + this.refCountedOpenTelemetry.incRef(); this.tracerProvider = tracerProvider; this.otelTracer = tracerProvider.get(OTelTelemetryPlugin.INSTRUMENTATION_SCOPE_NAME); } @@ -40,6 +42,7 @@ public OTelTracingTelemetry(OpenTelemetry openTelemetry, T tracerProvider) { @Override public void close() throws IOException { tracerProvider.close(); + refCountedOpenTelemetry.close(); } @Override @@ -49,7 +52,7 @@ public Span createSpan(SpanCreationContext spanCreationContext, Span parentSpan) @Override public TracingContextPropagator getContextPropagator() { - return new OTelTracingContextPropagator(openTelemetry); + return new OTelTracingContextPropagator(refCountedOpenTelemetry.get()); } private Span createOtelSpan(SpanCreationContext spanCreationContext, Span parentSpan) { diff --git a/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetryTests.java b/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetryTests.java index 233c93e6b9a36..9de575b69774a 100644 --- a/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetryTests.java +++ b/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetryTests.java @@ -8,11 +8,13 @@ package org.opensearch.telemetry.metrics; +import org.opensearch.common.concurrent.RefCountedReleasable; import org.opensearch.telemetry.OTelAttributesConverter; import org.opensearch.telemetry.OTelTelemetryPlugin; import org.opensearch.telemetry.metrics.tags.Tags; import org.opensearch.test.OpenSearchTestCase; +import io.opentelemetry.api.OpenTelemetry; import io.opentelemetry.api.metrics.DoubleCounter; import io.opentelemetry.api.metrics.DoubleCounterBuilder; import io.opentelemetry.api.metrics.DoubleUpDownCounter; @@ -34,12 +36,16 @@ public void testCounter() { String description = "test"; String unit = "1"; Meter mockMeter = mock(Meter.class); + OpenTelemetry mockOpenTelemetry = mock(OpenTelemetry.class); DoubleCounter mockOTelDoubleCounter = mock(DoubleCounter.class); LongCounterBuilder mockOTelLongCounterBuilder = mock(LongCounterBuilder.class); DoubleCounterBuilder mockOTelDoubleCounterBuilder = mock(DoubleCounterBuilder.class); MeterProvider meterProvider = mock(MeterProvider.class); when(meterProvider.get(OTelTelemetryPlugin.INSTRUMENTATION_SCOPE_NAME)).thenReturn(mockMeter); - MetricsTelemetry metricsTelemetry = new OTelMetricsTelemetry(meterProvider); + MetricsTelemetry metricsTelemetry = new OTelMetricsTelemetry( + new RefCountedReleasable("telemetry", mockOpenTelemetry, () -> {}), + meterProvider + ); when(mockMeter.counterBuilder(counterName)).thenReturn(mockOTelLongCounterBuilder); when(mockOTelLongCounterBuilder.setDescription(description)).thenReturn(mockOTelLongCounterBuilder); when(mockOTelLongCounterBuilder.setUnit(unit)).thenReturn(mockOTelLongCounterBuilder); @@ -59,6 +65,7 @@ public void testCounterNegativeValue() { String counterName = "test-counter"; String description = "test"; String unit = "1"; + OpenTelemetry mockOpenTelemetry = mock(OpenTelemetry.class); Meter mockMeter = mock(Meter.class); DoubleCounter mockOTelDoubleCounter = mock(DoubleCounter.class); LongCounterBuilder mockOTelLongCounterBuilder = mock(LongCounterBuilder.class); @@ -66,7 +73,10 @@ 
public void testCounterNegativeValue() { MeterProvider meterProvider = mock(MeterProvider.class); when(meterProvider.get(OTelTelemetryPlugin.INSTRUMENTATION_SCOPE_NAME)).thenReturn(mockMeter); - MetricsTelemetry metricsTelemetry = new OTelMetricsTelemetry(meterProvider); + MetricsTelemetry metricsTelemetry = new OTelMetricsTelemetry( + new RefCountedReleasable("telemetry", mockOpenTelemetry, () -> {}), + meterProvider + ); when(mockMeter.counterBuilder(counterName)).thenReturn(mockOTelLongCounterBuilder); when(mockOTelLongCounterBuilder.setDescription(description)).thenReturn(mockOTelLongCounterBuilder); when(mockOTelLongCounterBuilder.setUnit(unit)).thenReturn(mockOTelLongCounterBuilder); @@ -83,6 +93,7 @@ public void testUpDownCounter() { String counterName = "test-counter"; String description = "test"; String unit = "1"; + OpenTelemetry mockOpenTelemetry = mock(OpenTelemetry.class); Meter mockMeter = mock(Meter.class); DoubleUpDownCounter mockOTelUpDownDoubleCounter = mock(DoubleUpDownCounter.class); LongUpDownCounterBuilder mockOTelLongUpDownCounterBuilder = mock(LongUpDownCounterBuilder.class); @@ -90,7 +101,10 @@ public void testUpDownCounter() { MeterProvider meterProvider = mock(MeterProvider.class); when(meterProvider.get(OTelTelemetryPlugin.INSTRUMENTATION_SCOPE_NAME)).thenReturn(mockMeter); - MetricsTelemetry metricsTelemetry = new OTelMetricsTelemetry(meterProvider); + MetricsTelemetry metricsTelemetry = new OTelMetricsTelemetry( + new RefCountedReleasable("telemetry", mockOpenTelemetry, () -> {}), + meterProvider + ); when(mockMeter.upDownCounterBuilder(counterName)).thenReturn(mockOTelLongUpDownCounterBuilder); when(mockOTelLongUpDownCounterBuilder.setDescription(description)).thenReturn(mockOTelLongUpDownCounterBuilder); when(mockOTelLongUpDownCounterBuilder.setUnit(unit)).thenReturn(mockOTelLongUpDownCounterBuilder); diff --git a/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/tracing/OTelTracingTelemetryTests.java b/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/tracing/OTelTracingTelemetryTests.java index 1a508ed252493..1f0c2f674e655 100644 --- a/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/tracing/OTelTracingTelemetryTests.java +++ b/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/tracing/OTelTracingTelemetryTests.java @@ -8,6 +8,7 @@ package org.opensearch.telemetry.tracing; +import org.opensearch.common.concurrent.RefCountedReleasable; import org.opensearch.telemetry.OTelTelemetryPlugin; import org.opensearch.telemetry.tracing.attributes.Attributes; import org.opensearch.test.OpenSearchTestCase; @@ -37,7 +38,10 @@ public void testCreateSpanWithoutParent() { when(mockSpanBuilder.startSpan()).thenReturn(mock(io.opentelemetry.api.trace.Span.class)); when(mockSpanBuilder.setSpanKind(any(io.opentelemetry.api.trace.SpanKind.class))).thenReturn(mockSpanBuilder); Attributes attributes = Attributes.create().addAttribute("name", "value"); - TracingTelemetry tracingTelemetry = new OTelTracingTelemetry(mockOpenTelemetry, mockTracerProvider); + TracingTelemetry tracingTelemetry = new OTelTracingTelemetry( + new RefCountedReleasable("telemetry", mockOpenTelemetry, () -> {}), + mockTracerProvider + ); Span span = tracingTelemetry.createSpan(SpanCreationContext.internal().name("span_name").attributes(attributes), null); verify(mockSpanBuilder, never()).setParent(any()); verify(mockSpanBuilder).setAllAttributes(createAttribute(attributes)); @@ -59,7 +63,10 @@ public void testCreateSpanWithParent() { Span parentSpan = new 
OTelSpan("parent_span", mock(io.opentelemetry.api.trace.Span.class), null); - TracingTelemetry tracingTelemetry = new OTelTracingTelemetry(mockOpenTelemetry, mockTracerProvider); + TracingTelemetry tracingTelemetry = new OTelTracingTelemetry( + new RefCountedReleasable("telemetry", mockOpenTelemetry, () -> {}), + mockTracerProvider + ); Attributes attributes = Attributes.create().addAttribute("name", 1l); Span span = tracingTelemetry.createSpan(SpanCreationContext.internal().name("span_name").attributes(attributes), parentSpan); @@ -85,7 +92,10 @@ public void testCreateSpanWithParentWithMultipleAttributes() { Span parentSpan = new OTelSpan("parent_span", mock(io.opentelemetry.api.trace.Span.class), null); - TracingTelemetry tracingTelemetry = new OTelTracingTelemetry(mockOpenTelemetry, mockTracerProvider); + TracingTelemetry tracingTelemetry = new OTelTracingTelemetry( + new RefCountedReleasable("telemetry", mockOpenTelemetry, () -> {}), + mockTracerProvider + ); Attributes attributes = Attributes.create() .addAttribute("key1", 1l) .addAttribute("key2", 2.0) @@ -125,7 +135,10 @@ public void testGetContextPropagator() { TracerProvider mockTracerProvider = mock(TracerProvider.class); when(mockTracerProvider.get(OTelTelemetryPlugin.INSTRUMENTATION_SCOPE_NAME)).thenReturn(mockTracer); - TracingTelemetry tracingTelemetry = new OTelTracingTelemetry(mockOpenTelemetry, mockTracerProvider); + TracingTelemetry tracingTelemetry = new OTelTracingTelemetry( + new RefCountedReleasable("telemetry", mockOpenTelemetry, () -> {}), + mockTracerProvider + ); assertTrue(tracingTelemetry.getContextPropagator() instanceof OTelTracingContextPropagator); } diff --git a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java index 6fcc89cfe9e9a..7304304e522f8 100644 --- a/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/gateway/remote/RemoteClusterStateServiceIT.java @@ -86,10 +86,10 @@ public void testFullClusterRestoreStaleDelete() throws Exception { assertEquals(10, repository.blobStore().blobContainer(baseMetadataPath.add("manifest")).listBlobsByPrefix("manifest").size()); - Map indexMetadataMap = remoteClusterStateService.getLatestIndexMetadata( + Map indexMetadataMap = remoteClusterStateService.getLatestMetadata( cluster().getClusterName(), getClusterState().metadata().clusterUUID() - ); + ).getIndices(); assertEquals(0, indexMetadataMap.values().stream().findFirst().get().getNumberOfReplicas()); assertEquals(shardCount, indexMetadataMap.values().stream().findFirst().get().getNumberOfShards()); } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java index e2ef5f85abc74..bccca283ba772 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java @@ -353,7 +353,13 @@ public void assertRemoteStoreRepositoryOnAllNodes(String repositoryName) { // Validated that all the restricted settings are entact on all the nodes. 
repository.getRestrictedSystemRepositorySettings() .stream() - .forEach(setting -> assertEquals(setting.get(actualRepository.settings()), setting.get(expectedRepository.settings()))); + .forEach( + setting -> assertEquals( + String.format(Locale.ROOT, "Restricted Settings mismatch [%s]", setting.getKey()), + setting.get(actualRepository.settings()), + setting.get(expectedRepository.settings()) + ) + ); } } diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java index 5e92bb195680b..3df4cc4e34d93 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreClusterStateRestoreIT.java @@ -8,23 +8,22 @@ package org.opensearch.remotestore; -import org.opensearch.action.admin.cluster.remotestore.restore.RestoreRemoteStoreResponse; -import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; -import org.opensearch.action.support.PlainActionFuture; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; import org.opensearch.common.settings.Settings; +import org.opensearch.gateway.remote.ClusterMetadataManifest; +import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; import org.opensearch.gateway.remote.RemoteClusterStateService; import org.opensearch.test.OpenSearchIntegTestCase; import java.io.IOException; import java.nio.file.Files; -import java.util.Locale; +import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.concurrent.ExecutionException; import static org.opensearch.gateway.remote.RemoteClusterStateService.REMOTE_CLUSTER_STATE_ENABLED_SETTING; -import static org.opensearch.indices.ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE; -import static org.opensearch.indices.ShardLimitValidator.SETTING_MAX_SHARDS_PER_CLUSTER_KEY; @OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 0) public class RemoteStoreClusterStateRestoreIT extends BaseRemoteStoreRestoreIT { @@ -48,47 +47,10 @@ private Map initialTestSetup(int shardCount, int replicaCount, int private void resetCluster(int dataNodeCount, int clusterManagerNodeCount) { internalCluster().stopAllNodes(); - addNewNodes(dataNodeCount, clusterManagerNodeCount); + internalCluster().startClusterManagerOnlyNodes(clusterManagerNodeCount); + internalCluster().startDataOnlyNodes(dataNodeCount); } - private void restoreAndValidate(String clusterUUID, Map indexStats) throws Exception { - restoreAndValidate(clusterUUID, indexStats, true); - } - - private void restoreAndValidate(String clusterUUID, Map indexStats, boolean validate) throws Exception { - // TODO once auto restore is merged, the remote cluster state will be restored - - if (validate) { - // Step - 4 validation restore is successful. 
- ensureGreen(INDEX_NAME); - verifyRestoredData(indexStats, INDEX_NAME); - } - } - - private void restoreAndValidateFails( - String clusterUUID, - PlainActionFuture actionListener, - Class clazz, - String errorSubString - ) { - - try { - restoreAndValidate(clusterUUID, null, false); - } catch (Exception e) { - assertTrue( - String.format(Locale.ROOT, "%s %s", clazz, e), - clazz.isAssignableFrom(e.getClass()) - || clazz.isAssignableFrom(e.getCause().getClass()) - || (e.getCause().getCause() != null && clazz.isAssignableFrom(e.getCause().getCause().getClass())) - ); - assertTrue( - String.format(Locale.ROOT, "Error message mismatch. Expected: [%s]. Actual: [%s]", errorSubString, e.getMessage()), - e.getMessage().contains(errorSubString) - ); - } - } - - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834") public void testFullClusterRestore() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; @@ -106,10 +68,10 @@ public void testFullClusterRestore() throws Exception { assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; // Step - 3 Trigger full cluster restore and validate - restoreAndValidate(prevClusterUUID, indexStats); + validateMetadata(List.of(INDEX_NAME)); + verifyRestoredData(indexStats, INDEX_NAME); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834") public void testFullClusterRestoreMultipleIndices() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; @@ -134,155 +96,100 @@ public void testFullClusterRestoreMultipleIndices() throws Exception { assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; // Step - 3 Trigger full cluster restore - restoreAndValidate(prevClusterUUID, indexStats); - ensureGreen(secondIndexName); - verifyRestoredData(indexStats2, secondIndexName); + validateMetadata(List.of(INDEX_NAME, secondIndexName)); + verifyRestoredData(indexStats, INDEX_NAME); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834") - public void testFullClusterRestoreFailureValidationFailures() throws Exception { + public void testFullClusterRestoreManifestFilePointsToInvalidIndexMetadataPathThrowsException() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; int dataNodeCount = shardCount * (replicaCount + 1); int clusterManagerNodeCount = 1; - // index some data to generate files in remote directory - Map indexStats = initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount); - String prevClusterUUID = clusterService().state().metadata().clusterUUID(); - - // Start of Test - 1 - // Test - 1 Trigger full cluster restore and validate it fails due to incorrect cluster UUID - PlainActionFuture future = PlainActionFuture.newFuture(); - restoreAndValidateFails("randomUUID", future, IllegalStateException.class, "Remote Cluster State not found - randomUUID"); - // End of Test - 1 + // Step - 1 index some data to generate files in remote directory + initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount); - // Start of Test - 3 - // Test - 2 Trigger full cluster restore and validate it fails due to cluster UUID same as current cluster UUID - future = PlainActionFuture.newFuture(); - restoreAndValidateFails( - clusterService().state().metadata().clusterUUID(), - future, - IllegalArgumentException.class, - "clusterUUID to restore from 
should be different from current cluster UUID" - ); - // End of Test - 2 + String prevClusterUUID = clusterService().state().metadata().clusterUUID(); + String clusterName = clusterService().state().getClusterName().value(); - // Start of Test - 3 // Step - 2 Replace all nodes in the cluster with new nodes. This ensures new cluster state doesn't have previous index metadata - // Restarting cluster with just 1 data node helps with applying cluster settings - resetCluster(1, clusterManagerNodeCount); - String newClusterUUID = clusterService().state().metadata().clusterUUID(); - assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; - - reduceShardLimits(1, 1); - - // Step - 4 Trigger full cluster restore and validate it fails - future = PlainActionFuture.newFuture(); - restoreAndValidateFails( - prevClusterUUID, - future, - IllegalArgumentException.class, - "this action would add [2] total shards, but this cluster currently has [0]/[1] maximum shards open" - ); - resetShardLimits(); - // End of Test - 3 - - // Start of Test - 4 - // Test -4 Reset cluster and trigger full restore with same name index in the cluster - // Test -4 Add required nodes for this test after last reset. - addNewNodes(dataNodeCount - 1, 0); - - newClusterUUID = clusterService().state().metadata().clusterUUID(); - assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; - - // Test -4 Step - 2 Create a new index with same name - createIndex(INDEX_NAME, remoteStoreIndexSettings(0, 1)); - ensureYellowAndNoInitializingShards(INDEX_NAME); - ensureGreen(INDEX_NAME); - - future = PlainActionFuture.newFuture(); - - // Test -4 Step - 3 Trigger full cluster restore and validate fails - restoreAndValidateFails( - prevClusterUUID, - future, - IllegalStateException.class, - "cannot restore index [remote-store-test-idx-1] because an open index with same name/uuid already exists in the cluster" - ); + internalCluster().stopAllNodes(); + // Step - 3 Delete index metadata file in remote + try { + Files.move( + segmentRepoPath.resolve( + RemoteClusterStateService.encodeString(clusterName) + "/cluster-state/" + prevClusterUUID + "/index" + ), + segmentRepoPath.resolve("cluster-state/") + ); + } catch (IOException e) { + throw new RuntimeException(e); + } + assertThrows(IllegalStateException.class, () -> addNewNodes(dataNodeCount, clusterManagerNodeCount)); + // Test is complete - // Test -4 Step - 4 validation restore is successful. 
- ensureGreen(INDEX_NAME); - // End of Test - 4 + // Starting a node without remote state to ensure test cleanup + internalCluster().startNode(Settings.builder().put(REMOTE_CLUSTER_STATE_ENABLED_SETTING.getKey(), false).build()); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/9834") - public void testFullClusterRestoreManifestFilePointsToInvalidIndexMetadataPathThrowsException() throws Exception { + public void testRemoteStateFullRestart() throws Exception { int shardCount = randomIntBetween(1, 2); int replicaCount = 1; int dataNodeCount = shardCount * (replicaCount + 1); - int clusterManagerNodeCount = 1; - - // Step - 1 index some data to generate files in remote directory - initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount); + int clusterManagerNodeCount = 3; + Map indexStats = initialTestSetup(shardCount, replicaCount, dataNodeCount, clusterManagerNodeCount); String prevClusterUUID = clusterService().state().metadata().clusterUUID(); - - // Step - 2 Replace all nodes in the cluster with new nodes. This ensures new cluster state doesn't have previous index metadata - resetCluster(dataNodeCount, clusterManagerNodeCount); - - String newClusterUUID = clusterService().state().metadata().clusterUUID(); - assert !Objects.equals(newClusterUUID, prevClusterUUID) : "cluster restart not successful. cluster uuid is same"; - - // Step - 4 Delete index metadata file in remote + // Delete index metadata file in remote try { Files.move( segmentRepoPath.resolve( RemoteClusterStateService.encodeString(clusterService().state().getClusterName().value()) + "/cluster-state/" + prevClusterUUID - + "/index" + + "/manifest" ), segmentRepoPath.resolve("cluster-state/") ); } catch (IOException e) { throw new RuntimeException(e); } - - // Step - 5 Trigger full cluster restore and validate fails - PlainActionFuture future = PlainActionFuture.newFuture(); - restoreAndValidateFails(prevClusterUUID, future, IllegalStateException.class, "asdsa"); + internalCluster().fullRestart(); + ensureGreen(INDEX_NAME); + String newClusterUUID = clusterService().state().metadata().clusterUUID(); + assert Objects.equals(newClusterUUID, prevClusterUUID) : "Full restart not successful. 
cluster uuid has changed"; + validateCurrentMetadata(); + verifyRestoredData(indexStats, INDEX_NAME); } - private void reduceShardLimits(int maxShardsPerNode, int maxShardsPerCluster) { - // Step 3 - Reduce shard limits to hit shard limit with less no of shards - try { - client().admin() - .cluster() - .updateSettings( - new ClusterUpdateSettingsRequest().transientSettings( - Settings.builder() - .put(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey(), maxShardsPerNode) - .put(SETTING_MAX_SHARDS_PER_CLUSTER_KEY, maxShardsPerCluster) - ) - ) - .get(); - } catch (InterruptedException | ExecutionException e) { - throw new RuntimeException(e); + private void validateMetadata(List indexNames) { + assertEquals(clusterService().state().metadata().indices().size(), indexNames.size()); + for (String indexName : indexNames) { + assertTrue(clusterService().state().metadata().hasIndex(indexName)); } } - private void resetShardLimits() { - // Step - 5 Reset the cluster settings - ClusterUpdateSettingsRequest resetRequest = new ClusterUpdateSettingsRequest(); - resetRequest.transientSettings( - Settings.builder().putNull(SETTING_CLUSTER_MAX_SHARDS_PER_NODE.getKey()).putNull(SETTING_MAX_SHARDS_PER_CLUSTER_KEY) + private void validateCurrentMetadata() throws Exception { + RemoteClusterStateService remoteClusterStateService = internalCluster().getInstance( + RemoteClusterStateService.class, + internalCluster().getClusterManagerName() ); - - try { - client().admin().cluster().updateSettings(resetRequest).get(); - } catch (InterruptedException | ExecutionException e) { - throw new RuntimeException(e); - } + assertBusy(() -> { + ClusterMetadataManifest manifest = remoteClusterStateService.getLatestClusterMetadataManifest( + getClusterState().getClusterName().value(), + getClusterState().metadata().clusterUUID() + ).get(); + ClusterState clusterState = getClusterState(); + Metadata currentMetadata = clusterState.metadata(); + assertEquals(currentMetadata.indices().size(), manifest.getIndices().size()); + assertEquals(currentMetadata.coordinationMetadata().term(), manifest.getClusterTerm()); + assertEquals(clusterState.version(), manifest.getStateVersion()); + assertEquals(clusterState.stateUUID(), manifest.getStateUUID()); + assertEquals(currentMetadata.clusterUUIDCommitted(), manifest.isClusterUUIDCommitted()); + for (UploadedIndexMetadata uploadedIndexMetadata : manifest.getIndices()) { + IndexMetadata currentIndexMetadata = currentMetadata.index(uploadedIndexMetadata.getIndexName()); + assertEquals(currentIndexMetadata.getIndex().getUUID(), uploadedIndexMetadata.getIndexUUID()); + } + }); } - } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java index 69efea186d927..6ce6ca40cbce4 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java @@ -57,6 +57,7 @@ import org.opensearch.monitor.process.ProcessStats; import org.opensearch.node.AdaptiveSelectionStats; import org.opensearch.node.NodesResourceUsageStats; +import org.opensearch.repositories.RepositoriesStats; import org.opensearch.script.ScriptCacheStats; import org.opensearch.script.ScriptStats; import org.opensearch.search.backpressure.stats.SearchBackpressureStats; @@ -146,6 +147,9 @@ public class NodeStats extends BaseNodeResponse implements ToXContentFragment { @Nullable private NodesResourceUsageStats 
resourceUsageStats; + @Nullable + private RepositoriesStats repositoriesStats; + public NodeStats(StreamInput in) throws IOException { super(in); timestamp = in.readVLong(); @@ -207,6 +211,11 @@ public NodeStats(StreamInput in) throws IOException { } else { resourceUsageStats = null; } + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + repositoriesStats = in.readOptionalWriteable(RepositoriesStats::new); + } else { + repositoriesStats = null; + } } public NodeStats( @@ -234,7 +243,8 @@ public NodeStats( @Nullable WeightedRoutingStats weightedRoutingStats, @Nullable FileCacheStats fileCacheStats, @Nullable TaskCancellationStats taskCancellationStats, - @Nullable SearchPipelineStats searchPipelineStats + @Nullable SearchPipelineStats searchPipelineStats, + @Nullable RepositoriesStats repositoriesStats ) { super(node); this.timestamp = timestamp; @@ -261,6 +271,7 @@ public NodeStats( this.fileCacheStats = fileCacheStats; this.taskCancellationStats = taskCancellationStats; this.searchPipelineStats = searchPipelineStats; + this.repositoriesStats = repositoriesStats; } public long getTimestamp() { @@ -403,6 +414,11 @@ public SearchPipelineStats getSearchPipelineStats() { return searchPipelineStats; } + @Nullable + public RepositoriesStats getRepositoriesStats() { + return repositoriesStats; + } + @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); @@ -449,6 +465,9 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_3_0_0)) { // make it 2.12 when we backport out.writeOptionalWriteable(resourceUsageStats); } + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeOptionalWriteable(repositoriesStats); + } } @Override @@ -542,6 +561,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (getResourceUsageStats() != null) { getResourceUsageStats().toXContent(builder, params); } + if (getRepositoriesStats() != null) { + getRepositoriesStats().toXContent(builder, params); + } return builder; } } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java index 99c9fb2d1e26a..88dff20354aa2 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java @@ -214,7 +214,8 @@ public enum Metric { FILE_CACHE_STATS("file_cache"), TASK_CANCELLATION("task_cancellation"), SEARCH_PIPELINE("search_pipeline"), - RESOURCE_USAGE_STATS("resource_usage_stats"); + RESOURCE_USAGE_STATS("resource_usage_stats"), + REPOSITORIES("repositories"); private String metricName; diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java index 204157236a282..aa02f8e580f4a 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java @@ -125,7 +125,8 @@ protected NodeStats nodeOperation(NodeStatsRequest nodeStatsRequest) { NodesStatsRequest.Metric.FILE_CACHE_STATS.containedIn(metrics), NodesStatsRequest.Metric.TASK_CANCELLATION.containedIn(metrics), NodesStatsRequest.Metric.SEARCH_PIPELINE.containedIn(metrics), - 
NodesStatsRequest.Metric.RESOURCE_USAGE_STATS.containedIn(metrics) + NodesStatsRequest.Metric.RESOURCE_USAGE_STATS.containedIn(metrics), + NodesStatsRequest.Metric.REPOSITORIES.containedIn(metrics) ); } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java index d8323e209be23..f51fabbfb2388 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/stats/TransportClusterStatsAction.java @@ -169,6 +169,7 @@ protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeReq false, false, false, + false, false ); List<ShardStats> shardsStats = new ArrayList<>(); diff --git a/server/src/main/java/org/opensearch/common/blobstore/BlobStore.java b/server/src/main/java/org/opensearch/common/blobstore/BlobStore.java index 2ee3e9557b354..0f6646d37f950 100644 --- a/server/src/main/java/org/opensearch/common/blobstore/BlobStore.java +++ b/server/src/main/java/org/opensearch/common/blobstore/BlobStore.java @@ -56,8 +59,36 @@ default Map<String, Long> stats() { return Collections.emptyMap(); } + /** + * Returns detailed statistics of operations that have been performed on this blob store + */ + default Map<Metric, Map<String, Long>> extendedStats() { + return Collections.emptyMap(); + } + /** * Reload the blob store inplace */ default void reload(RepositoryMetadata repositoryMetadata) {} + + /** + * Metrics for BlobStore interactions + */ + enum Metric { + REQUEST_SUCCESS("request_success_total"), + REQUEST_FAILURE("request_failures_total"), + REQUEST_LATENCY("request_time_in_millis"), + RETRY_COUNT("request_retry_count_total"); + + private String metricName; + + Metric(String name) { + this.metricName = name; + } + + public String metricName() { + return this.metricName; + } + } + } diff --git a/server/src/main/java/org/opensearch/common/blobstore/EncryptedBlobStore.java b/server/src/main/java/org/opensearch/common/blobstore/EncryptedBlobStore.java index 4d2d69e473438..a18ca8b9d5c39 100644 --- a/server/src/main/java/org/opensearch/common/blobstore/EncryptedBlobStore.java +++ b/server/src/main/java/org/opensearch/common/blobstore/EncryptedBlobStore.java @@ -75,6 +75,16 @@ public Map<String, Long> stats() { return blobStore.stats(); } + /** + * Retrieves extended statistics about the BlobStore. Delegates the call to the underlying BlobStore's extendedStats() method. + * + * @return A map containing extended statistics about the BlobStore. + */ + @Override + public Map<Metric, Map<String, Long>> extendedStats() { + return blobStore.extendedStats(); + } + /** * Closes the EncryptedBlobStore by decrementing the reference count of the CryptoManager and closing the * underlying BlobStore. This ensures proper cleanup of resources.
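For context on how the new extendedStats() contract is meant to be consumed: a store implementation can bucket counters first by BlobStore.Metric and then by operation name, and render a snapshot in the Map<Metric, Map<String, Long>> shape. The sketch below is illustrative only; the helper class and the operation names are invented, while the Metric enum and the return shape are the ones added above:

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;

import org.opensearch.common.blobstore.BlobStore;

// Hypothetical helper, not part of this change: accumulates per-operation counters
// and renders them in the shape returned by BlobStore#extendedStats().
final class ExampleBlobStoreStats {

    // metric -> operation name (e.g. "GetObject") -> count
    private final Map<BlobStore.Metric, Map<String, AtomicLong>> counters = new ConcurrentHashMap<>();

    void increment(BlobStore.Metric metric, String operation) {
        counters.computeIfAbsent(metric, m -> new ConcurrentHashMap<>())
            .computeIfAbsent(operation, op -> new AtomicLong())
            .incrementAndGet();
    }

    // Snapshot in the Map<Metric, Map<String, Long>> shape expected by extendedStats().
    Map<BlobStore.Metric, Map<String, Long>> asExtendedStats() {
        Map<BlobStore.Metric, Map<String, Long>> snapshot = new HashMap<>();
        counters.forEach((metric, byOperation) -> {
            Map<String, Long> values = new HashMap<>();
            byOperation.forEach((operation, count) -> values.put(operation, count.get()));
            snapshot.put(metric, values);
        });
        return snapshot;
    }
}

A store would then call increment(BlobStore.Metric.REQUEST_FAILURE, "GetObject") from its error paths and delegate extendedStats() to asExtendedStats().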
diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 554d651bdf23d..88d5e1fd2dfcb 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -701,6 +701,12 @@ public void apply(Settings value, Settings current, Settings previous) { SearchService.CONCURRENT_SEGMENT_SEARCH_TARGET_MAX_SLICE_COUNT_SETTING ), List.of(FeatureFlags.TELEMETRY), - List.of(TelemetrySettings.TRACER_ENABLED_SETTING, TelemetrySettings.TRACER_SAMPLER_PROBABILITY) + List.of( + TelemetrySettings.TRACER_ENABLED_SETTING, + TelemetrySettings.TRACER_SAMPLER_PROBABILITY, + TelemetrySettings.METRICS_PUBLISH_INTERVAL_SETTING, + TelemetrySettings.TRACER_FEATURE_ENABLED_SETTING, + TelemetrySettings.METRICS_FEATURE_ENABLED_SETTING + ) ); } diff --git a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java index 40b16f3d6323b..97b37d9532f85 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java +++ b/server/src/main/java/org/opensearch/gateway/remote/ClusterMetadataManifest.java @@ -33,6 +33,9 @@ */ public class ClusterMetadataManifest implements Writeable, ToXContentFragment { + public static final int CODEC_V0 = 0; // Older codec version, where we haven't introduced codec versions for manifest. + public static final int CODEC_V1 = 1; // In Codec V1 we have introduced global-metadata and codec version in Manifest file. + private static final ParseField CLUSTER_TERM_FIELD = new ParseField("cluster_term"); private static final ParseField STATE_VERSION_FIELD = new ParseField("state_version"); private static final ParseField CLUSTER_UUID_FIELD = new ParseField("cluster_uuid"); @@ -40,6 +43,8 @@ public class ClusterMetadataManifest implements Writeable, ToXContentFragment { private static final ParseField OPENSEARCH_VERSION_FIELD = new ParseField("opensearch_version"); private static final ParseField NODE_ID_FIELD = new ParseField("node_id"); private static final ParseField COMMITTED_FIELD = new ParseField("committed"); + private static final ParseField CODEC_VERSION_FIELD = new ParseField("codec_version"); + private static final ParseField GLOBAL_METADATA_FIELD = new ParseField("global_metadata"); private static final ParseField INDICES_FIELD = new ParseField("indices"); private static final ParseField PREVIOUS_CLUSTER_UUID = new ParseField("previous_cluster_uuid"); private static final ParseField CLUSTER_UUID_COMMITTED = new ParseField("cluster_uuid_committed"); @@ -84,7 +89,33 @@ private static boolean clusterUUIDCommitted(Object[] fields) { return (boolean) fields[9]; } - private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + private static int codecVersion(Object[] fields) { + return (int) fields[10]; + } + + private static String globalMetadataFileName(Object[] fields) { + return (String) fields[11]; + } + + private static final ConstructingObjectParser PARSER_V0 = new ConstructingObjectParser<>( + "cluster_metadata_manifest", + fields -> new ClusterMetadataManifest( + term(fields), + version(fields), + clusterUUID(fields), + stateUUID(fields), + opensearchVersion(fields), + nodeId(fields), + committed(fields), + CODEC_V0, + null, + indices(fields), + previousClusterUUID(fields), + clusterUUIDCommitted(fields) + ) + ); + + private 
static final ConstructingObjectParser PARSER_V1 = new ConstructingObjectParser<>( "cluster_metadata_manifest", fields -> new ClusterMetadataManifest( term(fields), @@ -94,29 +125,45 @@ private static boolean clusterUUIDCommitted(Object[] fields) { opensearchVersion(fields), nodeId(fields), committed(fields), + codecVersion(fields), + globalMetadataFileName(fields), indices(fields), previousClusterUUID(fields), clusterUUIDCommitted(fields) ) ); + private static final ConstructingObjectParser CURRENT_PARSER = PARSER_V1; + static { - PARSER.declareLong(ConstructingObjectParser.constructorArg(), CLUSTER_TERM_FIELD); - PARSER.declareLong(ConstructingObjectParser.constructorArg(), STATE_VERSION_FIELD); - PARSER.declareString(ConstructingObjectParser.constructorArg(), CLUSTER_UUID_FIELD); - PARSER.declareString(ConstructingObjectParser.constructorArg(), STATE_UUID_FIELD); - PARSER.declareInt(ConstructingObjectParser.constructorArg(), OPENSEARCH_VERSION_FIELD); - PARSER.declareString(ConstructingObjectParser.constructorArg(), NODE_ID_FIELD); - PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), COMMITTED_FIELD); - PARSER.declareObjectArray( + declareParser(PARSER_V0, CODEC_V0); + declareParser(PARSER_V1, CODEC_V1); + } + + private static void declareParser(ConstructingObjectParser parser, long codec_version) { + parser.declareLong(ConstructingObjectParser.constructorArg(), CLUSTER_TERM_FIELD); + parser.declareLong(ConstructingObjectParser.constructorArg(), STATE_VERSION_FIELD); + parser.declareString(ConstructingObjectParser.constructorArg(), CLUSTER_UUID_FIELD); + parser.declareString(ConstructingObjectParser.constructorArg(), STATE_UUID_FIELD); + parser.declareInt(ConstructingObjectParser.constructorArg(), OPENSEARCH_VERSION_FIELD); + parser.declareString(ConstructingObjectParser.constructorArg(), NODE_ID_FIELD); + parser.declareBoolean(ConstructingObjectParser.constructorArg(), COMMITTED_FIELD); + parser.declareObjectArray( ConstructingObjectParser.constructorArg(), (p, c) -> UploadedIndexMetadata.fromXContent(p), INDICES_FIELD ); - PARSER.declareString(ConstructingObjectParser.constructorArg(), PREVIOUS_CLUSTER_UUID); - PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), CLUSTER_UUID_COMMITTED); + parser.declareString(ConstructingObjectParser.constructorArg(), PREVIOUS_CLUSTER_UUID); + parser.declareBoolean(ConstructingObjectParser.constructorArg(), CLUSTER_UUID_COMMITTED); + + if (codec_version >= CODEC_V1) { + parser.declareInt(ConstructingObjectParser.constructorArg(), CODEC_VERSION_FIELD); + parser.declareString(ConstructingObjectParser.constructorArg(), GLOBAL_METADATA_FIELD); + } } + private final int codecVersion; + private final String globalMetadataFileName; private final List indices; private final long clusterTerm; private final long stateVersion; @@ -168,6 +215,14 @@ public boolean isClusterUUIDCommitted() { return clusterUUIDCommitted; } + public int getCodecVersion() { + return codecVersion; + } + + public String getGlobalMetadataFileName() { + return globalMetadataFileName; + } + public ClusterMetadataManifest( long clusterTerm, long version, @@ -176,6 +231,8 @@ public ClusterMetadataManifest( Version opensearchVersion, String nodeId, boolean committed, + int codecVersion, + String globalMetadataFileName, List indices, String previousClusterUUID, boolean clusterUUIDCommitted @@ -187,6 +244,8 @@ public ClusterMetadataManifest( this.opensearchVersion = opensearchVersion; this.nodeId = nodeId; this.committed = committed; + this.codecVersion = codecVersion; 
+ this.globalMetadataFileName = globalMetadataFileName; this.indices = Collections.unmodifiableList(indices); this.previousClusterUUID = previousClusterUUID; this.clusterUUIDCommitted = clusterUUIDCommitted; @@ -203,6 +262,13 @@ public ClusterMetadataManifest(StreamInput in) throws IOException { this.indices = Collections.unmodifiableList(in.readList(UploadedIndexMetadata::new)); this.previousClusterUUID = in.readString(); this.clusterUUIDCommitted = in.readBoolean(); + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + this.codecVersion = in.readInt(); + this.globalMetadataFileName = in.readString(); + } else { + this.codecVersion = CODEC_V0; // Default codec + this.globalMetadataFileName = null; + } } public static Builder builder() { @@ -231,6 +297,10 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.endArray(); builder.field(PREVIOUS_CLUSTER_UUID.getPreferredName(), getPreviousClusterUUID()); builder.field(CLUSTER_UUID_COMMITTED.getPreferredName(), isClusterUUIDCommitted()); + if (onOrAfterCodecVersion(CODEC_V1)) { + builder.field(CODEC_VERSION_FIELD.getPreferredName(), getCodecVersion()); + builder.field(GLOBAL_METADATA_FIELD.getPreferredName(), getGlobalMetadataFileName()); + } return builder; } @@ -246,6 +316,10 @@ public void writeTo(StreamOutput out) throws IOException { out.writeCollection(indices); out.writeString(previousClusterUUID); out.writeBoolean(clusterUUIDCommitted); + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeInt(codecVersion); + out.writeString(globalMetadataFileName); + } } @Override @@ -266,12 +340,16 @@ public boolean equals(Object o) { && Objects.equals(nodeId, that.nodeId) && Objects.equals(committed, that.committed) && Objects.equals(previousClusterUUID, that.previousClusterUUID) - && Objects.equals(clusterUUIDCommitted, that.clusterUUIDCommitted); + && Objects.equals(clusterUUIDCommitted, that.clusterUUIDCommitted) + && Objects.equals(globalMetadataFileName, that.globalMetadataFileName) + && Objects.equals(codecVersion, that.codecVersion); } @Override public int hashCode() { return Objects.hash( + codecVersion, + globalMetadataFileName, indices, clusterTerm, stateVersion, @@ -290,8 +368,16 @@ public String toString() { return Strings.toString(MediaTypeRegistry.JSON, this); } + public boolean onOrAfterCodecVersion(int codecVersion) { + return this.codecVersion >= codecVersion; + } + + public static ClusterMetadataManifest fromXContentV0(XContentParser parser) throws IOException { + return PARSER_V0.parse(parser, null); + } + public static ClusterMetadataManifest fromXContent(XContentParser parser) throws IOException { - return PARSER.parse(parser, null); + return CURRENT_PARSER.parse(parser, null); } /** @@ -301,6 +387,8 @@ public static ClusterMetadataManifest fromXContent(XContentParser parser) throws */ public static class Builder { + private String globalMetadataFileName; + private int codecVersion; private List indices; private long clusterTerm; private long stateVersion; @@ -317,6 +405,16 @@ public Builder indices(List indices) { return this; } + public Builder codecVersion(int codecVersion) { + this.codecVersion = codecVersion; + return this; + } + + public Builder globalMetadataFileName(String globalMetadataFileName) { + this.globalMetadataFileName = globalMetadataFileName; + return this; + } + public Builder clusterTerm(long clusterTerm) { this.clusterTerm = clusterTerm; return this; @@ -378,6 +476,8 @@ public Builder(ClusterMetadataManifest manifest) { this.opensearchVersion = 
manifest.opensearchVersion; this.nodeId = manifest.nodeId; this.committed = manifest.committed; + this.globalMetadataFileName = manifest.globalMetadataFileName; + this.codecVersion = manifest.codecVersion; this.indices = new ArrayList<>(manifest.indices); this.previousClusterUUID = manifest.previousClusterUUID; this.clusterUUIDCommitted = manifest.clusterUUIDCommitted; @@ -392,6 +492,8 @@ public ClusterMetadataManifest build() { opensearchVersion, nodeId, committed, + codecVersion, + globalMetadataFileName, indices, previousClusterUUID, clusterUUIDCommitted
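Since the manifest now records its own codec version, a reader can pick the parser that matches the file it fetched. A minimal sketch of that dispatch, using only the fromXContent/fromXContentV0 entry points and the CODEC_V1 constant introduced above; the helper itself is hypothetical and the codec version is assumed to have been recovered beforehand (e.g. from the manifest file name's trailing token):

import java.io.IOException;

import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.gateway.remote.ClusterMetadataManifest;

final class ManifestParserDispatch {
    static ClusterMetadataManifest parse(XContentParser parser, int codecVersion) throws IOException {
        if (codecVersion >= ClusterMetadataManifest.CODEC_V1) {
            // V1 manifests carry the codec_version and global_metadata fields.
            return ClusterMetadataManifest.fromXContent(parser);
        }
        // V0 manifests predate those fields.
        return ClusterMetadataManifest.fromXContentV0(parser);
    }
}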
diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index 4a8a0618ffa60..358ce600a49d8 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -15,6 +15,7 @@ import org.opensearch.action.LatchedActionListener; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; import org.opensearch.common.Nullable; import org.opensearch.common.blobstore.BlobContainer; import org.opensearch.common.blobstore.BlobMetadata; @@ -27,6 +28,7 @@ import org.opensearch.common.util.io.IOUtils; import org.opensearch.core.action.ActionListener; import org.opensearch.core.index.Index; +import org.opensearch.core.xcontent.ToXContent; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.index.translog.transfer.BlobStoreTransferService; @@ -55,6 +57,7 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; import java.util.function.LongSupplier; import java.util.function.Supplier; @@ -80,7 +83,9 @@ public class RemoteClusterStateService implements Closeable { private static final Logger logger = LogManager.getLogger(RemoteClusterStateService.class); + // TODO: make these two variables dynamic settings [issue: #10688] public static final int INDEX_METADATA_UPLOAD_WAIT_MILLIS = 20000; + public static final int GLOBAL_METADATA_UPLOAD_WAIT_MILLIS = 20000; public static final ChecksumBlobStoreFormat<IndexMetadata> INDEX_METADATA_FORMAT = new ChecksumBlobStoreFormat<>( "index-metadata", METADATA_NAME_FORMAT, IndexMetadata::fromXContent ); + public static final ChecksumBlobStoreFormat<Metadata> GLOBAL_METADATA_FORMAT = new ChecksumBlobStoreFormat<>( + "metadata", + METADATA_NAME_FORMAT, + Metadata::fromXContent + ); + + /** + * Manifest format compatible with older codec v0, where codec version was missing. + */ + public static final ChecksumBlobStoreFormat<ClusterMetadataManifest> CLUSTER_METADATA_MANIFEST_FORMAT_V0 = + new ChecksumBlobStoreFormat<>("cluster-metadata-manifest", METADATA_MANIFEST_NAME_FORMAT, ClusterMetadataManifest::fromXContentV0); + + /** + * Manifest format compatible with codec v1, where we introduced codec versions/global metadata. + */ public static final ChecksumBlobStoreFormat<ClusterMetadataManifest> CLUSTER_METADATA_MANIFEST_FORMAT = new ChecksumBlobStoreFormat<>( "cluster-metadata-manifest", METADATA_MANIFEST_NAME_FORMAT, ClusterMetadataManifest::fromXContent ); + /** * Used to specify if cluster state metadata should be published to remote store */ @@ -105,9 +126,11 @@ public class RemoteClusterStateService implements Closeable { public static final String CLUSTER_STATE_PATH_TOKEN = "cluster-state"; public static final String INDEX_PATH_TOKEN = "index"; + public static final String GLOBAL_METADATA_PATH_TOKEN = "global-metadata"; public static final String MANIFEST_PATH_TOKEN = "manifest"; public static final String MANIFEST_FILE_PREFIX = "manifest"; - public static final String INDEX_METADATA_FILE_PREFIX = "metadata"; + public static final String METADATA_FILE_PREFIX = "metadata"; + public static final int SPLITED_MANIFEST_FILE_LENGTH = 6; // file name manifest__term__version__C/P__timestamp__codecversion private final String nodeId; private final Supplier<RepositoriesService> repositoriesService; @@ -120,6 +143,19 @@ public class RemoteClusterStateService implements Closeable { private final AtomicBoolean deleteStaleMetadataRunning = new AtomicBoolean(false); + public static final int INDEX_METADATA_CURRENT_CODEC_VERSION = 1; + public static final int MANIFEST_CURRENT_CODEC_VERSION = ClusterMetadataManifest.CODEC_V1; + public static final int GLOBAL_METADATA_CURRENT_CODEC_VERSION = 1; + + // ToXContent Params with gateway mode. + // We are using gateway context mode to persist all custom metadata. + public static final ToXContent.Params FORMAT_PARAMS; + static { + Map<String, String> params = new HashMap<>(1); + params.put(Metadata.CONTEXT_MODE_PARAM, Metadata.CONTEXT_MODE_GATEWAY); + FORMAT_PARAMS = new ToXContent.MapParams(params); + } + public RemoteClusterStateService( String nodeId, Supplier<RepositoriesService> repositoriesService, @@ -159,12 +195,22 @@ public ClusterMetadataManifest writeFullMetadata(ClusterState clusterState, Stri return null; } + // TODO: we can upload global metadata and index metadata in parallel. [issue: #10645] + // Write globalMetadata + String globalMetadataFile = writeGlobalMetadata(clusterState); + // any validations before/after upload ? final List<UploadedIndexMetadata> allUploadedIndexMetadata = writeIndexMetadataParallel( clusterState, new ArrayList<>(clusterState.metadata().indices().values()) ); - final ClusterMetadataManifest manifest = uploadManifest(clusterState, allUploadedIndexMetadata, previousClusterUUID, false); + final ClusterMetadataManifest manifest = uploadManifest( + clusterState, + allUploadedIndexMetadata, + previousClusterUUID, + globalMetadataFile, + false + ); final long durationMillis = TimeValue.nsecToMSec(relativeTimeNanosSupplier.getAsLong() - startTimeNanos); if (durationMillis >= slowWriteLoggingThreshold.getMillis()) { logger.warn( @@ -203,6 +249,22 @@ public ClusterMetadataManifest writeIncrementalMetadata( return null; } assert previousClusterState.metadata().coordinationMetadata().term() == clusterState.metadata().coordinationMetadata().term(); + + // Write Global Metadata + final boolean updateGlobalMetadata = Metadata.isGlobalStateEquals( + previousClusterState.metadata(), + clusterState.metadata() + ) == false; + String globalMetadataFile; + // For the migration case from codec V0 to V1 we added a null check on the global metadata file name: + // if it is missing and the current codec is V1, write the global metadata.
+ if (updateGlobalMetadata || previousManifest.getGlobalMetadataFileName() == null) { + globalMetadataFile = writeGlobalMetadata(clusterState); + } else { + globalMetadataFile = previousManifest.getGlobalMetadataFileName(); + } + + // Write Index Metadata final Map<String, Long> previousStateIndexMetadataVersionByName = new HashMap<>(); for (final IndexMetadata indexMetadata : previousClusterState.metadata().indices().values()) { previousStateIndexMetadataVersionByName.put(indexMetadata.getIndex().getName(), indexMetadata.getVersion()); @@ -245,6 +307,7 @@ public ClusterMetadataManifest writeIncrementalMetadata( clusterState, new ArrayList<>(allUploadedIndexMetadata.values()), previousManifest.getPreviousClusterUUID(), + globalMetadataFile, false ); deleteStaleClusterMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID(), RETAINED_MANIFESTS); @@ -270,6 +333,59 @@ public ClusterMetadataManifest writeIncrementalMetadata( return manifest; } + /** + * Uploads provided ClusterState's global Metadata to remote store in parallel. + * The call is blocking so the method waits for upload to finish and then returns. + * + * @param clusterState current ClusterState + * @return String file name where globalMetadata file is stored. + */ + private String writeGlobalMetadata(ClusterState clusterState) throws IOException { + + AtomicReference<String> result = new AtomicReference<>(); + final BlobContainer globalMetadataContainer = globalMetadataContainer( + clusterState.getClusterName().value(), + clusterState.metadata().clusterUUID() + ); + final String globalMetadataFilename = globalMetadataFileName(clusterState.metadata()); + + // latch to wait until the upload finishes + CountDownLatch latch = new CountDownLatch(1); + + LatchedActionListener<Void> completionListener = new LatchedActionListener<>(ActionListener.wrap(resp -> { + logger.trace(String.format(Locale.ROOT, "GlobalMetadata uploaded successfully.")); + result.set(globalMetadataContainer.path().buildAsString() + globalMetadataFilename); + }, ex -> { throw new GlobalMetadataTransferException(ex.getMessage(), ex); }), latch); + + GLOBAL_METADATA_FORMAT.writeAsync( + clusterState.metadata(), + globalMetadataContainer, + globalMetadataFilename, + blobStoreRepository.getCompressor(), + completionListener, + FORMAT_PARAMS + ); + + try { + if (latch.await(GLOBAL_METADATA_UPLOAD_WAIT_MILLIS, TimeUnit.MILLISECONDS) == false) { + // TODO: We should add metrics where transfer is timing out. [Issue: #10687] + GlobalMetadataTransferException ex = new GlobalMetadataTransferException( + String.format(Locale.ROOT, "Timed out waiting for transfer of global metadata to complete") + ); + throw ex; + } + } catch (InterruptedException ex) { + GlobalMetadataTransferException exception = new GlobalMetadataTransferException( + String.format(Locale.ROOT, "Interrupted while waiting for transfer of global metadata to complete - %s", ex.getMessage()), + ex + ); + Thread.currentThread().interrupt(); + throw exception; + } + + return result.get(); + } +
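writeGlobalMetadata() above blocks on an asynchronous upload with a latch: the latched listener counts down on success and on failure, and the caller bails out if the latch does not open within GLOBAL_METADATA_UPLOAD_WAIT_MILLIS. A stripped-down version of that pattern with invented names (asyncUpload, uploadBlocking), to show why counting down on both paths matters:

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;

final class BlockingUploadSketch {

    // Invented stand-in for the real async transfer: invokes exactly one of the callbacks.
    static void asyncUpload(byte[] payload, Runnable onSuccess, Consumer<Exception> onFailure) {
        new Thread(onSuccess).start();
    }

    static void uploadBlocking(byte[] payload, long timeoutMillis) throws Exception {
        CountDownLatch latch = new CountDownLatch(1);
        AtomicReference<Exception> failure = new AtomicReference<>();
        asyncUpload(payload, latch::countDown, e -> {
            failure.set(e);
            latch.countDown(); // count down on failure too, or the caller waits out the full timeout
        });
        if (latch.await(timeoutMillis, TimeUnit.MILLISECONDS) == false) {
            throw new Exception("Timed out waiting for transfer to complete");
        }
        if (failure.get() != null) {
            throw failure.get();
        }
    }
}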
/** * Uploads provided IndexMetadata's to remote store in parallel. The call is blocking so the method waits for upload to finish and then returns. * @@ -378,7 +494,8 @@ private void writeIndexMetadataAsync( indexMetadataContainer, indexMetadataFilename, blobStoreRepository.getCompressor(), - completionListener + completionListener, + FORMAT_PARAMS ); } @@ -395,6 +512,7 @@ public ClusterMetadataManifest markLastStateAsCommitted(ClusterState clusterStat clusterState, previousManifest.getIndices(), previousManifest.getPreviousClusterUUID(), + previousManifest.getGlobalMetadataFileName(), true ); deleteStaleClusterUUIDs(clusterState, committedManifest); @@ -423,10 +541,11 @@ private ClusterMetadataManifest uploadManifest( ClusterState clusterState, List<UploadedIndexMetadata> uploadedIndexMetadata, String previousClusterUUID, + String globalClusterMetadataFileName, boolean committed ) throws IOException { synchronized (this) { - final String manifestFileName = getManifestFileName(clusterState.term(), clusterState.version()); + final String manifestFileName = getManifestFileName(clusterState.term(), clusterState.version(), committed); final ClusterMetadataManifest manifest = new ClusterMetadataManifest( clusterState.term(), clusterState.getVersion(), @@ -435,6 +554,8 @@ private ClusterMetadataManifest uploadManifest( Version.CURRENT, nodeId, committed, + MANIFEST_CURRENT_CODEC_VERSION, + globalClusterMetadataFileName, uploadedIndexMetadata, previousClusterUUID, clusterState.metadata().clusterUUIDCommitted() @@ -466,6 +587,12 @@ private BlobContainer indexMetadataContainer(String clusterName, String clusterU .blobContainer(getCusterMetadataBasePath(clusterName, clusterUUID).add(INDEX_PATH_TOKEN).add(indexUUID)); } + private BlobContainer globalMetadataContainer(String clusterName, String clusterUUID) { + // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/global-metadata/ + return blobStoreRepository.blobStore() .blobContainer(getCusterMetadataBasePath(clusterName, clusterUUID).add(GLOBAL_METADATA_PATH_TOKEN)); + } + private BlobContainer manifestContainer(String clusterName, String clusterUUID) { // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/manifest return blobStoreRepository.blobStore().blobContainer(getManifestFolderPath(clusterName, clusterUUID)); } @@ -488,22 +615,41 @@ private void setSlowWriteLoggingThreshold(TimeValue slowWriteLoggingThreshold) { this.slowWriteLoggingThreshold = slowWriteLoggingThreshold; } - private static String getManifestFileName(long term, long version) { - // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/manifest/manifest_2147483642_2147483637_456536447 - return String.join(DELIMITER, getManifestFileNamePrefix(term, version), RemoteStoreUtils.invertLong(System.currentTimeMillis())); + static String getManifestFileName(long term, long version, boolean committed) { + // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/manifest/manifest__<inverted_term>__<inverted_version>__C/P__<inverted_timestamp>__<codec_version> + return String.join( + DELIMITER, + MANIFEST_PATH_TOKEN, + RemoteStoreUtils.invertLong(term), + RemoteStoreUtils.invertLong(version), + (committed ? "C" : "P"), // C for committed and P for published + RemoteStoreUtils.invertLong(System.currentTimeMillis()), + String.valueOf(MANIFEST_CURRENT_CODEC_VERSION) // Keep the codec version at the last place only; during read we read the last place to + // determine the codec version.
+ ); } - private static String getManifestFileNamePrefix(long term, long version) { - // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/manifest/manifest_2147483642_2147483637 - return String.join(DELIMITER, MANIFEST_PATH_TOKEN, RemoteStoreUtils.invertLong(term), RemoteStoreUtils.invertLong(version)); + static String indexMetadataFileName(IndexMetadata indexMetadata) { + // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/index//metadata______ + return String.join( + DELIMITER, + METADATA_FILE_PREFIX, + RemoteStoreUtils.invertLong(indexMetadata.getVersion()), + RemoteStoreUtils.invertLong(System.currentTimeMillis()), + String.valueOf(INDEX_METADATA_CURRENT_CODEC_VERSION) // Keep the codec version as the last token; reads inspect the last + // token to determine the codec version. + ); } - private static String indexMetadataFileName(IndexMetadata indexMetadata) { + private static String globalMetadataFileName(Metadata metadata) { + // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/global-metadata/metadata______ return String.join( DELIMITER, - INDEX_METADATA_FILE_PREFIX, - String.valueOf(indexMetadata.getVersion()), - String.valueOf(System.currentTimeMillis()) + METADATA_FILE_PREFIX, + RemoteStoreUtils.invertLong(metadata.version()), + RemoteStoreUtils.invertLong(System.currentTimeMillis()), + String.valueOf(GLOBAL_METADATA_CURRENT_CODEC_VERSION) ); } @@ -516,18 +662,18 @@ private BlobPath getManifestFolderPath(String clusterName, String clusterUUID) { * * @param clusterUUID uuid of cluster state to refer to in remote * @param clusterName name of the cluster + * @param clusterMetadataManifest manifest file of the cluster * @return {@code Map<String, IndexMetadata>} latest IndexUUID to IndexMetadata map */ - public Map<String, IndexMetadata> getLatestIndexMetadata(String clusterName, String clusterUUID) throws IOException { - start(); - Map<String, IndexMetadata> remoteIndexMetadata = new HashMap<>(); - Optional<ClusterMetadataManifest> clusterMetadataManifest = getLatestClusterMetadataManifest(clusterName, clusterUUID); - if (!clusterMetadataManifest.isPresent()) { - throw new IllegalStateException("Latest index metadata is not present for the provided clusterUUID"); - } - assert Objects.equals(clusterUUID, clusterMetadataManifest.get().getClusterUUID()) + private Map<String, IndexMetadata> getIndexMetadataMap( + String clusterName, + String clusterUUID, + ClusterMetadataManifest clusterMetadataManifest + ) { + assert Objects.equals(clusterUUID, clusterMetadataManifest.getClusterUUID()) : "Corrupt ClusterMetadataManifest found.
Cluster UUID mismatch."; - for (UploadedIndexMetadata uploadedIndexMetadata : clusterMetadataManifest.get().getIndices()) { + Map remoteIndexMetadata = new HashMap<>(); + for (UploadedIndexMetadata uploadedIndexMetadata : clusterMetadataManifest.getIndices()) { IndexMetadata indexMetadata = getIndexMetadata(clusterName, clusterUUID, uploadedIndexMetadata); remoteIndexMetadata.put(uploadedIndexMetadata.getIndexUUID(), indexMetadata); } @@ -558,6 +704,52 @@ private IndexMetadata getIndexMetadata(String clusterName, String clusterUUID, U } } + /** + * Fetch latest metadata from remote cluster state including global metadata and index metadata + * + * @param clusterUUID uuid of cluster state to refer to in remote + * @param clusterName name of the cluster + * @return {@link IndexMetadata} + */ + public Metadata getLatestMetadata(String clusterName, String clusterUUID) throws IOException { + start(); + Optional clusterMetadataManifest = getLatestClusterMetadataManifest(clusterName, clusterUUID); + if (!clusterMetadataManifest.isPresent()) { + throw new IllegalStateException( + String.format(Locale.ROOT, "Latest cluster metadata manifest is not present for the provided clusterUUID: %s", clusterUUID) + ); + } + // Fetch Global Metadata + Metadata globalMetadata = getGlobalMetadata(clusterName, clusterUUID, clusterMetadataManifest.get()); + + // Fetch Index Metadata + Map indices = getIndexMetadataMap(clusterName, clusterUUID, clusterMetadataManifest.get()); + + return Metadata.builder(globalMetadata).indices(indices).build(); + } + + private Metadata getGlobalMetadata(String clusterName, String clusterUUID, ClusterMetadataManifest clusterMetadataManifest) { + String globalMetadataFileName = clusterMetadataManifest.getGlobalMetadataFileName(); + try { + // Fetch Global metadata + if (globalMetadataFileName != null) { + String[] splitPath = globalMetadataFileName.split("/"); + return GLOBAL_METADATA_FORMAT.read( + globalMetadataContainer(clusterName, clusterUUID), + splitPath[splitPath.length - 1], + blobStoreRepository.getNamedXContentRegistry() + ); + } else { + return Metadata.EMPTY_METADATA; + } + } catch (IOException e) { + throw new IllegalStateException( + String.format(Locale.ROOT, "Error while downloading Global Metadata - %s", globalMetadataFileName), + e + ); + } + } + /** * Fetch latest ClusterMetadataManifest from remote state store * @@ -590,7 +782,8 @@ public String getLastKnownUUIDFromRemote(String clusterName) { return validChain.get(0); } catch (IOException e) { throw new IllegalStateException( - String.format(Locale.ROOT, "Error while fetching previous UUIDs from remote store for cluster name: %s", clusterName) + String.format(Locale.ROOT, "Error while fetching previous UUIDs from remote store for cluster name: %s", clusterName), + e ); } } @@ -611,7 +804,8 @@ private Map getLatestManifestForAllClusterUUIDs manifest.ifPresent(clusterMetadataManifest -> manifestsByClusterUUID.put(clusterUUID, clusterMetadataManifest)); } catch (Exception e) { throw new IllegalStateException( - String.format(Locale.ROOT, "Exception in fetching manifest for clusterUUID: %s", clusterUUID) + String.format(Locale.ROOT, "Exception in fetching manifest for clusterUUID: %s", clusterUUID), + e ); } } @@ -777,7 +971,7 @@ private Optional getLatestManifestFileName(String clusterName, String cl private ClusterMetadataManifest fetchRemoteClusterMetadataManifest(String clusterName, String clusterUUID, String filename) throws IllegalStateException { try { - return 
RemoteClusterStateService.CLUSTER_METADATA_MANIFEST_FORMAT.read( + return getClusterMetadataManifestBlobStoreFormat(filename).read( manifestContainer(clusterName, clusterUUID), filename, blobStoreRepository.getNamedXContentRegistry() @@ -787,6 +981,29 @@ private ClusterMetadataManifest fetchRemoteClusterMetadataManifest(String cluste } } + private ChecksumBlobStoreFormat getClusterMetadataManifestBlobStoreFormat(String fileName) { + long codecVersion = getManifestCodecVersion(fileName); + if (codecVersion == MANIFEST_CURRENT_CODEC_VERSION) { + return CLUSTER_METADATA_MANIFEST_FORMAT; + } else if (codecVersion == ClusterMetadataManifest.CODEC_V0) { + return CLUSTER_METADATA_MANIFEST_FORMAT_V0; + } + + throw new IllegalArgumentException("Cluster metadata manifest file is corrupted, don't have valid codec version"); + } + + private int getManifestCodecVersion(String fileName) { + String[] splitName = fileName.split(DELIMITER); + if (splitName.length == SPLITED_MANIFEST_FILE_LENGTH) { + return Integer.parseInt(splitName[splitName.length - 1]); // Last value would be codec version. + } else if (splitName.length < SPLITED_MANIFEST_FILE_LENGTH) { // Where codec is not part of file name, i.e. default codec version 0 + // is used. + return ClusterMetadataManifest.CODEC_V0; + } else { + throw new IllegalArgumentException("Manifest file name is corrupted"); + } + } + public static String encodeString(String content) { return Base64.getUrlEncoder().withoutPadding().encodeToString(content.getBytes(StandardCharsets.UTF_8)); } @@ -805,6 +1022,20 @@ public IndexMetadataTransferException(String errorDesc, Throwable cause) { } } + /** + * Exception for GlobalMetadata transfer failures to remote + */ + static class GlobalMetadataTransferException extends RuntimeException { + + public GlobalMetadataTransferException(String errorDesc) { + super(errorDesc); + } + + public GlobalMetadataTransferException(String errorDesc, Throwable cause) { + super(errorDesc, cause); + } + } + /** * Purges all remote cluster state against provided cluster UUIDs * @@ -896,6 +1127,7 @@ private void deleteClusterMetadata( Set filesToKeep = new HashSet<>(); Set staleManifestPaths = new HashSet<>(); Set staleIndexMetadataPaths = new HashSet<>(); + Set staleGlobalMetadataPaths = new HashSet<>(); activeManifestBlobMetadata.forEach(blobMetadata -> { ClusterMetadataManifest clusterMetadataManifest = fetchRemoteClusterMetadataManifest( clusterName, @@ -904,6 +1136,7 @@ private void deleteClusterMetadata( ); clusterMetadataManifest.getIndices() .forEach(uploadedIndexMetadata -> filesToKeep.add(uploadedIndexMetadata.getUploadedFilename())); + filesToKeep.add(clusterMetadataManifest.getGlobalMetadataFileName()); }); staleManifestBlobMetadata.forEach(blobMetadata -> { ClusterMetadataManifest clusterMetadataManifest = fetchRemoteClusterMetadataManifest( @@ -912,12 +1145,19 @@ private void deleteClusterMetadata( blobMetadata.name() ); staleManifestPaths.add(new BlobPath().add(MANIFEST_PATH_TOKEN).buildAsString() + blobMetadata.name()); + if (filesToKeep.contains(clusterMetadataManifest.getGlobalMetadataFileName()) == false) { + String[] globalMetadataSplitPath = clusterMetadataManifest.getGlobalMetadataFileName().split("/"); + staleGlobalMetadataPaths.add( + new BlobPath().add(GLOBAL_METADATA_PATH_TOKEN).buildAsString() + GLOBAL_METADATA_FORMAT.blobName( + globalMetadataSplitPath[globalMetadataSplitPath.length - 1] + ) + ); + } clusterMetadataManifest.getIndices().forEach(uploadedIndexMetadata -> { if 
(filesToKeep.contains(uploadedIndexMetadata.getUploadedFilename()) == false) { staleIndexMetadataPaths.add( new BlobPath().add(INDEX_PATH_TOKEN).add(uploadedIndexMetadata.getIndexUUID()).buildAsString() - + uploadedIndexMetadata.getUploadedFilename() - + ".dat" + + INDEX_METADATA_FORMAT.blobName(uploadedIndexMetadata.getUploadedFilename()) ); } }); @@ -928,6 +1168,7 @@ private void deleteClusterMetadata( return; } + deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleGlobalMetadataPaths)); deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleIndexMetadataPaths)); deleteStalePaths(clusterName, clusterUUID, new ArrayList<>(staleManifestPaths)); } catch (IllegalStateException e) { diff --git a/server/src/main/java/org/opensearch/identity/tokens/AuthToken.java b/server/src/main/java/org/opensearch/identity/tokens/AuthToken.java index c929e7421b3d8..88bb855a6e70d 100644 --- a/server/src/main/java/org/opensearch/identity/tokens/AuthToken.java +++ b/server/src/main/java/org/opensearch/identity/tokens/AuthToken.java @@ -16,4 +16,5 @@ public interface AuthToken { String asAuthHeaderValue(); + } diff --git a/server/src/main/java/org/opensearch/identity/tokens/OnBehalfOfClaims.java b/server/src/main/java/org/opensearch/identity/tokens/OnBehalfOfClaims.java index 3fef248ee6d3a..00e50a59e9486 100644 --- a/server/src/main/java/org/opensearch/identity/tokens/OnBehalfOfClaims.java +++ b/server/src/main/java/org/opensearch/identity/tokens/OnBehalfOfClaims.java @@ -14,46 +14,17 @@ public class OnBehalfOfClaims { private final String audience; - private final String subject; - private final Long expiration; - private final Long not_before; - private final Long issued_at; + private final Long expiration_seconds; /** * Constructor for OnBehalfOfClaims * @param aud the Audience for the token - * @param subject the subject of the token - * @param expiration the expiration time in seconds for the token - * @param not_before the not_before time in seconds for the token - * @param issued_at the issued_at time in seconds for the token - */ - public OnBehalfOfClaims(String aud, String subject, Long expiration, Long not_before, Long issued_at) { - this.audience = aud; - this.subject = subject; - this.expiration = expiration; - this.not_before = not_before; - this.issued_at = issued_at; - } - - /** - * A constructor that sets a default issued at time of the current time - * @param aud the Audience for the token - * @param subject the subject of the token - * @param expiration the expiration time in seconds for the token - * @param not_before the not_before time in seconds for the token - */ - public OnBehalfOfClaims(String aud, String subject, Long expiration, Long not_before) { - this(aud, subject, expiration, not_before, System.currentTimeMillis() / 1000); - } + * @param expiration_seconds the length of time in seconds the token is valid - /** - * A constructor which sets a default not before time of the current time - * @param aud the Audience for the token - * @param subject the subject of the token - * @param expiration the expiration time in seconds for the token */ - public OnBehalfOfClaims(String aud, String subject, Long expiration) { - this(aud, subject, expiration, System.currentTimeMillis() / 1000); + public OnBehalfOfClaims(String aud, Long expiration_seconds) { + this.audience = aud; + this.expiration_seconds = expiration_seconds; } /** @@ -62,26 +33,14 @@ public OnBehalfOfClaims(String aud, String subject, Long expiration) { * @param subject the subject of the token */ public 
OnBehalfOfClaims(String aud, String subject) { - this(aud, subject, System.currentTimeMillis() / 1000 + 300); + this(aud, 300L); } public String getAudience() { return audience; } - public String getSubject() { - return subject; - } - public Long getExpiration() { - return expiration; - } - - public Long getNot_before() { - return not_before; - } - - public Long getIssued_at() { - return issued_at; + return expiration_seconds; } } diff --git a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java index 94fd08b99ac58..dec999e43110f 100644 --- a/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java +++ b/server/src/main/java/org/opensearch/index/recovery/RemoteStoreRestoreService.java @@ -141,7 +141,8 @@ public RemoteRestoreResult restore( || restoreClusterUUID.isBlank()) == false; if (metadataFromRemoteStore) { try { - remoteClusterStateService.getLatestIndexMetadata(currentState.getClusterName().value(), restoreClusterUUID) + remoteClusterStateService.getLatestMetadata(currentState.getClusterName().value(), restoreClusterUUID) + .getIndices() .values() .forEach(indexMetadata -> { indexMetadataMap.put(indexMetadata.getIndex().getName(), new Tuple<>(true, indexMetadata)); diff --git a/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java b/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java index 9dcd16c53e6f3..02fc8feefd698 100644 --- a/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java +++ b/server/src/main/java/org/opensearch/indices/replication/PrimaryShardReplicationSource.java @@ -22,6 +22,7 @@ import org.opensearch.transport.TransportService; import java.util.List; +import java.util.function.BiConsumer; import static org.opensearch.indices.replication.SegmentReplicationSourceService.Actions.GET_CHECKPOINT_INFO; import static org.opensearch.indices.replication.SegmentReplicationSourceService.Actions.GET_SEGMENT_FILES; @@ -80,8 +81,13 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { + // fileProgressTracker is a no-op for node to node recovery + // MultiFileWriter takes care of progress tracking for downloads in this scenario + // TODO: Move state management and tracking into replication methods and use chunking and data + // copy mechanisms only from MultiFileWriter final Writeable.Reader reader = GetSegmentFilesResponse::new; final ActionListener responseListener = ActionListener.map(listener, r -> r); final GetSegmentFilesRequest request = new GetSegmentFilesRequest( diff --git a/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java b/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java index d2000a56401f5..12eabf1e6554f 100644 --- a/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java +++ b/server/src/main/java/org/opensearch/indices/replication/RemoteStoreReplicationSource.java @@ -29,6 +29,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.function.BiConsumer; import java.util.stream.Collectors; /** @@ -95,6 +96,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, 
ActionListener<GetSegmentFilesResponse> listener ) { try { @@ -117,7 +119,12 @@ public void getSegmentFiles( assert directoryFiles.contains(file) == false : "Local store already contains the file " + file; toDownloadSegmentNames.add(file); } - indexShard.getFileDownloader().download(remoteDirectory, storeDirectory, toDownloadSegmentNames); + indexShard.getFileDownloader() + .download( + remoteDirectory, + new ReplicationStatsDirectoryWrapper(storeDirectory, fileProgressTracker), + toDownloadSegmentNames + ); logger.debug("Downloaded segment files from remote store {}", filesToFetch); } finally { indexShard.store().decRef(); } diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSource.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSource.java index 6676b5b667e42..24f0cb15ddb25 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSource.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationSource.java @@ -8,13 +8,19 @@ package org.opensearch.indices.replication; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FilterDirectory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; import org.opensearch.common.util.CancellableThreads.ExecutionCancelledException; import org.opensearch.core.action.ActionListener; import org.opensearch.index.shard.IndexShard; import org.opensearch.index.store.StoreFileMetadata; import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; +import java.io.IOException; import java.util.List; +import java.util.function.BiConsumer; /** * Represents the source of a replication event. @@ -39,6 +45,7 @@ public interface SegmentReplicationSource { * @param checkpoint {@link ReplicationCheckpoint} Checkpoint to fetch metadata for. * @param filesToFetch {@link List} List of files to fetch. * @param indexShard {@link IndexShard} Reference to the IndexShard. + * @param fileProgressTracker {@link BiConsumer} A consumer that updates the replication progress for shard files. * @param listener {@link ActionListener} Listener that completes with the list of files copied. */ void getSegmentFiles( @@ -46,6 +53,7 @@ void getSegmentFiles( ReplicationCheckpoint checkpoint, List<StoreFileMetadata> filesToFetch, IndexShard indexShard, + BiConsumer<String, Long> fileProgressTracker, ActionListener<GetSegmentFilesResponse> listener ); @@ -58,4 +66,69 @@ void getSegmentFiles( * Cancel any ongoing requests, should resolve any ongoing listeners with onFailure with a {@link ExecutionCancelledException}. */ default void cancel() {} + + /** + * Directory wrapper that records copy progress for replication statistics + * + * @opensearch.internal + */ + final class ReplicationStatsDirectoryWrapper extends FilterDirectory { + private final BiConsumer<String, Long> fileProgressTracker; + + ReplicationStatsDirectoryWrapper(Directory in, BiConsumer<String, Long> fileProgressTracker) { + super(in); + this.fileProgressTracker = fileProgressTracker; + } + + @Override + public void copyFrom(Directory from, String src, String dest, IOContext context) throws IOException { + // here we wrap the index input from the source directory to report the progress of the file copy for the recovery stats. + // we increment the number of bytes recovered in the readBytes method below; if users pull statistics they can see immediately + // how much has been recovered.
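The new fileProgressTracker parameter is just a BiConsumer of (file name, bytes copied in this increment). A sketch of what a consumer on the receiving end might look like, as an illustration of the contract rather than OpenSearch's ReplicationLuceneIndex:

```java
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.LongAdder;
import java.util.function.BiConsumer;

// Accumulate per-file progress so a stats API can report bytes recovered
// while the copy is still running.
public class FileProgressSketch {

    private final Map<String, LongAdder> bytesPerFile = new ConcurrentHashMap<>();

    // matches the BiConsumer<String, Long> fileProgressTracker parameter
    public BiConsumer<String, Long> tracker() {
        return (fileName, bytes) -> bytesPerFile
            .computeIfAbsent(fileName, k -> new LongAdder())
            .add(bytes);
    }

    public long bytesRecovered(String fileName) {
        LongAdder adder = bytesPerFile.get(fileName);
        return adder == null ? 0L : adder.sum();
    }
}
```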
+ in.copyFrom(new FilterDirectory(from) { + @Override + public IndexInput openInput(String name, IOContext context) throws IOException { + final IndexInput input = in.openInput(name, context); + return new IndexInput("StatsDirectoryWrapper(" + input.toString() + ")") { + @Override + public void close() throws IOException { + input.close(); + } + + @Override + public long getFilePointer() { + throw new UnsupportedOperationException("only straight copies are supported"); + } + + @Override + public void seek(long pos) throws IOException { + throw new UnsupportedOperationException("seeks are not supported"); + } + + @Override + public long length() { + return input.length(); + } + + @Override + public IndexInput slice(String sliceDescription, long offset, long length) throws IOException { + throw new UnsupportedOperationException("slices are not supported"); + } + + @Override + public byte readByte() throws IOException { + throw new UnsupportedOperationException("use a buffer if you wanna perform well"); + } + + @Override + public void readBytes(byte[] b, int offset, int len) throws IOException { + // we rely on the fact that copyFrom uses a buffer + input.readBytes(b, offset, len); + fileProgressTracker.accept(dest, (long) len); + } + }; + } + }, src, dest, context); + } + } } diff --git a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java index 0eb6ce36fa63d..cd6dbe8af90d9 100644 --- a/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java +++ b/server/src/main/java/org/opensearch/indices/replication/SegmentReplicationTarget.java @@ -170,7 +170,14 @@ public void startReplication(ActionListener listener) { final List filesToFetch = getFiles(checkpointInfo); state.setStage(SegmentReplicationState.Stage.GET_FILES); cancellableThreads.checkForCancel(); - source.getSegmentFiles(getId(), checkpointInfo.getCheckpoint(), filesToFetch, indexShard, getFilesListener); + source.getSegmentFiles( + getId(), + checkpointInfo.getCheckpoint(), + filesToFetch, + indexShard, + this::updateFileRecoveryBytes, + getFilesListener + ); }, listener::onFailure); getFilesListener.whenComplete(response -> { @@ -240,6 +247,20 @@ private boolean validateLocalChecksum(StoreFileMetadata file) { } } + /** + * Updates the state to reflect recovery progress for the given file and + * updates the last access time for the target. 
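End to end: the directory wrapper emits (fileName, bytes) increments from readBytes, and updateFileRecoveryBytes folds them into the shard's recovery index. The counting-delegate trick is not Lucene-specific; here is the same idea in plain java.io, for intuition only and under no OpenSearch APIs:

```java
import java.io.ByteArrayInputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.function.BiConsumer;

// Forward every bulk read to the wrapped source and report the byte count to a
// callback, just as the wrapper above does at the IndexInput.readBytes level.
class CountingInputStream extends FilterInputStream {
    private final BiConsumer<String, Long> progress;
    private final String name;

    CountingInputStream(InputStream in, String name, BiConsumer<String, Long> progress) {
        super(in);
        this.name = name;
        this.progress = progress;
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        int n = super.read(b, off, len);
        if (n > 0) {
            progress.accept(name, (long) n); // incremental progress, mirroring readBytes in the wrapper
        }
        return n;
    }

    public static void main(String[] args) throws IOException {
        long[] total = {0};
        try (InputStream in = new CountingInputStream(
                new ByteArrayInputStream(new byte[4096]), "_0.cfs", (f, n) -> total[0] += n)) {
            in.transferTo(OutputStream.nullOutputStream()); // bulk copy drives read(byte[], int, int)
        }
        System.out.println(total[0]); // 4096
    }
}
```

The wrapper rejects seeks, slices, and single-byte reads for the same reason this sketch only overrides the bulk read: copyFrom performs a straight sequential, buffered copy, so that is the only path that needs to observe bytes.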
+ * @param fileName Name of the file being downloaded + * @param bytesRecovered Number of bytes recovered + */ + private void updateFileRecoveryBytes(String fileName, long bytesRecovered) { + ReplicationLuceneIndex index = state.getIndex(); + if (index != null) { + index.addRecoveredBytesToFile(fileName, bytesRecovered); + } + setLastAccessTime(); + } + private void finalizeReplication(CheckpointInfoResponse checkpointInfoResponse) throws OpenSearchCorruptionException { cancellableThreads.checkForCancel(); state.setStage(SegmentReplicationState.Stage.FINALIZE_REPLICATION); diff --git a/server/src/main/java/org/opensearch/monitor/fs/FsInfo.java b/server/src/main/java/org/opensearch/monitor/fs/FsInfo.java index 114702ff0d351..4e2e9f280d765 100644 --- a/server/src/main/java/org/opensearch/monitor/fs/FsInfo.java +++ b/server/src/main/java/org/opensearch/monitor/fs/FsInfo.java @@ -235,6 +235,14 @@ public static class DeviceStats implements Writeable, ToXContentFragment { final long previousWritesCompleted; final long currentSectorsWritten; final long previousSectorsWritten; + final long currentReadTime; + final long previousReadTime; + final long currentWriteTime; + final long previousWriteTime; + final long currentQueueSize; + final long previousQueueSize; + final long currentIOTime; + final long previousIOTime; public DeviceStats( final int majorDeviceNumber, @@ -244,6 +252,10 @@ public DeviceStats( final long currentSectorsRead, final long currentWritesCompleted, final long currentSectorsWritten, + final long currentReadTime, + final long currentWriteTime, + final long currentQueueSize, + final long currentIOTime, final DeviceStats previousDeviceStats ) { this( @@ -257,7 +269,15 @@ public DeviceStats( currentSectorsRead, previousDeviceStats != null ? previousDeviceStats.currentSectorsRead : -1, currentWritesCompleted, - previousDeviceStats != null ? previousDeviceStats.currentWritesCompleted : -1 + previousDeviceStats != null ? previousDeviceStats.currentWritesCompleted : -1, + currentReadTime, + previousDeviceStats != null ? previousDeviceStats.currentReadTime : -1, + currentWriteTime, + previousDeviceStats != null ? previousDeviceStats.currentWriteTime : -1, + currentQueueSize, + previousDeviceStats != null ? previousDeviceStats.currentQueueSize : -1, + currentIOTime, + previousDeviceStats != null ?
previousDeviceStats.currentIOTime : -1 ); } @@ -272,7 +292,15 @@ private DeviceStats( final long currentSectorsRead, final long previousSectorsRead, final long currentWritesCompleted, - final long previousWritesCompleted + final long previousWritesCompleted, + final long currentReadTime, + final long previousReadTime, + final long currentWriteTime, + final long previousWriteTime, + final long currentQueueSize, + final long previousQueueSize, + final long currentIOTime, + final long previousIOTime ) { this.majorDeviceNumber = majorDeviceNumber; this.minorDeviceNumber = minorDeviceNumber; @@ -285,6 +313,14 @@ private DeviceStats( this.previousSectorsRead = previousSectorsRead; this.currentSectorsWritten = currentSectorsWritten; this.previousSectorsWritten = previousSectorsWritten; + this.currentReadTime = currentReadTime; + this.previousReadTime = previousReadTime; + this.currentWriteTime = currentWriteTime; + this.previousWriteTime = previousWriteTime; + this.currentQueueSize = currentQueueSize; + this.previousQueueSize = previousQueueSize; + this.currentIOTime = currentIOTime; + this.previousIOTime = previousIOTime; } public DeviceStats(StreamInput in) throws IOException { @@ -299,6 +335,25 @@ public DeviceStats(StreamInput in) throws IOException { previousSectorsRead = in.readLong(); currentSectorsWritten = in.readLong(); previousSectorsWritten = in.readLong(); + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + currentReadTime = in.readLong(); + previousReadTime = in.readLong(); + currentWriteTime = in.readLong(); + previousWriteTime = in.readLong(); + currentQueueSize = in.readLong(); + previousQueueSize = in.readLong(); + currentIOTime = in.readLong(); + previousIOTime = in.readLong(); + } else { + currentReadTime = 0; + previousReadTime = 0; + currentWriteTime = 0; + previousWriteTime = 0; + currentQueueSize = 0; + previousQueueSize = 0; + currentIOTime = 0; + previousIOTime = 0; + } } @Override @@ -314,6 +369,16 @@ public void writeTo(StreamOutput out) throws IOException { out.writeLong(previousSectorsRead); out.writeLong(currentSectorsWritten); out.writeLong(previousSectorsWritten); + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeLong(currentReadTime); + out.writeLong(previousReadTime); + out.writeLong(currentWriteTime); + out.writeLong(previousWriteTime); + out.writeLong(currentQueueSize); + out.writeLong(previousQueueSize); + out.writeLong(currentIOTime); + out.writeLong(previousIOTime); + } } public long operations() { @@ -346,6 +411,39 @@ public long writeKilobytes() { return (currentSectorsWritten - previousSectorsWritten) / 2; } + /** + * Total time taken for all read operations + */ + public long readTime() { + if (previousReadTime == -1) return -1; + return currentReadTime - previousReadTime; + } + + /** + * Total time taken for all write operations + */ + public long writeTime() { + if (previousWriteTime == -1) return -1; + return currentWriteTime - previousWriteTime; + } + + /** + * Queue size based on weighted time spent doing I/Os + */ + public long queueSize() { + if (previousQueueSize == -1) return -1; + return currentQueueSize - previousQueueSize; + } + + /** + * Total time spent doing I/Os + */ + public long ioTimeInMillis() { + if (previousIOTime == -1) return -1; + + return (currentIOTime - previousIOTime); + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.field("device_name", deviceName); @@ -354,9 +452,12 @@ public XContentBuilder toXContent(XContentBuilder builder, Params 
params) throws builder.field(IoStats.WRITE_OPERATIONS, writeOperations()); builder.field(IoStats.READ_KILOBYTES, readKilobytes()); builder.field(IoStats.WRITE_KILOBYTES, writeKilobytes()); + builder.field(IoStats.READ_TIME, readTime()); + builder.field(IoStats.WRITE_TIME, writeTime()); + builder.field(IoStats.QUEUE_SIZE, queueSize()); + builder.field(IoStats.IO_TIME_MS, ioTimeInMillis()); return builder; } - } /** @@ -371,6 +472,10 @@ public static class IoStats implements Writeable, ToXContentFragment { private static final String WRITE_OPERATIONS = "write_operations"; private static final String READ_KILOBYTES = "read_kilobytes"; private static final String WRITE_KILOBYTES = "write_kilobytes"; + private static final String READ_TIME = "read_time"; + private static final String WRITE_TIME = "write_time"; + private static final String QUEUE_SIZE = "queue_size"; + private static final String IO_TIME_MS = "io_time_in_millis"; final DeviceStats[] devicesStats; final long totalOperations; @@ -378,6 +483,10 @@ public static class IoStats implements Writeable, ToXContentFragment { final long totalWriteOperations; final long totalReadKilobytes; final long totalWriteKilobytes; + final long totalReadTime; + final long totalWriteTime; + final long totalQueueSize; + final long totalIOTimeInMillis; public IoStats(final DeviceStats[] devicesStats) { this.devicesStats = devicesStats; @@ -386,18 +495,30 @@ public IoStats(final DeviceStats[] devicesStats) { long totalWriteOperations = 0; long totalReadKilobytes = 0; long totalWriteKilobytes = 0; + long totalReadTime = 0; + long totalWriteTime = 0; + long totalQueueSize = 0; + long totalIOTimeInMillis = 0; for (DeviceStats deviceStats : devicesStats) { totalOperations += deviceStats.operations() != -1 ? deviceStats.operations() : 0; totalReadOperations += deviceStats.readOperations() != -1 ? deviceStats.readOperations() : 0; totalWriteOperations += deviceStats.writeOperations() != -1 ? deviceStats.writeOperations() : 0; totalReadKilobytes += deviceStats.readKilobytes() != -1 ? deviceStats.readKilobytes() : 0; totalWriteKilobytes += deviceStats.writeKilobytes() != -1 ? deviceStats.writeKilobytes() : 0; + totalReadTime += deviceStats.readTime() != -1 ? deviceStats.readTime() : 0; + totalWriteTime += deviceStats.writeTime() != -1 ? deviceStats.writeTime() : 0; + totalQueueSize += deviceStats.queueSize() != -1 ? deviceStats.queueSize() : 0; + totalIOTimeInMillis += deviceStats.ioTimeInMillis() != -1 ? 
deviceStats.ioTimeInMillis() : 0; } this.totalOperations = totalOperations; this.totalReadOperations = totalReadOperations; this.totalWriteOperations = totalWriteOperations; this.totalReadKilobytes = totalReadKilobytes; this.totalWriteKilobytes = totalWriteKilobytes; + this.totalReadTime = totalReadTime; + this.totalWriteTime = totalWriteTime; + this.totalQueueSize = totalQueueSize; + this.totalIOTimeInMillis = totalIOTimeInMillis; } public IoStats(StreamInput in) throws IOException { @@ -412,6 +533,17 @@ public IoStats(StreamInput in) throws IOException { this.totalWriteOperations = in.readLong(); this.totalReadKilobytes = in.readLong(); this.totalWriteKilobytes = in.readLong(); + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + this.totalReadTime = in.readLong(); + this.totalWriteTime = in.readLong(); + this.totalQueueSize = in.readLong(); + this.totalIOTimeInMillis = in.readLong(); + } else { + this.totalReadTime = 0; + this.totalWriteTime = 0; + this.totalQueueSize = 0; + this.totalIOTimeInMillis = 0; + } } @Override @@ -425,6 +557,12 @@ public void writeTo(StreamOutput out) throws IOException { out.writeLong(totalWriteOperations); out.writeLong(totalReadKilobytes); out.writeLong(totalWriteKilobytes); + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeLong(totalReadTime); + out.writeLong(totalWriteTime); + out.writeLong(totalQueueSize); + out.writeLong(totalIOTimeInMillis); + } } public DeviceStats[] getDevicesStats() { @@ -451,6 +589,34 @@ public long getTotalWriteKilobytes() { return totalWriteKilobytes; } + /** + * Sum of read time across all devices + */ + public long getTotalReadTime() { + return totalReadTime; + } + + /** + * Sum of write time across all devices + */ + public long getTotalWriteTime() { + return totalWriteTime; + } + + /** + * Sum of queue size across all devices + */ + public long getTotalQueueSize() { + return totalQueueSize; + } + + /** + * Sum of IO time across all devices + */ + public long getTotalIOTimeMillis() { + return totalIOTimeInMillis; + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { if (devicesStats.length > 0) { @@ -468,11 +634,15 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(WRITE_OPERATIONS, totalWriteOperations); builder.field(READ_KILOBYTES, totalReadKilobytes); builder.field(WRITE_KILOBYTES, totalWriteKilobytes); + + builder.field(READ_TIME, totalReadTime); + builder.field(WRITE_TIME, totalWriteTime); + builder.field(QUEUE_SIZE, totalQueueSize); + builder.field(IO_TIME_MS, totalIOTimeInMillis); builder.endObject(); } return builder; } - } private final long timestamp; diff --git a/server/src/main/java/org/opensearch/monitor/fs/FsProbe.java b/server/src/main/java/org/opensearch/monitor/fs/FsProbe.java index e20d84cd9763e..f4731a4a34373 100644 --- a/server/src/main/java/org/opensearch/monitor/fs/FsProbe.java +++ b/server/src/main/java/org/opensearch/monitor/fs/FsProbe.java @@ -109,6 +109,25 @@ final FsInfo.IoStats ioStats(final Set> devicesNumbers, List devicesStats = new ArrayList<>(); + /** + * The /proc/diskstats file displays the I/O statistics of block devices. 
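Concretely, parsing one /proc/diskstats line under the field layout enumerated in the comment just below might look like this sketch (the sample line is made up; the zero-based field indices match the probe's):

```java
import java.util.Locale;

public class DiskStatsLineSketch {
    public static void main(String[] args) {
        // fields[6] = time spent reading (ms), fields[10] = time spent writing (ms),
        // fields[12] = time spent doing I/Os (ms), fields[13] = weighted time doing I/Os (ms).
        // Older kernels may emit fewer fields for partitions, hence the length guards.
        String line = " 259  0 nvme0n1 48263 1823 3096314 12287 285734 93736 10723072 401992 0 78212 414279";
        String[] fields = line.trim().split("\\s+");
        long readsCompleted = Long.parseLong(fields[3]);
        long readTimeMs = Long.parseLong(fields[6]);
        long writeTimeMs = Long.parseLong(fields[10]);
        long ioTimeMs = fields.length > 12 ? Long.parseLong(fields[12]) : 0;
        long weightedIoMs = fields.length > 13 ? Long.parseLong(fields[13]) : 0;
        System.out.printf(Locale.ROOT, "reads=%d readTime=%dms ioTime=%dms queue=%dms%n",
            readsCompleted, readTimeMs, ioTimeMs, weightedIoMs);
    }
}
```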
+ * Each line contains the following 14 fields (newer kernels may append additional fields): + * + * 1 major number + * 2 minor number + * 3 device name + * 4 reads completed successfully + * 5 reads merged + * 6 sectors read + * 7 time spent reading (ms) + * 8 writes completed + * 9 writes merged + * 10 sectors written + * 11 time spent writing (ms) + * 12 I/Os currently in progress + * 13 time spent doing I/Os (ms) ---- used to derive IO utilization + * 14 weighted time spent doing I/Os (ms) ---- used as the queue size + */ List<String> lines = readProcDiskStats(); if (!lines.isEmpty()) { for (String line : lines) { @@ -123,6 +142,12 @@ final FsInfo.IoStats ioStats(final Set<Tuple<Integer, Integer>> devicesNumbers, final long sectorsRead = Long.parseLong(fields[5]); final long writesCompleted = Long.parseLong(fields[7]); final long sectorsWritten = Long.parseLong(fields[9]); + // readTime and writeTime capture the total time taken for read/write requests to complete + // ioTime captures the actual time the queue and disks are busy + final long readTime = Long.parseLong(fields[6]); + final long writeTime = Long.parseLong(fields[10]); + final long ioTime = fields.length > 12 ? Long.parseLong(fields[12]) : 0; + final long queueSize = fields.length > 13 ? Long.parseLong(fields[13]) : 0; final FsInfo.DeviceStats deviceStats = new FsInfo.DeviceStats( majorDeviceNumber, minorDeviceNumber, @@ -131,6 +156,10 @@ final FsInfo.IoStats ioStats(final Set<Tuple<Integer, Integer>> devicesNumbers, sectorsRead, writesCompleted, sectorsWritten, + readTime, + writeTime, + queueSize, + ioTime, deviceMap.get(Tuple.tuple(majorDeviceNumber, minorDeviceNumber)) ); devicesStats.add(deviceStats); diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index c456f01135dee..c9148f382a028 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -600,10 +600,23 @@ protected Node( MetricsRegistryFactory metricsRegistryFactory; if (FeatureFlags.isEnabled(TELEMETRY)) { final TelemetrySettings telemetrySettings = new TelemetrySettings(settings, clusterService.getClusterSettings()); - List<TelemetryPlugin> telemetryPlugins = pluginsService.filterPlugins(TelemetryPlugin.class); - TelemetryModule telemetryModule = new TelemetryModule(telemetryPlugins, telemetrySettings); - tracerFactory = new TracerFactory(telemetrySettings, telemetryModule.getTelemetry(), threadPool.getThreadContext()); - metricsRegistryFactory = new MetricsRegistryFactory(telemetrySettings, telemetryModule.getTelemetry()); + if (telemetrySettings.isTracingFeatureEnabled() || telemetrySettings.isMetricsFeatureEnabled()) { + List<TelemetryPlugin> telemetryPlugins = pluginsService.filterPlugins(TelemetryPlugin.class); + TelemetryModule telemetryModule = new TelemetryModule(telemetryPlugins, telemetrySettings); + if (telemetrySettings.isTracingFeatureEnabled()) { + tracerFactory = new TracerFactory(telemetrySettings, telemetryModule.getTelemetry(), threadPool.getThreadContext()); + } else { + tracerFactory = new NoopTracerFactory(); + } + if (telemetrySettings.isMetricsFeatureEnabled()) { + metricsRegistryFactory = new MetricsRegistryFactory(telemetrySettings, telemetryModule.getTelemetry()); + } else { + metricsRegistryFactory = new NoopMetricsRegistryFactory(); + } + } else { + tracerFactory = new NoopTracerFactory(); + metricsRegistryFactory = new NoopMetricsRegistryFactory(); + } } else { tracerFactory = new NoopTracerFactory(); metricsRegistryFactory = new NoopMetricsRegistryFactory(); } @@ -1102,7 +1115,8 @@ protected Node( searchPipelineService, fileCache,
taskCancellationMonitoringService, - resourceUsageCollectorService + resourceUsageCollectorService, + repositoryService ); final SearchService searchService = newSearchService( diff --git a/server/src/main/java/org/opensearch/node/NodeService.java b/server/src/main/java/org/opensearch/node/NodeService.java index 9bb07080fa717..e2d7bc2c86ba3 100644 --- a/server/src/main/java/org/opensearch/node/NodeService.java +++ b/server/src/main/java/org/opensearch/node/NodeService.java @@ -53,6 +53,7 @@ import org.opensearch.ingest.IngestService; import org.opensearch.monitor.MonitorService; import org.opensearch.plugins.PluginsService; +import org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptService; import org.opensearch.search.aggregations.support.AggregationUsageService; import org.opensearch.search.backpressure.SearchBackpressureService; @@ -93,6 +94,7 @@ public class NodeService implements Closeable { private final Discovery discovery; private final FileCache fileCache; private final TaskCancellationMonitoringService taskCancellationMonitoringService; + private final RepositoriesService repositoriesService; NodeService( Settings settings, @@ -116,7 +118,8 @@ public class NodeService implements Closeable { SearchPipelineService searchPipelineService, FileCache fileCache, TaskCancellationMonitoringService taskCancellationMonitoringService, - ResourceUsageCollectorService resourceUsageCollectorService + ResourceUsageCollectorService resourceUsageCollectorService, + RepositoriesService repositoriesService ) { this.settings = settings; this.threadPool = threadPool; @@ -140,6 +143,7 @@ public class NodeService implements Closeable { this.fileCache = fileCache; this.taskCancellationMonitoringService = taskCancellationMonitoringService; this.resourceUsageCollectorService = resourceUsageCollectorService; + this.repositoriesService = repositoriesService; clusterService.addStateApplier(ingestService); clusterService.addStateApplier(searchPipelineService); } @@ -221,7 +225,8 @@ public NodeStats stats( boolean fileCacheStats, boolean taskCancellation, boolean searchPipelineStats, - boolean resourceUsageStats + boolean resourceUsageStats, + boolean repositoriesStats ) { // for indices stats we want to include previous allocated shards stats as well (it will // only be applied to the sensible ones to use, like refresh/merge/flush/indexing stats) @@ -250,7 +255,8 @@ public NodeStats stats( weightedRoutingStats ? WeightedRoutingStats.getInstance() : null, fileCacheStats && fileCache != null ? fileCache.fileCacheStats() : null, taskCancellation ? this.taskCancellationMonitoringService.stats() : null, - searchPipelineStats ? this.searchPipelineService.stats() : null + searchPipelineStats ? this.searchPipelineService.stats() : null, + repositoriesStats ? 
this.repositoriesService.getRepositoriesStats() : null ); } diff --git a/server/src/main/java/org/opensearch/repositories/RepositoriesService.java b/server/src/main/java/org/opensearch/repositories/RepositoriesService.java index 72266c053a1ae..68669feb16abc 100644 --- a/server/src/main/java/org/opensearch/repositories/RepositoriesService.java +++ b/server/src/main/java/org/opensearch/repositories/RepositoriesService.java @@ -457,7 +457,6 @@ public void applyClusterState(ClusterChangedEvent event) { logger.debug("unregistering repository [{}]", entry.getKey()); Repository repository = entry.getValue(); closeRepository(repository); - archiveRepositoryStats(repository, state.version()); } else { survivors.put(entry.getKey(), entry.getValue()); } @@ -485,7 +484,6 @@ public void applyClusterState(ClusterChangedEvent event) { } else { logger.debug("updating repository [{}]", repositoryMetadata.name()); closeRepository(repository); - archiveRepositoryStats(repository, state.version()); repository = null; try { repository = createRepository(repositoryMetadata, typesRegistry); @@ -575,12 +573,12 @@ public Repository repository(String repositoryName) { } public List repositoriesStats() { - List archivedRepoStats = repositoriesStatsArchive.getArchivedStats(); List activeRepoStats = getRepositoryStatsForActiveRepositories(); + return activeRepoStats; + } - List repositoriesStats = new ArrayList<>(archivedRepoStats); - repositoriesStats.addAll(activeRepoStats); - return repositoriesStats; + public RepositoriesStats getRepositoriesStats() { + return new RepositoriesStats(repositoriesStats()); } private List getRepositoryStatsForActiveRepositories() { @@ -640,15 +638,6 @@ public void closeRepository(Repository repository) { repository.close(); } - private void archiveRepositoryStats(Repository repository, long clusterStateVersion) { - if (repository instanceof MeteredBlobStoreRepository) { - RepositoryStatsSnapshot stats = ((MeteredBlobStoreRepository) repository).statsSnapshotForArchival(clusterStateVersion); - if (repositoriesStatsArchive.archive(stats) == false) { - logger.warn("Unable to archive the repository stats [{}] as the archive is full.", stats); - } - } - } - /** * Creates repository holder. This method starts the non-internal repository */ diff --git a/server/src/main/java/org/opensearch/repositories/RepositoriesStats.java b/server/src/main/java/org/opensearch/repositories/RepositoriesStats.java new file mode 100644 index 0000000000000..b24e0dddd852a --- /dev/null +++ b/server/src/main/java/org/opensearch/repositories/RepositoriesStats.java @@ -0,0 +1,52 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.repositories; + +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.common.util.CollectionUtils; +import org.opensearch.core.xcontent.ToXContentObject; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.List; + +/** + * Encapsulates stats for multiple repositories + */ +public class RepositoriesStats implements Writeable, ToXContentObject { + + List<RepositoryStatsSnapshot> repositoryStatsSnapshots; + + public RepositoriesStats(List<RepositoryStatsSnapshot> repositoryStatsSnapshots) { + this.repositoryStatsSnapshots = repositoryStatsSnapshots; + } + + public RepositoriesStats(StreamInput in) throws IOException { + this.repositoryStatsSnapshots = in.readList(RepositoryStatsSnapshot::new); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeList(repositoryStatsSnapshots); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startArray("repositories"); + if (CollectionUtils.isEmpty(repositoryStatsSnapshots) == false) { + for (RepositoryStatsSnapshot repositoryStatsSnapshot : repositoryStatsSnapshots) { + repositoryStatsSnapshot.toXContent(builder, params); + } + } + builder.endArray(); + return builder; + } +} diff --git a/server/src/main/java/org/opensearch/repositories/RepositoriesStatsArchive.java b/server/src/main/java/org/opensearch/repositories/RepositoriesStatsArchive.java index b8f100706f81e..3d35f75176eaf 100644 --- a/server/src/main/java/org/opensearch/repositories/RepositoriesStatsArchive.java +++ b/server/src/main/java/org/opensearch/repositories/RepositoriesStatsArchive.java @@ -70,11 +70,6 @@ public RepositoriesStatsArchive(TimeValue retentionPeriod, int maxCapacity, Long * @return {@code true} if the repository stats were archived, {@code false} otherwise.
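For context, the archive whose asserts this hunk removes behaves roughly like the following sketch: entries are evicted by age first, then new entries are rejected once capacity is reached. The snapshot type here is a generic placeholder, and this is a simplification of RepositoriesStatsArchive, not its implementation:

```java
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.function.LongSupplier;

public class StatsArchiveSketch<T> {
    private record Entry<V>(V value, long createdAtMillis) {}

    private final Deque<Entry<T>> archive = new ArrayDeque<>();
    private final long retentionMillis;
    private final int maxCapacity;
    private final LongSupplier relativeTimeMillis;

    public StatsArchiveSketch(long retentionMillis, int maxCapacity, LongSupplier relativeTimeMillis) {
        this.retentionMillis = retentionMillis;
        this.maxCapacity = maxCapacity;
        this.relativeTimeMillis = relativeTimeMillis;
    }

    public synchronized boolean archive(T stats) {
        evict();
        if (archive.size() >= maxCapacity) {
            return false; // caller logs a warning instead of dropping silently
        }
        return archive.add(new Entry<>(stats, relativeTimeMillis.getAsLong()));
    }

    private void evict() {
        long now = relativeTimeMillis.getAsLong();
        while (archive.isEmpty() == false && now - archive.peekFirst().createdAtMillis() > retentionMillis) {
            archive.pollFirst(); // drop entries older than the retention period
        }
    }
}
```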
*/ synchronized boolean archive(final RepositoryStatsSnapshot repositoryStats) { - assert containsRepositoryStats(repositoryStats) == false : "A repository with ephemeral id " - + repositoryStats.getRepositoryInfo().ephemeralId - + " is already archived"; - assert repositoryStats.isArchived(); - evict(); if (archive.size() >= maxCapacity) { @@ -116,15 +111,6 @@ private void evict() { } } - private boolean containsRepositoryStats(RepositoryStatsSnapshot repositoryStats) { - return archive.stream() - .anyMatch( - entry -> entry.repositoryStatsSnapshot.getRepositoryInfo().ephemeralId.equals( - repositoryStats.getRepositoryInfo().ephemeralId - ) - ); - } - private static class ArchiveEntry { private final RepositoryStatsSnapshot repositoryStatsSnapshot; private final long createdAtMillis; diff --git a/server/src/main/java/org/opensearch/repositories/RepositoryInfo.java b/server/src/main/java/org/opensearch/repositories/RepositoryInfo.java index 8aa86fc46d591..387a685bd6526 100644 --- a/server/src/main/java/org/opensearch/repositories/RepositoryInfo.java +++ b/server/src/main/java/org/opensearch/repositories/RepositoryInfo.java @@ -32,7 +32,6 @@ package org.opensearch.repositories; -import org.opensearch.common.Nullable; import org.opensearch.core.common.Strings; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; @@ -51,64 +50,27 @@ * @opensearch.internal */ public final class RepositoryInfo implements Writeable, ToXContentFragment { - public final String ephemeralId; public final String name; public final String type; public final Map location; - public final long startedAt; - @Nullable - public final Long stoppedAt; - public RepositoryInfo(String ephemeralId, String name, String type, Map location, long startedAt) { - this(ephemeralId, name, type, location, startedAt, null); - } - - public RepositoryInfo( - String ephemeralId, - String name, - String type, - Map location, - long startedAt, - @Nullable Long stoppedAt - ) { - this.ephemeralId = ephemeralId; + public RepositoryInfo(String name, String type, Map location) { this.name = name; this.type = type; this.location = location; - this.startedAt = startedAt; - if (stoppedAt != null && startedAt > stoppedAt) { - throw new IllegalArgumentException("createdAt must be before or equal to stoppedAt"); - } - this.stoppedAt = stoppedAt; } public RepositoryInfo(StreamInput in) throws IOException { - this.ephemeralId = in.readString(); this.name = in.readString(); this.type = in.readString(); this.location = in.readMap(StreamInput::readString, StreamInput::readString); - this.startedAt = in.readLong(); - this.stoppedAt = in.readOptionalLong(); - } - - public RepositoryInfo stopped(long stoppedAt) { - assert isStopped() == false : "The repository is already stopped"; - - return new RepositoryInfo(ephemeralId, name, type, location, startedAt, stoppedAt); - } - - public boolean isStopped() { - return stoppedAt != null; } @Override public void writeTo(StreamOutput out) throws IOException { - out.writeString(ephemeralId); out.writeString(name); out.writeString(type); out.writeMap(location, StreamOutput::writeString, StreamOutput::writeString); - out.writeLong(startedAt); - out.writeOptionalLong(stoppedAt); } @Override @@ -116,11 +78,6 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field("repository_name", name); builder.field("repository_type", type); builder.field("repository_location", location); - builder.field("repository_ephemeral_id", 
ephemeralId); - builder.field("repository_started_at", startedAt); - if (stoppedAt != null) { - builder.field("repository_stopped_at", stoppedAt); - } return builder; } @@ -129,17 +86,12 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; RepositoryInfo that = (RepositoryInfo) o; - return ephemeralId.equals(that.ephemeralId) - && name.equals(that.name) - && type.equals(that.type) - && location.equals(that.location) - && startedAt == that.startedAt - && Objects.equals(stoppedAt, that.stoppedAt); + return name.equals(that.name) && type.equals(that.type) && location.equals(that.location); } @Override public int hashCode() { - return Objects.hash(ephemeralId, name, type, location, startedAt, stoppedAt); + return Objects.hash(name, type, location); } @Override diff --git a/server/src/main/java/org/opensearch/repositories/RepositoryStats.java b/server/src/main/java/org/opensearch/repositories/RepositoryStats.java index efd5d6f8560b6..ab97c5eaa1f7a 100644 --- a/server/src/main/java/org/opensearch/repositories/RepositoryStats.java +++ b/server/src/main/java/org/opensearch/repositories/RepositoryStats.java @@ -32,9 +32,13 @@ package org.opensearch.repositories; +import org.opensearch.common.Nullable; +import org.opensearch.common.blobstore.BlobStore; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContentFragment; +import org.opensearch.core.xcontent.XContentBuilder; import java.io.IOException; import java.util.Collections; @@ -47,32 +51,63 @@ * * @opensearch.internal */ -public class RepositoryStats implements Writeable { +public class RepositoryStats implements Writeable, ToXContentFragment { public static final RepositoryStats EMPTY_STATS = new RepositoryStats(Collections.emptyMap()); + @Nullable public final Map requestCounts; + @Nullable + public final Map> extendedStats; + public final boolean detailed; public RepositoryStats(Map requestCounts) { this.requestCounts = Collections.unmodifiableMap(requestCounts); + this.extendedStats = Collections.emptyMap(); + this.detailed = false; + } + + public RepositoryStats(Map> extendedStats, boolean detailed) { + this.requestCounts = Collections.emptyMap(); + this.extendedStats = Collections.unmodifiableMap(extendedStats); + this.detailed = detailed; } public RepositoryStats(StreamInput in) throws IOException { this.requestCounts = in.readMap(StreamInput::readString, StreamInput::readLong); + this.extendedStats = in.readMap( + e -> e.readEnum(BlobStore.Metric.class), + i -> i.readMap(StreamInput::readString, StreamInput::readLong) + ); + this.detailed = in.readBoolean(); } public RepositoryStats merge(RepositoryStats otherStats) { - final Map result = new HashMap<>(); - result.putAll(requestCounts); - for (Map.Entry entry : otherStats.requestCounts.entrySet()) { - result.merge(entry.getKey(), entry.getValue(), Math::addExact); + assert this.detailed == otherStats.detailed; + if (detailed) { + final Map> result = new HashMap<>(); + result.putAll(extendedStats); + for (Map.Entry> entry : otherStats.extendedStats.entrySet()) { + for (Map.Entry nested : entry.getValue().entrySet()) { + result.get(entry.getKey()).merge(nested.getKey(), nested.getValue(), Math::addExact); + } + } + return new RepositoryStats(result, true); + } else { + final Map result = new HashMap<>(); + result.putAll(requestCounts); + for (Map.Entry entry : 
otherStats.requestCounts.entrySet()) { + result.merge(entry.getKey(), entry.getValue(), Math::addExact); + } + return new RepositoryStats(result); } - return new RepositoryStats(result); } @Override public void writeTo(StreamOutput out) throws IOException { out.writeMap(requestCounts, StreamOutput::writeString, StreamOutput::writeLong); + out.writeMap(extendedStats, StreamOutput::writeEnum, (o, v) -> o.writeMap(v, StreamOutput::writeString, StreamOutput::writeLong)); + out.writeBoolean(detailed); } @Override @@ -80,16 +115,32 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; RepositoryStats that = (RepositoryStats) o; - return requestCounts.equals(that.requestCounts); + return requestCounts.equals(that.requestCounts) && extendedStats.equals(that.extendedStats) && detailed == that.detailed; } @Override public int hashCode() { - return Objects.hash(requestCounts); + return Objects.hash(requestCounts, detailed, extendedStats); } @Override public String toString() { - return "RepositoryStats{" + "requestCounts=" + requestCounts + '}'; + return "RepositoryStats{" + "requestCounts=" + requestCounts + ", extendedStats=" + extendedStats + ", detailed=" + detailed + "}"; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + if (detailed == false) { + builder.field("request_counts", requestCounts); + } else { + extendedStats.forEach((k, v) -> { + try { + builder.field(k.metricName(), v); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + return builder; } } diff --git a/server/src/main/java/org/opensearch/repositories/RepositoryStatsSnapshot.java b/server/src/main/java/org/opensearch/repositories/RepositoryStatsSnapshot.java index 2b061cd2c2cc9..0a727980fad0d 100644 --- a/server/src/main/java/org/opensearch/repositories/RepositoryStatsSnapshot.java +++ b/server/src/main/java/org/opensearch/repositories/RepositoryStatsSnapshot.java @@ -53,21 +53,17 @@ public final class RepositoryStatsSnapshot implements Writeable, ToXContentObjec private final RepositoryInfo repositoryInfo; private final RepositoryStats repositoryStats; private final long clusterVersion; - private final boolean archived; - public RepositoryStatsSnapshot(RepositoryInfo repositoryInfo, RepositoryStats repositoryStats, long clusterVersion, boolean archived) { - assert archived != (clusterVersion == UNKNOWN_CLUSTER_VERSION); + public RepositoryStatsSnapshot(RepositoryInfo repositoryInfo, RepositoryStats repositoryStats, long clusterVersion) { this.repositoryInfo = repositoryInfo; this.repositoryStats = repositoryStats; this.clusterVersion = clusterVersion; - this.archived = archived; } public RepositoryStatsSnapshot(StreamInput in) throws IOException { this.repositoryInfo = new RepositoryInfo(in); this.repositoryStats = new RepositoryStats(in); this.clusterVersion = in.readLong(); - this.archived = in.readBoolean(); } public RepositoryInfo getRepositoryInfo() { @@ -78,10 +74,6 @@ public RepositoryStats getRepositoryStats() { return repositoryStats; } - public boolean isArchived() { - return archived; - } - public long getClusterVersion() { return clusterVersion; } @@ -91,18 +83,13 @@ public void writeTo(StreamOutput out) throws IOException { repositoryInfo.writeTo(out); repositoryStats.writeTo(out); out.writeLong(clusterVersion); - out.writeBoolean(archived); } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(); repositoryInfo.toXContent(builder, params); - builder.field("request_counts", repositoryStats.requestCounts); - builder.field("archived", archived); - if (archived) { - builder.field("cluster_version", clusterVersion); - } + repositoryStats.toXContent(builder, params); builder.endObject(); return builder; } @@ -114,13 +101,12 @@ public boolean equals(Object o) { RepositoryStatsSnapshot that = (RepositoryStatsSnapshot) o; return repositoryInfo.equals(that.repositoryInfo) && repositoryStats.equals(that.repositoryStats) - && clusterVersion == that.clusterVersion - && archived == that.archived; + && clusterVersion == that.clusterVersion; } @Override public int hashCode() { - return Objects.hash(repositoryInfo, repositoryStats, clusterVersion, archived); + return Objects.hash(repositoryInfo, repositoryStats, clusterVersion); } @Override diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java index 69883e0d19c8d..8a2260e1f6d90 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/BlobStoreRepository.java @@ -173,6 +173,7 @@ import java.util.stream.Stream; import static org.opensearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot.FileInfo.canonicalName; +import static org.opensearch.repositories.blobstore.ChecksumBlobStoreFormat.SNAPSHOT_ONLY_FORMAT_PARAMS; /** * BlobStore - based implementation of Snapshot Repository @@ -850,6 +851,8 @@ public RepositoryStats stats() { final BlobStore store = blobStore.get(); if (store == null) { return RepositoryStats.EMPTY_STATS; + } else if (store.extendedStats() != null && store.extendedStats().isEmpty() == false) { + return new RepositoryStats(store.extendedStats(), true); } return new RepositoryStats(store.stats()); } @@ -3333,7 +3336,12 @@ private void writeShardIndexBlobAtomic( () -> new ParameterizedMessage("[{}] Writing shard index [{}] to [{}]", metadata.name(), indexGeneration, shardContainer.path()) ); final String blobName = INDEX_SHARD_SNAPSHOTS_FORMAT.blobName(String.valueOf(indexGeneration)); - writeAtomic(shardContainer, blobName, INDEX_SHARD_SNAPSHOTS_FORMAT.serialize(updatedSnapshots, blobName, compressor), true); + writeAtomic( + shardContainer, + blobName, + INDEX_SHARD_SNAPSHOTS_FORMAT.serialize(updatedSnapshots, blobName, compressor, SNAPSHOT_ONLY_FORMAT_PARAMS), + true + ); } // Unused blobs are all previous index-, data- and meta-blobs and that are not referenced by the new index- as well as all diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java index 7e1960171043a..17cb68f798094 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/ChecksumBlobStoreFormat.java @@ -83,7 +83,7 @@ public final class ChecksumBlobStoreFormat { // Serialization parameters to specify correct context for metadata serialization - private static final ToXContent.Params SNAPSHOT_ONLY_FORMAT_PARAMS; + public static final ToXContent.Params SNAPSHOT_ONLY_FORMAT_PARAMS; static { Map snapshotOnlyParams = new HashMap<>(); @@ -171,7 +171,7 @@ public T deserialize(String blobName, NamedXContentRegistry namedXContentRegistr */ public void write(final T obj, final 
BlobContainer blobContainer, final String name, final Compressor compressor) throws IOException { final String blobName = blobName(name); - final BytesReference bytes = serialize(obj, blobName, compressor); + final BytesReference bytes = serialize(obj, blobName, compressor, SNAPSHOT_ONLY_FORMAT_PARAMS); blobContainer.writeBlob(blobName, bytes.streamInput(), bytes.length(), false); } @@ -184,13 +184,15 @@ public void write(final T obj, final BlobContainer blobContainer, final String n * @param name blob name * @param compressor whether to use compression * @param listener listener to listen to write result + * @param params ToXContent params */ public void writeAsync( final T obj, final BlobContainer blobContainer, final String name, final Compressor compressor, - ActionListener listener + ActionListener listener, + final ToXContent.Params params ) throws IOException { if (blobContainer instanceof AsyncMultiStreamBlobContainer == false) { write(obj, blobContainer, name, compressor); @@ -198,7 +200,7 @@ public void writeAsync( return; } final String blobName = blobName(name); - final BytesReference bytes = serialize(obj, blobName, compressor); + final BytesReference bytes = serialize(obj, blobName, compressor, params); final String resourceDescription = "ChecksumBlobStoreFormat.writeAsync(blob=\"" + blobName + "\")"; try (IndexInput input = new ByteArrayIndexInput(resourceDescription, BytesReference.toBytes(bytes))) { long expectedChecksum; @@ -230,7 +232,8 @@ public void writeAsync( } } - public BytesReference serialize(final T obj, final String blobName, final Compressor compressor) throws IOException { + public BytesReference serialize(final T obj, final String blobName, final Compressor compressor, final ToXContent.Params params) + throws IOException { try (BytesStreamOutput outputStream = new BytesStreamOutput()) { try ( OutputStreamIndexOutput indexOutput = new OutputStreamIndexOutput( @@ -254,7 +257,7 @@ public void close() throws IOException { ) ) { builder.startObject(); - obj.toXContent(builder, SNAPSHOT_ONLY_FORMAT_PARAMS); + obj.toXContent(builder, params); builder.endObject(); } CodecUtil.writeFooter(indexOutput); diff --git a/server/src/main/java/org/opensearch/repositories/blobstore/MeteredBlobStoreRepository.java b/server/src/main/java/org/opensearch/repositories/blobstore/MeteredBlobStoreRepository.java index d4921f4e6d2e7..0651ff586d412 100644 --- a/server/src/main/java/org/opensearch/repositories/blobstore/MeteredBlobStoreRepository.java +++ b/server/src/main/java/org/opensearch/repositories/blobstore/MeteredBlobStoreRepository.java @@ -34,12 +34,10 @@ import org.opensearch.cluster.metadata.RepositoryMetadata; import org.opensearch.cluster.service.ClusterService; -import org.opensearch.common.UUIDs; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.repositories.RepositoryInfo; import org.opensearch.repositories.RepositoryStatsSnapshot; -import org.opensearch.threadpool.ThreadPool; import java.util.Map; @@ -59,14 +57,7 @@ public MeteredBlobStoreRepository( Map location ) { super(metadata, namedXContentRegistry, clusterService, recoverySettings); - ThreadPool threadPool = clusterService.getClusterApplierService().threadPool(); - this.repositoryInfo = new RepositoryInfo( - UUIDs.randomBase64UUID(), - metadata.name(), - metadata.type(), - location, - threadPool.absoluteTimeInMillis() - ); + this.repositoryInfo = new RepositoryInfo(metadata.name(), metadata.type(), location); } @Override @@ 
-78,11 +69,6 @@ public void reload(RepositoryMetadata repositoryMetadata) { } public RepositoryStatsSnapshot statsSnapshot() { - return new RepositoryStatsSnapshot(repositoryInfo, stats(), RepositoryStatsSnapshot.UNKNOWN_CLUSTER_VERSION, false); - } - - public RepositoryStatsSnapshot statsSnapshotForArchival(long clusterVersion) { - RepositoryInfo stoppedRepoInfo = repositoryInfo.stopped(threadPool.absoluteTimeInMillis()); - return new RepositoryStatsSnapshot(stoppedRepoInfo, stats(), clusterVersion, true); + return new RepositoryStatsSnapshot(repositoryInfo, stats(), RepositoryStatsSnapshot.UNKNOWN_CLUSTER_VERSION); } } diff --git a/server/src/main/java/org/opensearch/telemetry/TelemetrySettings.java b/server/src/main/java/org/opensearch/telemetry/TelemetrySettings.java index edb20cfa9dfc5..24dcab98c8870 100644 --- a/server/src/main/java/org/opensearch/telemetry/TelemetrySettings.java +++ b/server/src/main/java/org/opensearch/telemetry/TelemetrySettings.java @@ -28,6 +28,20 @@ public class TelemetrySettings { Setting.Property.Dynamic ); + public static final Setting TRACER_FEATURE_ENABLED_SETTING = Setting.boolSetting( + "telemetry.feature.tracer.enabled", + false, + Setting.Property.NodeScope, + Setting.Property.Final + ); + + public static final Setting METRICS_FEATURE_ENABLED_SETTING = Setting.boolSetting( + "telemetry.feature.metrics.enabled", + false, + Setting.Property.NodeScope, + Setting.Property.Final + ); + /** * Probability of sampler */ @@ -53,9 +67,14 @@ public class TelemetrySettings { private volatile boolean tracingEnabled; private volatile double samplingProbability; + private final boolean tracingFeatureEnabled; + private final boolean metricsFeatureEnabled; + public TelemetrySettings(Settings settings, ClusterSettings clusterSettings) { this.tracingEnabled = TRACER_ENABLED_SETTING.get(settings); this.samplingProbability = TRACER_SAMPLER_PROBABILITY.get(settings); + this.tracingFeatureEnabled = TRACER_FEATURE_ENABLED_SETTING.get(settings); + this.metricsFeatureEnabled = METRICS_FEATURE_ENABLED_SETTING.get(settings); clusterSettings.addSettingsUpdateConsumer(TRACER_ENABLED_SETTING, this::setTracingEnabled); clusterSettings.addSettingsUpdateConsumer(TRACER_SAMPLER_PROBABILITY, this::setSamplingProbability); @@ -83,4 +102,12 @@ public void setSamplingProbability(double samplingProbability) { public double getSamplingProbability() { return samplingProbability; } + + public boolean isTracingFeatureEnabled() { + return tracingFeatureEnabled; + } + + public boolean isMetricsFeatureEnabled() { + return metricsFeatureEnabled; + } } diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/WrappedTracer.java b/server/src/main/java/org/opensearch/telemetry/tracing/WrappedTracer.java index b2308402379ac..631fb8242d78e 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/WrappedTracer.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/WrappedTracer.java @@ -59,6 +59,11 @@ public SpanScope withSpanInScope(Span span) { return getDelegateTracer().withSpanInScope(span); } + @Override + public boolean isRecording() { + return getDelegateTracer().isRecording(); + } + @Override public void close() throws IOException { defaultTracer.close(); diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableHttpChannel.java b/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableHttpChannel.java index 0a9757310fe8b..e0fb690bd29be 100644 --- 
a/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableHttpChannel.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableHttpChannel.java @@ -8,7 +8,6 @@ package org.opensearch.telemetry.tracing.channels; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.action.ActionListener; import org.opensearch.http.HttpChannel; import org.opensearch.http.HttpResponse; @@ -50,7 +49,7 @@ private TraceableHttpChannel(HttpChannel delegate, Span span, Tracer tracer) { * @return http channel */ public static HttpChannel create(HttpChannel delegate, Span span, Tracer tracer) { - if (FeatureFlags.isEnabled(FeatureFlags.TELEMETRY) == true) { + if (tracer.isRecording() == true) { return new TraceableHttpChannel(delegate, span, tracer); } else { return delegate; diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableRestChannel.java b/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableRestChannel.java index d256c9d4d0e53..32769dd1d848d 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableRestChannel.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableRestChannel.java @@ -9,7 +9,6 @@ package org.opensearch.telemetry.tracing.channels; import org.opensearch.common.io.stream.BytesStreamOutput; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.xcontent.MediaType; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.rest.RestChannel; @@ -53,7 +52,7 @@ private TraceableRestChannel(RestChannel delegate, Span span, Tracer tracer) { * @return rest channel */ public static RestChannel create(RestChannel delegate, Span span, Tracer tracer) { - if (FeatureFlags.isEnabled(FeatureFlags.TELEMETRY) == true) { + if (tracer.isRecording() == true) { return new TraceableRestChannel(delegate, span, tracer); } else { return delegate; diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableTcpTransportChannel.java b/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableTcpTransportChannel.java index bd60c35c3baac..45268b4807cd9 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableTcpTransportChannel.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableTcpTransportChannel.java @@ -9,7 +9,6 @@ package org.opensearch.telemetry.tracing.channels; import org.opensearch.Version; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.action.ActionListener; import org.opensearch.core.transport.TransportResponse; import org.opensearch.telemetry.tracing.Span; @@ -53,7 +52,7 @@ public TraceableTcpTransportChannel(TcpTransportChannel delegate, Span span, Tra * @return transport channel */ public static TransportChannel create(TcpTransportChannel delegate, final Span span, final Tracer tracer) { - if (FeatureFlags.isEnabled(FeatureFlags.TELEMETRY) == true) { + if (tracer.isRecording() == true) { delegate.getChannel().addCloseListener(new ActionListener() { @Override public void onResponse(Void unused) { diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/handler/TraceableTransportResponseHandler.java b/server/src/main/java/org/opensearch/telemetry/tracing/handler/TraceableTransportResponseHandler.java index 538bf82a1dbec..eb9d53d2df51b 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/handler/TraceableTransportResponseHandler.java +++ 
b/server/src/main/java/org/opensearch/telemetry/tracing/handler/TraceableTransportResponseHandler.java @@ -8,7 +8,6 @@ package org.opensearch.telemetry.tracing.handler; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.transport.TransportResponse; import org.opensearch.telemetry.tracing.Span; @@ -55,7 +54,7 @@ public static TransportResponseHandler create( Span span, Tracer tracer ) { - if (FeatureFlags.isEnabled(FeatureFlags.TELEMETRY) == true) { + if (tracer.isRecording() == true) { return new TraceableTransportResponseHandler(delegate, span, tracer); } else { return delegate; diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/listener/TraceableActionListener.java b/server/src/main/java/org/opensearch/telemetry/tracing/listener/TraceableActionListener.java index 3e201641a529b..0cb4ce71d05f8 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/listener/TraceableActionListener.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/listener/TraceableActionListener.java @@ -8,7 +8,6 @@ package org.opensearch.telemetry.tracing.listener; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.action.ActionListener; import org.opensearch.telemetry.tracing.Span; import org.opensearch.telemetry.tracing.SpanScope; @@ -47,7 +46,7 @@ private TraceableActionListener(ActionListener delegate, Span span, Tr * @return action listener */ public static ActionListener create(ActionListener delegate, Span span, Tracer tracer) { - if (FeatureFlags.isEnabled(FeatureFlags.TELEMETRY) == true) { + if (tracer.isRecording() == true) { return new TraceableActionListener(delegate, span, tracer); } else { return delegate; diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java index 7a1b6f113d0e8..3491f18da9550 100644 --- a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -284,6 +284,10 @@ public void testSerialization() throws IOException { assertEquals(ioStats.getTotalReadOperations(), deserializedIoStats.getTotalReadOperations()); assertEquals(ioStats.getTotalWriteKilobytes(), deserializedIoStats.getTotalWriteKilobytes()); assertEquals(ioStats.getTotalWriteOperations(), deserializedIoStats.getTotalWriteOperations()); + assertEquals(ioStats.getTotalReadTime(), deserializedIoStats.getTotalReadTime()); + assertEquals(ioStats.getTotalWriteTime(), deserializedIoStats.getTotalWriteTime()); + assertEquals(ioStats.getTotalQueueSize(), deserializedIoStats.getTotalQueueSize()); + assertEquals(ioStats.getTotalIOTimeMillis(), deserializedIoStats.getTotalIOTimeMillis()); assertEquals(ioStats.getDevicesStats().length, deserializedIoStats.getDevicesStats().length); for (int i = 0; i < ioStats.getDevicesStats().length; i++) { FsInfo.DeviceStats deviceStats = ioStats.getDevicesStats()[i]; @@ -645,6 +649,10 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), null ); deviceStatsArray[i] = new FsInfo.DeviceStats( @@ -655,6 +663,10 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { randomNonNegativeLong(), 
randomNonNegativeLong(), randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), previousDeviceStats ); } @@ -840,6 +852,7 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { weightedRoutingStats, null, null, + null, null ); } diff --git a/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java b/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java index cdd0ea863ce37..f025e3a63b9bf 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java @@ -244,7 +244,9 @@ private SearchRequest mutate(SearchRequest searchRequest) { ); mutators.add(() -> mutation.source(randomValueOtherThan(searchRequest.source(), this::createSearchSourceBuilder))); mutators.add(() -> mutation.setCcsMinimizeRoundtrips(searchRequest.isCcsMinimizeRoundtrips() == false)); - mutators.add(() -> mutation.setPhaseTook(searchRequest.isPhaseTook() == false)); + mutators.add( + () -> mutation.setPhaseTook(searchRequest.isPhaseTook() == null ? randomBoolean() : searchRequest.isPhaseTook() == false) + ); mutators.add( () -> mutation.setCancelAfterTimeInterval( searchRequest.getCancelAfterTimeInterval() != null diff --git a/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java b/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java index 64949cf861f70..6f03e87bf5824 100644 --- a/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java +++ b/server/src/test/java/org/opensearch/cluster/DiskUsageTests.java @@ -191,6 +191,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ), new NodeStats( @@ -218,6 +219,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ), new NodeStats( @@ -245,6 +247,7 @@ public void testFillDiskUsage() { null, null, null, + null, null ) ); @@ -303,6 +306,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ), new NodeStats( @@ -330,6 +334,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ), new NodeStats( @@ -357,6 +362,7 @@ public void testFillDiskUsageSomeInvalidValues() { null, null, null, + null, null ) ); diff --git a/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java b/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java index f37823d2c0c7d..1c0dc7fc1ca2d 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/CoordinationStateTests.java @@ -938,6 +938,8 @@ public void testHandlePrePublishAndCommitWhenRemoteStateEnabled() throws IOExcep Version.CURRENT, randomAlphaOfLength(10), false, + 1, + randomAlphaOfLength(10), Collections.emptyList(), randomAlphaOfLength(10), true diff --git a/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java b/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java index 66426c2a880a3..6c9a3201656d7 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/ClusterMetadataManifestTests.java @@ -26,6 +26,33 @@ public class ClusterMetadataManifestTests extends OpenSearchTestCase { + public void testClusterMetadataManifestXContentV0() throws IOException { + UploadedIndexMetadata 
uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "test-uuid", "/test/upload/path"); + ClusterMetadataManifest originalManifest = new ClusterMetadataManifest( + 1L, + 1L, + "test-cluster-uuid", + "test-state-uuid", + Version.CURRENT, + "test-node-id", + false, + ClusterMetadataManifest.CODEC_V0, + null, + Collections.singletonList(uploadedIndexMetadata), + "prev-cluster-uuid", + true + ); + final XContentBuilder builder = JsonXContent.contentBuilder(); + builder.startObject(); + originalManifest.toXContent(builder, ToXContent.EMPTY_PARAMS); + builder.endObject(); + + try (XContentParser parser = createParser(JsonXContent.jsonXContent, BytesReference.bytes(builder))) { + final ClusterMetadataManifest fromXContentManifest = ClusterMetadataManifest.fromXContentV0(parser); + assertEquals(originalManifest, fromXContentManifest); + } + } + public void testClusterMetadataManifestXContent() throws IOException { UploadedIndexMetadata uploadedIndexMetadata = new UploadedIndexMetadata("test-index", "test-uuid", "/test/upload/path"); ClusterMetadataManifest originalManifest = new ClusterMetadataManifest( @@ -36,6 +63,8 @@ public void testClusterMetadataManifestXContent() throws IOException { Version.CURRENT, "test-node-id", false, + ClusterMetadataManifest.CODEC_V1, + "test-global-metadata-file", Collections.singletonList(uploadedIndexMetadata), "prev-cluster-uuid", true @@ -60,6 +89,8 @@ public void testClusterMetadataManifestSerializationEqualsHashCode() { Version.CURRENT, "B10RX1f5RJenMQvYccCgSQ", true, + 1, + "test-global-metadata-file", randomUploadedIndexMetadataList(), "yfObdx8KSMKKrXf8UyHhM", true diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 6ecbc23f75bee..49b7f0ff8d1a9 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -12,6 +12,7 @@ import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.coordination.CoordinationMetadata; +import org.opensearch.cluster.metadata.IndexGraveyard; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.Metadata; import org.opensearch.cluster.node.DiscoveryNodes; @@ -28,12 +29,14 @@ import org.opensearch.common.lucene.store.ByteArrayIndexInput; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; +import org.opensearch.core.ParseField; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.index.Index; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; +import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.repositories.FilterRepository; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.RepositoryMissingException; @@ -65,7 +68,11 @@ import org.mockito.ArgumentMatchers; import static org.opensearch.gateway.remote.RemoteClusterStateService.DELIMITER; +import static org.opensearch.gateway.remote.RemoteClusterStateService.FORMAT_PARAMS; +import static 
org.opensearch.gateway.remote.RemoteClusterStateService.INDEX_METADATA_CURRENT_CODEC_VERSION; +import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_CURRENT_CODEC_VERSION; import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_FILE_PREFIX; +import static org.opensearch.gateway.remote.RemoteClusterStateService.METADATA_FILE_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_TYPE_ATTRIBUTE_KEY_FORMAT; @@ -228,14 +235,15 @@ public void testWriteFullMetadataInParallelSuccess() throws IOException { assertThat(manifest.getIndices().get(0).getIndexName(), is(uploadedIndexMetadata.getIndexName())); assertThat(manifest.getIndices().get(0).getIndexUUID(), is(uploadedIndexMetadata.getIndexUUID())); assertThat(manifest.getIndices().get(0).getUploadedFilename(), notNullValue()); + assertThat(manifest.getGlobalMetadataFileName(), notNullValue()); assertThat(manifest.getClusterTerm(), is(expectedManifest.getClusterTerm())); assertThat(manifest.getStateVersion(), is(expectedManifest.getStateVersion())); assertThat(manifest.getClusterUUID(), is(expectedManifest.getClusterUUID())); assertThat(manifest.getStateUUID(), is(expectedManifest.getStateUUID())); assertThat(manifest.getPreviousClusterUUID(), is(expectedManifest.getPreviousClusterUUID())); - assertEquals(actionListenerArgumentCaptor.getAllValues().size(), 1); - assertEquals(writeContextArgumentCaptor.getAllValues().size(), 1); + assertEquals(actionListenerArgumentCaptor.getAllValues().size(), 2); + assertEquals(writeContextArgumentCaptor.getAllValues().size(), 2); WriteContext capturedWriteContext = writeContextArgumentCaptor.getValue(); byte[] writtenBytes = capturedWriteContext.getStreamProvider(Integer.MAX_VALUE).provideStream(0).getInputStream().readAllBytes(); @@ -259,7 +267,7 @@ public void testWriteFullMetadataInParallelSuccess() throws IOException { } - public void testWriteFullMetadataInParallelFailure() throws IOException { + public void testWriteFullMetadataFailureForGlobalMetadata() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); AsyncMultiStreamBlobContainer container = (AsyncMultiStreamBlobContainer) mockBlobStoreObjects(AsyncMultiStreamBlobContainer.class); @@ -270,6 +278,27 @@ public void testWriteFullMetadataInParallelFailure() throws IOException { return null; }).when(container).asyncBlobUpload(any(WriteContext.class), actionListenerArgumentCaptor.capture()); + remoteClusterStateService.start(); + assertThrows( + RemoteClusterStateService.GlobalMetadataTransferException.class, + () -> remoteClusterStateService.writeFullMetadata(clusterState, randomAlphaOfLength(10)) + ); + } + + public void testWriteFullMetadataInParallelFailureForIndexMetadata() throws IOException { + final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); + AsyncMultiStreamBlobContainer container = (AsyncMultiStreamBlobContainer) mockBlobStoreObjects(AsyncMultiStreamBlobContainer.class); + + ArgumentCaptor> actionListenerArgumentCaptor = ArgumentCaptor.forClass(ActionListener.class); + + doAnswer((i) -> { + 
actionListenerArgumentCaptor.getValue().onResponse(null); + return null; + }).doAnswer((i) -> { + actionListenerArgumentCaptor.getValue().onFailure(new RuntimeException("Cannot upload to remote")); + return null; + }).when(container).asyncBlobUpload(any(WriteContext.class), actionListenerArgumentCaptor.capture()); + remoteClusterStateService.start(); assertThrows( RemoteClusterStateService.IndexMetadataTransferException.class, @@ -334,6 +363,207 @@ public void testWriteIncrementalMetadataSuccess() throws IOException { assertThat(manifest.getStateUUID(), is(expectedManifest.getStateUUID())); } + /* + * Here we will verify the migration of the manifest file from codec V0 to V1. + * + * Initially the codec version is 0 and global metadata is null; we then perform an index metadata update. + * In the final manifest the codec version should be 1 and + * global metadata should be updated, even though it was not changed in this cluster state update. + */ + public void testMigrationFromCodecV0ManifestToCodecV1Manifest() throws IOException { + mockBlobStoreObjects(); + final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); + final ClusterState previousClusterState = ClusterState.builder(ClusterName.DEFAULT) + .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata)) + .nodes(nodesWithLocalNodeClusterManager()) + .build(); + + // Update only index metadata + final IndexMetadata indexMetadata = new IndexMetadata.Builder("test").settings( + Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.SETTING_INDEX_UUID, "uuid") + .build() + ).numberOfShards(1).numberOfReplicas(0).build(); + Metadata newMetadata = Metadata.builder(previousClusterState.metadata()).put(indexMetadata, true).build(); + ClusterState newClusterState = ClusterState.builder(previousClusterState).metadata(newMetadata).build(); + + // previous manifest with codec 0 and null global metadata + final ClusterMetadataManifest previousManifest = ClusterMetadataManifest.builder() + .codecVersion(ClusterMetadataManifest.CODEC_V0) + .globalMetadataFileName(null) + .indices(Collections.emptyList()) + .build(); + + remoteClusterStateService.start(); + final ClusterMetadataManifest manifestAfterUpdate = remoteClusterStateService.writeIncrementalMetadata( + previousClusterState, + newClusterState, + previousManifest + ); + + // global metadata is updated + assertThat(manifestAfterUpdate.getGlobalMetadataFileName(), notNullValue()); + // Manifest file with codec version 1 is updated. 
+ assertThat(manifestAfterUpdate.getCodecVersion(), is(ClusterMetadataManifest.CODEC_V1)); + } + + public void testWriteIncrementalGlobalMetadataSuccess() throws IOException { + final ClusterState clusterState = generateClusterStateWithGlobalMetadata().nodes(nodesWithLocalNodeClusterManager()).build(); + mockBlobStoreObjects(); + final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); + final ClusterState previousClusterState = ClusterState.builder(ClusterName.DEFAULT) + .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata)) + .build(); + + final ClusterMetadataManifest previousManifest = ClusterMetadataManifest.builder() + .codecVersion(2) + .globalMetadataFileName("global-metadata-file") + .indices(Collections.emptyList()) + .build(); + + remoteClusterStateService.start(); + final ClusterMetadataManifest manifest = remoteClusterStateService.writeIncrementalMetadata( + previousClusterState, + clusterState, + previousManifest + ); + + final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() + .indices(Collections.emptyList()) + .globalMetadataFileName("mock-filename") + .clusterTerm(1L) + .stateVersion(1L) + .stateUUID("state-uuid") + .clusterUUID("cluster-uuid") + .previousClusterUUID("prev-cluster-uuid") + .build(); + + assertThat(manifest.getGlobalMetadataFileName(), notNullValue()); + assertThat(manifest.getClusterTerm(), is(expectedManifest.getClusterTerm())); + assertThat(manifest.getStateVersion(), is(expectedManifest.getStateVersion())); + assertThat(manifest.getClusterUUID(), is(expectedManifest.getClusterUUID())); + assertThat(manifest.getStateUUID(), is(expectedManifest.getStateUUID())); + } + + /* + * Here we will verify index metadata is not uploaded again if change is only in global metadata + */ + public void testGlobalMetadataOnlyUpdated() throws IOException { + // setup + mockBlobStoreObjects(); + final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); + final ClusterState initialClusterState = ClusterState.builder(ClusterName.DEFAULT) + .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata)) + .build(); + final ClusterMetadataManifest initialManifest = ClusterMetadataManifest.builder() + .codecVersion(2) + .globalMetadataFileName("global-metadata-file") + .indices(Collections.emptyList()) + .build(); + remoteClusterStateService.start(); + + // Initial cluster state with index. 
+ final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); + // Updating remote cluster state with changing index metadata + final ClusterMetadataManifest manifestAfterIndexMetadataUpdate = remoteClusterStateService.writeIncrementalMetadata( + initialClusterState, + clusterState, + initialManifest + ); + + // new cluster state where only global metadata is different + Metadata newMetadata = Metadata.builder(clusterState.metadata()) + .persistentSettings(Settings.builder().put("cluster.blocks.read_only", true).build()) + .build(); + ClusterState newClusterState = ClusterState.builder(clusterState).metadata(newMetadata).build(); + + // updating remote cluster state with global metadata + final ClusterMetadataManifest manifestAfterGlobalMetadataUpdate = remoteClusterStateService.writeIncrementalMetadata( + clusterState, + newClusterState, + manifestAfterIndexMetadataUpdate + ); + + // Verify that the index metadata information is the same in both manifest files + assertThat(manifestAfterIndexMetadataUpdate.getIndices().size(), is(manifestAfterGlobalMetadataUpdate.getIndices().size())); + assertThat( + manifestAfterIndexMetadataUpdate.getIndices().get(0).getIndexName(), + is(manifestAfterGlobalMetadataUpdate.getIndices().get(0).getIndexName()) + ); + assertThat( + manifestAfterIndexMetadataUpdate.getIndices().get(0).getIndexUUID(), + is(manifestAfterGlobalMetadataUpdate.getIndices().get(0).getIndexUUID()) + ); + + // since the timestamp is part of the file name, an identical file name confirms the file was not updated during the global metadata update + assertThat( + manifestAfterIndexMetadataUpdate.getIndices().get(0).getUploadedFilename(), + is(manifestAfterGlobalMetadataUpdate.getIndices().get(0).getUploadedFilename()) + ); + + // global metadata file would have changed + assertFalse( + manifestAfterIndexMetadataUpdate.getGlobalMetadataFileName() + .equalsIgnoreCase(manifestAfterGlobalMetadataUpdate.getGlobalMetadataFileName()) + ); + } + + /* + * Here we will verify global metadata is not uploaded again if change is only in index metadata + */ + public void testIndexMetadataOnlyUpdated() throws IOException { + // setup + mockBlobStoreObjects(); + final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); + final ClusterState initialClusterState = ClusterState.builder(ClusterName.DEFAULT) + .metadata(Metadata.builder().coordinationMetadata(coordinationMetadata)) + .build(); + final ClusterMetadataManifest initialManifest = ClusterMetadataManifest.builder() + .codecVersion(2) + .indices(Collections.emptyList()) + .build(); + remoteClusterStateService.start(); + + // Initial cluster state with global metadata. 
+ final ClusterState clusterState = generateClusterStateWithGlobalMetadata().nodes(nodesWithLocalNodeClusterManager()).build(); + + // Updating remote cluster state with changing global metadata + final ClusterMetadataManifest manifestAfterGlobalMetadataUpdate = remoteClusterStateService.writeIncrementalMetadata( + initialClusterState, + clusterState, + initialManifest + ); + + // new cluster state where only index metadata is different + final IndexMetadata indexMetadata = new IndexMetadata.Builder("test").settings( + Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.SETTING_INDEX_UUID, "uuid") + .build() + ).numberOfShards(1).numberOfReplicas(0).build(); + Metadata newMetadata = Metadata.builder(clusterState.metadata()).put(indexMetadata, true).build(); + ClusterState newClusterState = ClusterState.builder(clusterState).metadata(newMetadata).build(); + + // updating remote cluster state with index metadata + final ClusterMetadataManifest manifestAfterIndexMetadataUpdate = remoteClusterStateService.writeIncrementalMetadata( + clusterState, + newClusterState, + manifestAfterGlobalMetadataUpdate + ); + + // Verify that the global metadata information is the same in both manifest files after updating index metadata + // since the timestamp is part of the file name, an identical file name confirms the file was not updated during the index metadata update + assertThat( + manifestAfterIndexMetadataUpdate.getGlobalMetadataFileName(), + is(manifestAfterGlobalMetadataUpdate.getGlobalMetadataFileName()) + ); + + // Index metadata would have changed + assertThat(manifestAfterGlobalMetadataUpdate.getIndices().size(), is(0)); + assertThat(manifestAfterIndexMetadataUpdate.getIndices().size(), is(1)); + } + public void testReadLatestMetadataManifestFailedIOException() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); @@ -398,6 +628,7 @@ public void testReadLatestMetadataManifestSuccessButNoIndexMetadata() throws IOE .nodeId("nodeA") .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) .previousClusterUUID("prev-cluster-uuid") + .codecVersion(ClusterMetadataManifest.CODEC_V0) .build(); BlobContainer blobContainer = mockBlobStoreObjects(); @@ -405,7 +636,8 @@ public void testReadLatestMetadataManifestSuccessButNoIndexMetadata() throws IOE remoteClusterStateService.start(); assertEquals( - remoteClusterStateService.getLatestIndexMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID()) + remoteClusterStateService.getLatestMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID()) + .getIndices() .size(), 0 ); @@ -433,10 +665,8 @@ public void testReadLatestMetadataManifestSuccessButIndexMetadataFetchIOExceptio remoteClusterStateService.start(); Exception e = assertThrows( IllegalStateException.class, - () -> remoteClusterStateService.getLatestIndexMetadata( - clusterState.getClusterName().value(), - clusterState.metadata().clusterUUID() - ) + () -> remoteClusterStateService.getLatestMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID()) + .getIndices() ); assertEquals(e.getMessage(), "Error while downloading IndexMetadata - " + uploadedIndexMetadata.getUploadedFilename()); } @@ -454,6 +684,7 @@ public void testReadLatestMetadataManifestSuccess() throws IOException { .clusterUUID("cluster-uuid") .nodeId("nodeA") .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) + 
.codecVersion(ClusterMetadataManifest.CODEC_V0) .previousClusterUUID("prev-cluster-uuid") .build(); @@ -474,6 +705,70 @@ assertThat(manifest.getStateUUID(), is(expectedManifest.getStateUUID())); } + public void testReadGlobalMetadata() throws IOException { + when(blobStoreRepository.getNamedXContentRegistry()).thenReturn(new NamedXContentRegistry( + List.of(new NamedXContentRegistry.Entry(Metadata.Custom.class, new ParseField(IndexGraveyard.TYPE), IndexGraveyard::fromXContent)))); + final ClusterState clusterState = generateClusterStateWithGlobalMetadata().nodes(nodesWithLocalNodeClusterManager()).build(); + remoteClusterStateService.start(); + + final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() + .indices(List.of()) + .clusterTerm(1L) + .stateVersion(1L) + .stateUUID("state-uuid") + .clusterUUID("cluster-uuid") + .codecVersion(MANIFEST_CURRENT_CODEC_VERSION) + .globalMetadataFileName("global-metadata-file") + .nodeId("nodeA") + .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) + .previousClusterUUID("prev-cluster-uuid") + .build(); + + Metadata expectedMetadata = Metadata.builder().persistentSettings(Settings.builder().put("readonly", true).build()).build(); + mockBlobContainerForGlobalMetadata(mockBlobStoreObjects(), expectedManifest, expectedMetadata); + + Metadata metadata = remoteClusterStateService.getLatestMetadata( + clusterState.getClusterName().value(), + clusterState.metadata().clusterUUID() + ); + + assertTrue(Metadata.isGlobalStateEquals(metadata, expectedMetadata)); + } + + public void testReadGlobalMetadataIOException() throws IOException { + final ClusterState clusterState = generateClusterStateWithGlobalMetadata().nodes(nodesWithLocalNodeClusterManager()).build(); + remoteClusterStateService.start(); + String globalIndexMetadataName = "global-metadata-file"; + final ClusterMetadataManifest expectedManifest = ClusterMetadataManifest.builder() + .indices(List.of()) + .clusterTerm(1L) + .stateVersion(1L) + .stateUUID("state-uuid") + .clusterUUID("cluster-uuid") + .codecVersion(MANIFEST_CURRENT_CODEC_VERSION) + .globalMetadataFileName(globalIndexMetadataName) + .nodeId("nodeA") + .opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) + .previousClusterUUID("prev-cluster-uuid") + .build(); + + Metadata expectedMetadata = Metadata.builder().persistentSettings(Settings.builder().put("readonly", true).build()).build(); + + BlobContainer blobContainer = mockBlobStoreObjects(); + mockBlobContainerForGlobalMetadata(blobContainer, expectedManifest, expectedMetadata); + + when(blobContainer.readBlob(RemoteClusterStateService.GLOBAL_METADATA_FORMAT.blobName(globalIndexMetadataName))).thenThrow( + FileNotFoundException.class + ); + + remoteClusterStateService.start(); + Exception e = assertThrows( + IllegalStateException.class, + () -> remoteClusterStateService.getLatestMetadata(clusterState.getClusterName().value(), clusterState.metadata().clusterUUID()) + ); + assertEquals(e.getMessage(), "Error while downloading Global Metadata - " + globalIndexMetadataName); + } + public void testReadLatestIndexMetadataSuccess() throws IOException { final ClusterState clusterState = generateClusterStateWithOneIndex().nodes(nodesWithLocalNodeClusterManager()).build(); remoteClusterStateService.start(); @@ -500,14 +795,16 @@ public void testReadLatestIndexMetadataSuccess() throws IOException { .nodeId("nodeA") 
.opensearchVersion(VersionUtils.randomOpenSearchVersion(random())) .previousClusterUUID("prev-cluster-uuid") + .globalMetadataFileName("global-metadata-file") + .codecVersion(ClusterMetadataManifest.CODEC_V0) .build(); mockBlobContainer(mockBlobStoreObjects(), expectedManifest, Map.of(index.getUUID(), indexMetadata)); - Map indexMetadataMap = remoteClusterStateService.getLatestIndexMetadata( + Map indexMetadataMap = remoteClusterStateService.getLatestMetadata( clusterState.getClusterName().value(), clusterState.metadata().clusterUUID() - ); + ).getIndices(); assertEquals(indexMetadataMap.size(), 1); assertEquals(indexMetadataMap.get(index.getUUID()).getIndex().getName(), index.getName()); @@ -673,6 +970,40 @@ public void testDeleteStaleClusterUUIDs() throws IOException { } } + public void testFileNames() { + final Index index = new Index("test-index", "index-uuid"); + final Settings idxSettings = Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.SETTING_INDEX_UUID, index.getUUID()) + .build(); + final IndexMetadata indexMetadata = new IndexMetadata.Builder(index.getName()).settings(idxSettings) + .numberOfShards(1) + .numberOfReplicas(0) + .build(); + + String indexMetadataFileName = RemoteClusterStateService.indexMetadataFileName(indexMetadata); + String[] splittedIndexMetadataFileName = indexMetadataFileName.split(DELIMITER); + assertThat(indexMetadataFileName.split(DELIMITER).length, is(4)); + assertThat(splittedIndexMetadataFileName[0], is(METADATA_FILE_PREFIX)); + assertThat(splittedIndexMetadataFileName[1], is(RemoteStoreUtils.invertLong(indexMetadata.getVersion()))); + assertThat(splittedIndexMetadataFileName[3], is(String.valueOf(INDEX_METADATA_CURRENT_CODEC_VERSION))); + + int term = randomIntBetween(5, 10); + int version = randomIntBetween(5, 10); + String manifestFileName = RemoteClusterStateService.getManifestFileName(term, version, true); + assertThat(manifestFileName.split(DELIMITER).length, is(6)); + String[] splittedName = manifestFileName.split(DELIMITER); + assertThat(splittedName[0], is(MANIFEST_FILE_PREFIX)); + assertThat(splittedName[1], is(RemoteStoreUtils.invertLong(term))); + assertThat(splittedName[2], is(RemoteStoreUtils.invertLong(version))); + assertThat(splittedName[3], is("C")); + assertThat(splittedName[5], is(String.valueOf(MANIFEST_CURRENT_CODEC_VERSION))); + + manifestFileName = RemoteClusterStateService.getManifestFileName(term, version, false); + splittedName = manifestFileName.split(DELIMITER); + assertThat(splittedName[3], is("P")); + } + private void mockObjectsForGettingPreviousClusterUUID(Map clusterUUIDsPointers) throws IOException { final BlobPath blobPath = mock(BlobPath.class); when((blobStoreRepository.basePath())).thenReturn(blobPath); @@ -782,6 +1113,7 @@ private ClusterMetadataManifest generateClusterMetadataManifest( .previousClusterUUID(previousClusterUUID) .committed(true) .clusterUUIDCommitted(true) + .globalMetadataFileName("test-global-metadata") .build(); } @@ -821,7 +1153,8 @@ private void mockBlobContainer( BytesReference bytes = RemoteClusterStateService.CLUSTER_METADATA_MANIFEST_FORMAT.serialize( clusterMetadataManifest, "manifestFileName", - blobStoreRepository.getCompressor() + blobStoreRepository.getCompressor(), + FORMAT_PARAMS ); when(blobContainer.readBlob("manifestFileName")).thenReturn(new ByteArrayInputStream(bytes.streamInput().readAllBytes())); @@ -835,7 +1168,8 @@ private void mockBlobContainer( BytesReference bytesIndexMetadata = 
RemoteClusterStateService.INDEX_METADATA_FORMAT.serialize( indexMetadata, fileName, - blobStoreRepository.getCompressor() + blobStoreRepository.getCompressor(), + FORMAT_PARAMS ); when(blobContainer.readBlob(fileName + ".dat")).thenReturn( new ByteArrayInputStream(bytesIndexMetadata.streamInput().readAllBytes()) @@ -846,6 +1180,57 @@ private void mockBlobContainer( }); } + private void mockBlobContainerForGlobalMetadata( + BlobContainer blobContainer, + ClusterMetadataManifest clusterMetadataManifest, + Metadata metadata + ) throws IOException { + String mockManifestFileName = "manifest__1__2__C__456__1"; + BlobMetadata blobMetadata = new PlainBlobMetadata(mockManifestFileName, 1); + when( + blobContainer.listBlobsByPrefixInSortedOrder( + "manifest" + RemoteClusterStateService.DELIMITER, + 1, + BlobContainer.BlobNameSortOrder.LEXICOGRAPHIC + ) + ).thenReturn(Arrays.asList(blobMetadata)); + + BytesReference bytes = RemoteClusterStateService.CLUSTER_METADATA_MANIFEST_FORMAT.serialize( + clusterMetadataManifest, + mockManifestFileName, + blobStoreRepository.getCompressor(), + FORMAT_PARAMS + ); + when(blobContainer.readBlob(mockManifestFileName)).thenReturn(new ByteArrayInputStream(bytes.streamInput().readAllBytes())); + + BytesReference bytesGlobalMetadata = RemoteClusterStateService.GLOBAL_METADATA_FORMAT.serialize( + metadata, + "global-metadata-file", + blobStoreRepository.getCompressor(), + FORMAT_PARAMS + ); + String[] splitPath = clusterMetadataManifest.getGlobalMetadataFileName().split("/"); + when(blobContainer.readBlob(RemoteClusterStateService.GLOBAL_METADATA_FORMAT.blobName(splitPath[splitPath.length - 1]))).thenReturn( + new ByteArrayInputStream(bytesGlobalMetadata.streamInput().readAllBytes()) + ); + } + + private static ClusterState.Builder generateClusterStateWithGlobalMetadata() { + final Settings clusterSettings = Settings.builder().put("cluster.blocks.read_only", true).build(); + final CoordinationMetadata coordinationMetadata = CoordinationMetadata.builder().term(1L).build(); + + return ClusterState.builder(ClusterName.DEFAULT) + .version(1L) + .stateUUID("state-uuid") + .metadata( + Metadata.builder() + .persistentSettings(clusterSettings) + .clusterUUID("cluster-uuid") + .coordinationMetadata(coordinationMetadata) + .build() + ); + } + private static ClusterState.Builder generateClusterStateWithOneIndex() { final Index index = new Index("test-index", "index-uuid"); final Settings idxSettings = Settings.builder() diff --git a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java index 09484cd1b5840..57509c5daa2b1 100644 --- a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java +++ b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java @@ -617,11 +617,18 @@ public void testFlushThrowsFlushFailedExceptionOnCorruption() throws Exception { indexOperations(nrtEngine, operations); // wipe the nrt directory initially so we can sync with primary. 
cleanAndCopySegmentsFromPrimary(nrtEngine); - nrtEngineStore.directory().deleteFile("_0.si"); + final Optional toDelete = Set.of(nrtEngineStore.directory().listAll()).stream().filter(f -> f.endsWith(".si")).findAny(); + assertTrue(toDelete.isPresent()); + nrtEngineStore.directory().deleteFile(toDelete.get()); assertThrows(FlushFailedEngineException.class, nrtEngine::flush); - assertTrue(nrtEngineStore.isMarkedCorrupted()); - // store will throw when eventually closed, not handled here. - assertThrows(RuntimeException.class, nrtEngineStore::close); + nrtEngine.close(); + if (nrtEngineStore.isMarkedCorrupted()) { + assertThrows(RuntimeException.class, nrtEngineStore::close); + } else { + // With certain mock directories a NoSuchFileException is thrown which is not treated as a + // corruption Exception. In these cases we don't expect any issue on store close. + nrtEngineStore.close(); + } } private void copySegments(Collection latestPrimaryFiles, Engine nrtEngine) throws IOException { diff --git a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java index 6a99063d11353..fe389e3b3fcb4 100644 --- a/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/RemoteIndexShardTests.java @@ -43,6 +43,7 @@ import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.BiConsumer; import java.util.stream.Collectors; import static org.opensearch.index.engine.EngineTestCase.assertAtMostOneLuceneDocumentPerSequenceNumber; @@ -388,9 +389,10 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { - super.getSegmentFiles(replicationId, checkpoint, filesToFetch, indexShard, listener); + super.getSegmentFiles(replicationId, checkpoint, filesToFetch, indexShard, (fileName, bytesRecovered) -> {}, listener); runAfterGetFiles[index.getAndIncrement()].run(); } diff --git a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java index 528402d48658a..52f28aead533d 100644 --- a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationIndexShardTests.java @@ -91,6 +91,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; +import java.util.function.BiConsumer; import java.util.function.Function; import static org.opensearch.index.engine.EngineTestCase.assertAtMostOneLuceneDocumentPerSequenceNumber; @@ -725,6 +726,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { // set the listener, we will only fail it once we cancel the source. 
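
The recurring change across these replication test files is the new fileProgressTracker argument threaded through SegmentReplicationSource.getSegmentFiles. As a minimal sketch (not part of this patch), here is how a source implementation might drive that hook so the replication target can mark each file as fully recovered; the class name and the report-whole-file-at-once behavior are illustrative assumptions, while the parameter types are taken from the signatures shown in this diff.

import java.util.List;
import java.util.function.BiConsumer;

import org.opensearch.core.action.ActionListener;
import org.opensearch.index.shard.IndexShard;
import org.opensearch.index.store.StoreFileMetadata;
import org.opensearch.indices.replication.GetSegmentFilesResponse;
import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint;

// Hypothetical source sketch: reports every fetched file through the new
// (fileName, bytesRecovered) callback before completing the listener.
class ProgressReportingSourceSketch {
    public void getSegmentFiles(
        long replicationId,
        ReplicationCheckpoint checkpoint,
        List<StoreFileMetadata> filesToFetch,
        IndexShard indexShard,
        BiConsumer<String, Long> fileProgressTracker,
        ActionListener<GetSegmentFilesResponse> listener
    ) {
        for (StoreFileMetadata file : filesToFetch) {
            // a real source would copy bytes incrementally; this sketch
            // reports the full file length in a single callback
            fileProgressTracker.accept(file.name(), file.length());
        }
        listener.onResponse(new GetSegmentFilesResponse(filesToFetch));
    }
}

Test doubles that do not care about progress simply pass (fileName, bytesRecovered) -> {}, which is the pattern used throughout the updated tests below.
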
diff --git a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationWithNodeToNodeIndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationWithNodeToNodeIndexShardTests.java index c394101697b47..f0950fe5392de 100644 --- a/server/src/test/java/org/opensearch/index/shard/SegmentReplicationWithNodeToNodeIndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/SegmentReplicationWithNodeToNodeIndexShardTests.java @@ -47,6 +47,7 @@ import java.util.Map; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import java.util.function.BiConsumer; import java.util.function.Consumer; import java.util.function.Function; import java.util.stream.Collectors; @@ -87,6 +88,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { // randomly resolve the listener, indicating the source has resolved. @@ -131,6 +133,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { Assert.fail("Should not be reached"); @@ -176,6 +179,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { Assert.fail("Unreachable"); @@ -223,6 +227,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) {} }; @@ -269,6 +274,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onResponse(new GetSegmentFilesResponse(Collections.emptyList())); diff --git a/server/src/test/java/org/opensearch/indices/replication/PrimaryShardReplicationSourceTests.java b/server/src/test/java/org/opensearch/indices/replication/PrimaryShardReplicationSourceTests.java index bcacef83d190a..e4dd32e5c6f70 100644 --- a/server/src/test/java/org/opensearch/indices/replication/PrimaryShardReplicationSourceTests.java +++ b/server/src/test/java/org/opensearch/indices/replication/PrimaryShardReplicationSourceTests.java @@ -125,6 +125,7 @@ public void testGetSegmentFiles() { checkpoint, Arrays.asList(testMetadata), mock(IndexShard.class), + (fileName, bytesRecovered) -> {}, mock(ActionListener.class) ); CapturingTransport.CapturedRequest[] requestList = transport.getCapturedRequestsAndClear(); @@ -153,6 +154,7 @@ public void testTransportTimeoutForGetSegmentFilesAction() { checkpoint, Arrays.asList(testMetadata), mock(IndexShard.class), + (fileName, bytesRecovered) -> {}, mock(ActionListener.class) ); CapturingTransport.CapturedRequest[] requestList = transport.getCapturedRequestsAndClear(); @@ -178,6 +180,7 @@ public void testGetSegmentFiles_CancelWhileRequestOpen() throws InterruptedExcep checkpoint, Arrays.asList(testMetadata), mock(IndexShard.class), + (fileName, bytesRecovered) -> {}, new ActionListener<>() { @Override public void onResponse(GetSegmentFilesResponse getSegmentFilesResponse) { diff --git a/server/src/test/java/org/opensearch/indices/replication/RemoteStoreReplicationSourceTests.java b/server/src/test/java/org/opensearch/indices/replication/RemoteStoreReplicationSourceTests.java index 9204f48ba5bdd..287962b158c79 100644 --- 
a/server/src/test/java/org/opensearch/indices/replication/RemoteStoreReplicationSourceTests.java +++ b/server/src/test/java/org/opensearch/indices/replication/RemoteStoreReplicationSourceTests.java @@ -90,7 +90,7 @@ public void testGetSegmentFiles() throws ExecutionException, InterruptedExceptio List filesToFetch = primaryShard.getSegmentMetadataMap().values().stream().collect(Collectors.toList()); final PlainActionFuture res = PlainActionFuture.newFuture(); replicationSource = new RemoteStoreReplicationSource(primaryShard); - replicationSource.getSegmentFiles(REPLICATION_ID, checkpoint, filesToFetch, replicaShard, res); + replicationSource.getSegmentFiles(REPLICATION_ID, checkpoint, filesToFetch, replicaShard, (fileName, bytesRecovered) -> {}, res); GetSegmentFilesResponse response = res.get(); assertEquals(response.files.size(), filesToFetch.size()); assertTrue(response.files.containsAll(filesToFetch)); @@ -104,7 +104,14 @@ public void testGetSegmentFilesAlreadyExists() throws IOException, InterruptedEx try { final PlainActionFuture res = PlainActionFuture.newFuture(); replicationSource = new RemoteStoreReplicationSource(primaryShard); - replicationSource.getSegmentFiles(REPLICATION_ID, checkpoint, filesToFetch, primaryShard, res); + replicationSource.getSegmentFiles( + REPLICATION_ID, + checkpoint, + filesToFetch, + primaryShard, + (fileName, bytesRecovered) -> {}, + res + ); res.get(); } catch (AssertionError | ExecutionException ex) { latch.countDown(); @@ -118,7 +125,14 @@ public void testGetSegmentFilesReturnEmptyResponse() throws ExecutionException, final ReplicationCheckpoint checkpoint = primaryShard.getLatestReplicationCheckpoint(); final PlainActionFuture res = PlainActionFuture.newFuture(); replicationSource = new RemoteStoreReplicationSource(primaryShard); - replicationSource.getSegmentFiles(REPLICATION_ID, checkpoint, Collections.emptyList(), primaryShard, res); + replicationSource.getSegmentFiles( + REPLICATION_ID, + checkpoint, + Collections.emptyList(), + primaryShard, + (fileName, bytesRecovered) -> {}, + res + ); GetSegmentFilesResponse response = res.get(); assert (response.files.isEmpty()); } diff --git a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java index c108de5ee5ea6..7b02635525264 100644 --- a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java +++ b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetServiceTests.java @@ -39,6 +39,7 @@ import org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.telemetry.tracing.noop.NoopTracer; +import org.opensearch.test.junit.annotations.TestLogging; import org.opensearch.test.transport.CapturingTransport; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; @@ -52,6 +53,7 @@ import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; +import java.util.function.BiConsumer; import static org.junit.Assert.assertEquals; import static org.mockito.ArgumentMatchers.any; @@ -211,6 +213,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { Assert.fail("Should not be called"); @@ -246,7 +249,7 @@ public void 
testAlreadyOnNewCheckpoint() { verify(spy, times(0)).startReplication(any(), any(), any()); } - @AwaitsFix(bugUrl = "https://github.com/opensearch-project/OpenSearch/issues/8928") + @TestLogging(reason = "Getting trace logs from replication package", value = "org.opensearch.indices.replication:TRACE") public void testShardAlreadyReplicating() { CountDownLatch blockGetCheckpointMetadata = new CountDownLatch(1); SegmentReplicationSource source = new TestReplicationSource() { @@ -276,6 +279,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onResponse(new GetSegmentFilesResponse(Collections.emptyList())); @@ -333,6 +337,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { Assert.fail("Unreachable"); diff --git a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetTests.java b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetTests.java index a9d7d3cdd32fc..8b4b3aff701b4 100644 --- a/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetTests.java +++ b/server/src/test/java/org/opensearch/indices/replication/SegmentReplicationTargetTests.java @@ -40,6 +40,7 @@ import org.opensearch.index.store.StoreTests; import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; import org.opensearch.indices.replication.common.ReplicationFailedException; +import org.opensearch.indices.replication.common.ReplicationLuceneIndex; import org.opensearch.indices.replication.common.ReplicationType; import org.opensearch.test.DummyShardLock; import org.opensearch.test.IndexSettingsModule; @@ -53,6 +54,7 @@ import java.util.List; import java.util.Map; import java.util.Random; +import java.util.function.BiConsumer; import org.mockito.Mockito; @@ -131,10 +133,12 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { assertEquals(1, filesToFetch.size()); assert (filesToFetch.contains(SEGMENT_FILE)); + filesToFetch.forEach(storeFileMetadata -> fileProgressTracker.accept(storeFileMetadata.name(), storeFileMetadata.length())); listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); } }; @@ -149,6 +153,19 @@ public void getSegmentFiles( public void onResponse(Void replicationResponse) { try { verify(spyIndexShard, times(1)).finalizeReplication(any()); + assertEquals( + 1, + segrepTarget.state() + .getIndex() + .fileDetails() + .stream() + .filter(ReplicationLuceneIndex.FileMetadata::fullyRecovered) + .count() + ); + assertEquals( + 0, + segrepTarget.state().getIndex().fileDetails().stream().filter(file -> file.fullyRecovered() == false).count() + ); segrepTarget.markAsDone(); } catch (IOException ex) { Assert.fail(); @@ -182,6 +199,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); @@ -200,6 +218,15 @@ public void onResponse(Void replicationResponse) { @Override public void onFailure(Exception e) { + assertEquals( + 0, + segrepTarget.state() + .getIndex() + .fileDetails() + .stream() + .filter(ReplicationLuceneIndex.FileMetadata::fullyRecovered) + .count() + ); 
assertEquals(exception, e.getCause().getCause()); segrepTarget.fail(new ReplicationFailedException(e), false); } @@ -225,6 +252,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onFailure(exception); @@ -243,6 +271,15 @@ public void onResponse(Void replicationResponse) { @Override public void onFailure(Exception e) { + assertEquals( 0, segrepTarget.state() .getIndex() .fileDetails() .stream() .filter(ReplicationLuceneIndex.FileMetadata::fullyRecovered) .count() ); assertEquals(exception, e.getCause().getCause()); segrepTarget.fail(new ReplicationFailedException(e), false); } @@ -268,6 +305,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); @@ -314,6 +352,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); @@ -358,6 +397,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); @@ -376,6 +416,15 @@ public void onResponse(Void replicationResponse) { @Override public void onFailure(Exception e) { + assertEquals( 0, segrepTarget.state() .getIndex() .fileDetails() .stream() .filter(ReplicationLuceneIndex.FileMetadata::fullyRecovered) .count() ); assertTrue(e instanceof OpenSearchCorruptionException); assertTrue(e.getMessage().contains("has local copies of segments that differ from the primary")); segrepTarget.fail(new ReplicationFailedException(e), false); @@ -410,6 +459,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { listener.onResponse(new GetSegmentFilesResponse(filesToFetch)); diff --git a/server/src/test/java/org/opensearch/monitor/fs/DeviceStatsTests.java b/server/src/test/java/org/opensearch/monitor/fs/DeviceStatsTests.java index 0fd039b84e887..0059f8e215f2e 100644 --- a/server/src/test/java/org/opensearch/monitor/fs/DeviceStatsTests.java +++ b/server/src/test/java/org/opensearch/monitor/fs/DeviceStatsTests.java @@ -46,7 +46,10 @@ public void testDeviceStats() { final int sectorsRead = randomIntBetween(8 * readsCompleted, 16 * readsCompleted); final int writesCompleted = randomIntBetween(1, 1 << 16); final int sectorsWritten = randomIntBetween(8 * writesCompleted, 16 * writesCompleted); - + final int readTime = randomIntBetween(1, 1 << 16); + final int writeTime = randomIntBetween(1, 1 << 16); + final int queueSize = randomIntBetween(1, 1 << 16); + final int ioTime = randomIntBetween(1, 1 << 16); FsInfo.DeviceStats previous = new FsInfo.DeviceStats( majorDeviceNumber, minorDeviceNumber, @@ -55,6 +60,10 @@ public void testDeviceStats() { sectorsRead, writesCompleted, sectorsWritten, + readTime, + writeTime, + queueSize, + ioTime, null ); FsInfo.DeviceStats current = new FsInfo.DeviceStats( @@ -65,6 +74,10 @@ public void testDeviceStats() { sectorsRead + 16384, writesCompleted + 2048, sectorsWritten + 32768, + readTime + 500, + writeTime + 100, + queueSize + 20, +
ioTime + 8192, previous ); assertThat(current.operations(), equalTo(1024L + 2048L)); @@ -72,6 +85,10 @@ public void testDeviceStats() { assertThat(current.writeOperations(), equalTo(2048L)); assertThat(current.readKilobytes(), equalTo(16384L / 2)); assertThat(current.writeKilobytes(), equalTo(32768L / 2)); + assertEquals(500, current.readTime()); + assertEquals(100, current.writeTime()); + assertEquals(20, current.queueSize()); + assertEquals(8192, current.ioTimeInMillis()); } } diff --git a/server/src/test/java/org/opensearch/monitor/fs/FsProbeTests.java b/server/src/test/java/org/opensearch/monitor/fs/FsProbeTests.java index 686a624d988d7..59a888c665be7 100644 --- a/server/src/test/java/org/opensearch/monitor/fs/FsProbeTests.java +++ b/server/src/test/java/org/opensearch/monitor/fs/FsProbeTests.java @@ -91,6 +91,14 @@ public void testFsInfo() throws IOException { assertThat(deviceStats.previousWritesCompleted, equalTo(-1L)); assertThat(deviceStats.currentSectorsWritten, greaterThanOrEqualTo(0L)); assertThat(deviceStats.previousSectorsWritten, equalTo(-1L)); + assertThat(deviceStats.currentReadTime, greaterThanOrEqualTo(0L)); + assertThat(deviceStats.previousReadTime, greaterThanOrEqualTo(-1L)); + assertThat(deviceStats.currentWriteTime, greaterThanOrEqualTo(0L)); + assertThat(deviceStats.previousWriteTime, greaterThanOrEqualTo(-1L)); + assertThat(deviceStats.currentQueueSize, greaterThanOrEqualTo(0L)); + assertThat(deviceStats.previousQueueSize, greaterThanOrEqualTo(-1L)); + assertThat(deviceStats.currentIOTime, greaterThanOrEqualTo(0L)); + assertThat(deviceStats.previousIOTime, greaterThanOrEqualTo(-1L)); } } else { assertNull(stats.getIoStats()); @@ -243,6 +251,16 @@ List readProcDiskStats() throws IOException { assertThat(first.devicesStats[0].previousWritesCompleted, equalTo(-1L)); assertThat(first.devicesStats[0].currentSectorsWritten, equalTo(118857776L)); assertThat(first.devicesStats[0].previousSectorsWritten, equalTo(-1L)); + + assertEquals(33457, first.devicesStats[0].currentReadTime); + assertEquals(-1, first.devicesStats[0].previousReadTime); + assertEquals(18730966, first.devicesStats[0].currentWriteTime); + assertEquals(-1, first.devicesStats[0].previousWriteTime); + assertEquals(18767169, first.devicesStats[0].currentQueueSize); + assertEquals(-1, first.devicesStats[0].previousQueueSize); + assertEquals(1918440, first.devicesStats[0].currentIOTime); + assertEquals(-1, first.devicesStats[0].previousIOTime); + assertThat(first.devicesStats[1].majorDeviceNumber, equalTo(253)); assertThat(first.devicesStats[1].minorDeviceNumber, equalTo(2)); assertThat(first.devicesStats[1].deviceName, equalTo("dm-2")); @@ -255,6 +273,15 @@ List readProcDiskStats() throws IOException { assertThat(first.devicesStats[1].currentSectorsWritten, equalTo(64126096L)); assertThat(first.devicesStats[1].previousSectorsWritten, equalTo(-1L)); + assertEquals(49312, first.devicesStats[1].currentReadTime); + assertEquals(-1, first.devicesStats[1].previousReadTime); + assertEquals(33730596, first.devicesStats[1].currentWriteTime); + assertEquals(-1, first.devicesStats[1].previousWriteTime); + assertEquals(33781827, first.devicesStats[1].currentQueueSize); + assertEquals(-1, first.devicesStats[1].previousQueueSize); + assertEquals(1058193, first.devicesStats[1].currentIOTime); + assertEquals(-1, first.devicesStats[1].previousIOTime); + diskStats.set( Arrays.asList( " 259 0 nvme0n1 336870 0 7928397 82876 10264393 0 182986405 52451610 0 2971042 52536492", @@ -281,6 +308,16 @@ List readProcDiskStats() throws 
IOException { assertThat(second.devicesStats[0].previousWritesCompleted, equalTo(8398869L)); assertThat(second.devicesStats[0].currentSectorsWritten, equalTo(118857776L)); assertThat(second.devicesStats[0].previousSectorsWritten, equalTo(118857776L)); + + assertEquals(33464, second.devicesStats[0].currentReadTime); + assertEquals(33457, second.devicesStats[0].previousReadTime); + assertEquals(18730966, second.devicesStats[0].currentWriteTime); + assertEquals(18730966, second.devicesStats[0].previousWriteTime); + assertEquals(18767176, second.devicesStats[0].currentQueueSize); + assertEquals(18767169, second.devicesStats[0].previousQueueSize); + assertEquals(1918444, second.devicesStats[0].currentIOTime); + assertEquals(1918440, second.devicesStats[0].previousIOTime); + assertThat(second.devicesStats[1].majorDeviceNumber, equalTo(253)); assertThat(second.devicesStats[1].minorDeviceNumber, equalTo(2)); assertThat(second.devicesStats[1].deviceName, equalTo("dm-2")); @@ -293,11 +330,25 @@ List readProcDiskStats() throws IOException { assertThat(second.devicesStats[1].currentSectorsWritten, equalTo(64128568L)); assertThat(second.devicesStats[1].previousSectorsWritten, equalTo(64126096L)); + assertEquals(49369, second.devicesStats[1].currentReadTime); + assertEquals(49312, second.devicesStats[1].previousReadTime); + assertEquals(33730766, second.devicesStats[1].currentWriteTime); + assertEquals(33730596, second.devicesStats[1].previousWriteTime); + assertEquals(33781827, first.devicesStats[1].currentQueueSize); + assertEquals(-1L, first.devicesStats[1].previousQueueSize); + assertEquals(1058193, first.devicesStats[1].currentIOTime); + assertEquals(-1L, first.devicesStats[1].previousIOTime); + assertThat(second.totalOperations, equalTo(575L)); assertThat(second.totalReadOperations, equalTo(261L)); assertThat(second.totalWriteOperations, equalTo(314L)); assertThat(second.totalReadKilobytes, equalTo(2392L)); assertThat(second.totalWriteKilobytes, equalTo(1236L)); + + assertEquals(64, second.totalReadTime); + assertEquals(170, second.totalWriteTime); + assertEquals(236, second.totalQueueSize); + assertEquals(158, second.totalIOTimeInMillis); } public void testAdjustForHugeFilesystems() throws Exception { diff --git a/server/src/test/java/org/opensearch/repositories/RepositoriesServiceTests.java b/server/src/test/java/org/opensearch/repositories/RepositoriesServiceTests.java index c4599a6e7a00e..43ebb86fd5342 100644 --- a/server/src/test/java/org/opensearch/repositories/RepositoriesServiceTests.java +++ b/server/src/test/java/org/opensearch/repositories/RepositoriesServiceTests.java @@ -219,20 +219,17 @@ public void testRepositoriesStatsCanHaveTheSameNameAndDifferentTypeOverTime() { assertThat(repositoriesService.repositoriesStats().size(), equalTo(1)); repositoriesService.applyClusterState(new ClusterChangedEvent("new repo", emptyState(), clusterStateWithRepoTypeA)); - assertThat(repositoriesService.repositoriesStats().size(), equalTo(1)); + assertThat(repositoriesService.repositoriesStats().size(), equalTo(0)); ClusterState clusterStateWithRepoTypeB = createClusterStateWithRepo(repoName, MeteredRepositoryTypeB.TYPE); repositoriesService.applyClusterState(new ClusterChangedEvent("new repo", clusterStateWithRepoTypeB, emptyState())); List repositoriesStats = repositoriesService.repositoriesStats(); - assertThat(repositoriesStats.size(), equalTo(2)); + assertThat(repositoriesStats.size(), equalTo(1)); RepositoryStatsSnapshot repositoryStatsTypeA = repositoriesStats.get(0); - 
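(The totals asserted at the end of the FsProbeTests hunk above are sums of the per-device deltas between the two probe snapshots. Spelled out from the per-device values already asserted:

totalReadTime  = (33464 - 33457) + (49369 - 49312)             = 7 + 57  = 64
totalWriteTime = (18730966 - 18730966) + (33730766 - 33730596) = 0 + 170 = 170

totalQueueSize = 236 and totalIOTimeInMillis = 158 follow the same per-device-delta pattern; the second-snapshot raw values for dm-2 are not visible in this hunk, so those two are not re-derived here.)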
assertThat(repositoryStatsTypeA.getRepositoryInfo().type, equalTo(MeteredRepositoryTypeA.TYPE)); - assertThat(repositoryStatsTypeA.getRepositoryStats(), equalTo(MeteredRepositoryTypeA.STATS)); + assertThat(repositoryStatsTypeA.getRepositoryInfo().type, equalTo(MeteredRepositoryTypeB.TYPE)); + assertThat(repositoryStatsTypeA.getRepositoryStats(), equalTo(MeteredRepositoryTypeB.STATS)); - RepositoryStatsSnapshot repositoryStatsTypeB = repositoriesStats.get(1); - assertThat(repositoryStatsTypeB.getRepositoryInfo().type, equalTo(MeteredRepositoryTypeB.TYPE)); - assertThat(repositoryStatsTypeB.getRepositoryStats(), equalTo(MeteredRepositoryTypeB.STATS)); } public void testWithSameKeyProviderNames() { @@ -258,7 +255,7 @@ public void testWithSameKeyProviderNames() { kpTypeA ); repositoriesService.applyClusterState(new ClusterChangedEvent("new repo", clusterStateWithRepoTypeB, emptyState())); - assertThat(repositoriesService.repositoriesStats().size(), equalTo(2)); + assertThat(repositoriesService.repositoriesStats().size(), equalTo(1)); MeteredRepositoryTypeB repositoryB = (MeteredRepositoryTypeB) repositoriesService.repository("repoName"); assertNotNull(repositoryB); assertNotNull(repository.cryptoHandler); diff --git a/server/src/test/java/org/opensearch/repositories/RepositoriesStatsArchiveTests.java b/server/src/test/java/org/opensearch/repositories/RepositoriesStatsArchiveTests.java index cf0b06a3f7d16..da0cbcb1d4b17 100644 --- a/server/src/test/java/org/opensearch/repositories/RepositoriesStatsArchiveTests.java +++ b/server/src/test/java/org/opensearch/repositories/RepositoriesStatsArchiveTests.java @@ -32,7 +32,6 @@ package org.opensearch.repositories; -import org.opensearch.common.UUIDs; import org.opensearch.common.unit.TimeValue; import org.opensearch.test.OpenSearchTestCase; @@ -122,14 +121,11 @@ private RepositoryStatsSnapshot createRepositoryStats(RepositoryStats repository private RepositoryStatsSnapshot createRepositoryStats(RepositoryStats repositoryStats, long clusterVersion) { RepositoryInfo repositoryInfo = new RepositoryInfo( - UUIDs.randomBase64UUID(), randomAlphaOfLength(10), randomAlphaOfLength(10), - Map.of("bucket", randomAlphaOfLength(10)), - System.currentTimeMillis(), - null + Map.of("bucket", randomAlphaOfLength(10)) ); - return new RepositoryStatsSnapshot(repositoryInfo, repositoryStats, clusterVersion, true); + return new RepositoryStatsSnapshot(repositoryInfo, repositoryStats, clusterVersion); } } diff --git a/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java b/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java index 03f0d27188027..c114b56bd0b39 100644 --- a/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java +++ b/server/src/test/java/org/opensearch/snapshots/BlobStoreFormatTests.java @@ -152,14 +152,16 @@ public void onFailure(Exception e) { mockBlobContainer, "check-smile", CompressorRegistry.none(), - actionListener + actionListener, + ChecksumBlobStoreFormat.SNAPSHOT_ONLY_FORMAT_PARAMS ); checksumSMILE.writeAsync( new BlobObj("checksum smile compressed"), mockBlobContainer, "check-smile-comp", CompressorRegistry.getCompressor(DeflateCompressor.NAME), - actionListener + actionListener, + ChecksumBlobStoreFormat.SNAPSHOT_ONLY_FORMAT_PARAMS ); latch.await(); diff --git a/server/src/test/java/org/opensearch/telemetry/metrics/MetricsRegistryFactoryTests.java b/server/src/test/java/org/opensearch/telemetry/metrics/MetricsRegistryFactoryTests.java index 5d5ea62dd161e..80942123fd4fd 100644 --- 
a/server/src/test/java/org/opensearch/telemetry/metrics/MetricsRegistryFactoryTests.java +++ b/server/src/test/java/org/opensearch/telemetry/metrics/MetricsRegistryFactoryTests.java @@ -63,6 +63,18 @@ public void testGetMetricsWithAvailableMetricsTelemetry() { } + public void testNullMetricsTelemetry() { + Settings settings = Settings.builder().put(TelemetrySettings.METRICS_FEATURE_ENABLED_SETTING.getKey(), false).build(); + TelemetrySettings telemetrySettings = new TelemetrySettings(settings, new ClusterSettings(settings, getClusterSettings())); + Telemetry mockTelemetry = mock(Telemetry.class); + when(mockTelemetry.getMetricsTelemetry()).thenReturn(null); + metricsRegistryFactory = new MetricsRegistryFactory(telemetrySettings, Optional.of(mockTelemetry)); + + MetricsRegistry metricsRegistry = metricsRegistryFactory.getMetricsRegistry(); + assertTrue(metricsRegistry instanceof NoopMetricsRegistry); + + } + private Set> getClusterSettings() { Set> allTracerSettings = new HashSet<>(); ClusterSettings.FEATURE_FLAGGED_CLUSTER_SETTINGS.get(List.of(FeatureFlags.TELEMETRY)).stream().forEach((allTracerSettings::add)); diff --git a/server/src/test/java/org/opensearch/telemetry/tracing/TracerFactoryTests.java b/server/src/test/java/org/opensearch/telemetry/tracing/TracerFactoryTests.java index b27f888eaf502..3a388be22445e 100644 --- a/server/src/test/java/org/opensearch/telemetry/tracing/TracerFactoryTests.java +++ b/server/src/test/java/org/opensearch/telemetry/tracing/TracerFactoryTests.java @@ -83,6 +83,18 @@ public void testGetTracerWithAvailableTracingTelemetryReturnsWrappedTracer() { } + public void testNullTracer() { + Settings settings = Settings.builder().put(TelemetrySettings.TRACER_FEATURE_ENABLED_SETTING.getKey(), false).build(); + TelemetrySettings telemetrySettings = new TelemetrySettings(settings, new ClusterSettings(settings, getClusterSettings())); + Telemetry mockTelemetry = mock(Telemetry.class); + when(mockTelemetry.getTracingTelemetry()).thenReturn(null); + tracerFactory = new TracerFactory(telemetrySettings, Optional.of(mockTelemetry), new ThreadContext(Settings.EMPTY)); + + Tracer tracer = tracerFactory.getTracer(); + assertTrue(tracer instanceof NoopTracer); + + } + private Set> getClusterSettings() { Set> allTracerSettings = new HashSet<>(); ClusterSettings.FEATURE_FLAGGED_CLUSTER_SETTINGS.get(List.of(FeatureFlags.TELEMETRY)).stream().forEach((allTracerSettings::add)); diff --git a/server/src/test/java/org/opensearch/telemetry/tracing/WrappedTracerTests.java b/server/src/test/java/org/opensearch/telemetry/tracing/WrappedTracerTests.java index 43e0cb8e44439..8606104d26103 100644 --- a/server/src/test/java/org/opensearch/telemetry/tracing/WrappedTracerTests.java +++ b/server/src/test/java/org/opensearch/telemetry/tracing/WrappedTracerTests.java @@ -26,6 +26,7 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; public class WrappedTracerTests extends OpenSearchTestCase { @@ -38,6 +39,7 @@ public void testStartSpanWithTracingDisabledInvokesNoopTracer() throws Exception SpanCreationContext spanCreationContext = SpanCreationContext.internal().name("foo"); wrappedTracer.startSpan(spanCreationContext); assertTrue(wrappedTracer.getDelegateTracer() instanceof NoopTracer); + assertFalse(wrappedTracer.isRecording()); verify(mockDefaultTracer, never()).startSpan(SpanCreationContext.internal().name("foo")); } } @@ -46,12 +48,13 @@ public void 
testStartSpanWithTracingEnabledInvokesDefaultTracer() throws Excepti Settings settings = Settings.builder().put(TelemetrySettings.TRACER_ENABLED_SETTING.getKey(), true).build(); TelemetrySettings telemetrySettings = new TelemetrySettings(settings, new ClusterSettings(settings, getClusterSettings())); DefaultTracer mockDefaultTracer = mock(DefaultTracer.class); - + when(mockDefaultTracer.isRecording()).thenReturn(true); try (WrappedTracer wrappedTracer = new WrappedTracer(telemetrySettings, mockDefaultTracer)) { SpanCreationContext spanCreationContext = SpanCreationContext.internal().name("foo"); wrappedTracer.startSpan(spanCreationContext); assertTrue(wrappedTracer.getDelegateTracer() instanceof DefaultTracer); + assertTrue(wrappedTracer.isRecording()); verify(mockDefaultTracer).startSpan(eq(spanCreationContext)); } } diff --git a/test/fixtures/hdfs-fixture/build.gradle b/test/fixtures/hdfs-fixture/build.gradle index 7de98c0986b87..13345fcb20de1 100644 --- a/test/fixtures/hdfs-fixture/build.gradle +++ b/test/fixtures/hdfs-fixture/build.gradle @@ -71,6 +71,6 @@ dependencies { runtimeOnly("com.squareup.okhttp3:okhttp:4.11.0") { exclude group: "com.squareup.okio" } - runtimeOnly "com.squareup.okio:okio:3.5.0" + runtimeOnly "com.squareup.okio:okio:3.6.0" runtimeOnly "org.xerial.snappy:snappy-java:1.1.10.5" } diff --git a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java index a520b6278ea47..60a54110fd0b4 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java +++ b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java @@ -121,7 +121,8 @@ List adjustNodesStats(List nodesStats) { nodeStats.getWeightedRoutingStats(), nodeStats.getFileCacheStats(), nodeStats.getTaskCancellationStats(), - nodeStats.getSearchPipelineStats() + nodeStats.getSearchPipelineStats(), + nodeStats.getRepositoriesStats() ); }).collect(Collectors.toList()); } diff --git a/test/framework/src/main/java/org/opensearch/index/replication/TestReplicationSource.java b/test/framework/src/main/java/org/opensearch/index/replication/TestReplicationSource.java index b29e25a0bff2c..bcd47e3d578ee 100644 --- a/test/framework/src/main/java/org/opensearch/index/replication/TestReplicationSource.java +++ b/test/framework/src/main/java/org/opensearch/index/replication/TestReplicationSource.java @@ -17,6 +17,7 @@ import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; import java.util.List; +import java.util.function.BiConsumer; /** * This class is used by unit tests implementing SegmentReplicationSource @@ -36,6 +37,7 @@ public abstract void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ); diff --git a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java index 9dc230474482f..412d5235fe462 100644 --- a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java @@ -174,6 +174,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; +import java.util.function.BiConsumer; import java.util.function.BiFunction; import 
java.util.function.Consumer; import java.util.function.Function; @@ -1620,6 +1621,7 @@ public void getSegmentFiles( ReplicationCheckpoint checkpoint, List filesToFetch, IndexShard indexShard, + BiConsumer fileProgressTracker, ActionListener listener ) { try ( diff --git a/test/framework/src/main/java/org/opensearch/repositories/blobstore/OpenSearchMockAPIBasedRepositoryIntegTestCase.java b/test/framework/src/main/java/org/opensearch/repositories/blobstore/OpenSearchMockAPIBasedRepositoryIntegTestCase.java index dff9b997d87db..faa9d52b105b2 100644 --- a/test/framework/src/main/java/org/opensearch/repositories/blobstore/OpenSearchMockAPIBasedRepositoryIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/repositories/blobstore/OpenSearchMockAPIBasedRepositoryIntegTestCase.java @@ -238,7 +238,7 @@ public void testRequestStats() throws Exception { assertEquals(assertionErrorMsg, mockCalls, sdkRequestCounts); } - private Map getMockRequestCounts() { + protected Map getMockRequestCounts() { for (HttpHandler h : handlers.values()) { while (h instanceof DelegatingHttpHandler) { if (h instanceof HttpStatsCollectorHandler) { diff --git a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java index 3c7423f73685f..898e125b94954 100644 --- a/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java +++ b/test/framework/src/main/java/org/opensearch/test/InternalTestCluster.java @@ -1853,10 +1853,12 @@ public synchronized void stopRandomNodeNotCurrentMaster() throws IOException { */ public void stopAllNodes() { try { - int totalDataNodes = numDataNodes(); - while (totalDataNodes > 0) { - stopRandomDataNode(); - totalDataNodes -= 1; + if (numDataAndClusterManagerNodes() != numClusterManagerNodes()) { + int totalDataNodes = numDataNodes(); + while (totalDataNodes > 0) { + stopRandomDataNode(); + totalDataNodes -= 1; + } } int totalClusterManagerNodes = numClusterManagerNodes(); while (totalClusterManagerNodes > 1) { @@ -2719,6 +2721,7 @@ public void ensureEstimatedStats() { false, false, false, + false, false ); assertThat( diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java index da829a3bc5225..c16cc1d2a5fba 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java @@ -1928,6 +1928,7 @@ protected Settings nodeSettings(int nodeOrdinal) { // Enable tracer only when Telemetry Setting is enabled if (featureFlagSettings().getAsBoolean(FeatureFlags.TELEMETRY_SETTING.getKey(), false)) { + builder.put(TelemetrySettings.TRACER_FEATURE_ENABLED_SETTING.getKey(), true); builder.put(TelemetrySettings.TRACER_ENABLED_SETTING.getKey(), true); } if (FeatureFlags.CONCURRENT_SEGMENT_SEARCH_SETTING.get(featureFlagSettings)) { diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchSingleNodeTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchSingleNodeTestCase.java index f14fe3bf3961c..efc29d1c254e6 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchSingleNodeTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchSingleNodeTestCase.java @@ -254,6 +254,7 @@ private Node newNode() { .putList(INITIAL_CLUSTER_MANAGER_NODES_SETTING.getKey(), nodeName) 
.put(FeatureFlags.TELEMETRY_SETTING.getKey(), true) .put(TelemetrySettings.TRACER_ENABLED_SETTING.getKey(), true) + .put(TelemetrySettings.TRACER_FEATURE_ENABLED_SETTING.getKey(), true) .put(nodeSettings()) // allow test cases to provide their own settings or override these .put(featureFlagSettings); if (FeatureFlags.CONCURRENT_SEGMENT_SEARCH_SETTING.get(featureFlagSettings)) { @@ -271,6 +272,7 @@ private Node newNode() { plugins.add(MockHttpTransport.TestPlugin.class); } plugins.add(MockScriptService.TestPlugin.class); + plugins.add(MockTelemetryPlugin.class); Node node = new MockNode(settingsBuilder.build(), plugins, forbidPrivateIndexSettings()); try { diff --git a/test/framework/src/main/java/org/opensearch/test/telemetry/MockTelemetry.java b/test/framework/src/main/java/org/opensearch/test/telemetry/MockTelemetry.java index a1801aa125148..dda413ce2818e 100644 --- a/test/framework/src/main/java/org/opensearch/test/telemetry/MockTelemetry.java +++ b/test/framework/src/main/java/org/opensearch/test/telemetry/MockTelemetry.java @@ -43,7 +43,7 @@ public Counter createCounter(String name, String description, String unit) { @Override public Counter createUpDownCounter(String name, String description, String unit) { - return null; + return NoopCounter.INSTANCE; } @Override
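(A closing note on the telemetry edits in this patch: when the `TRACER_FEATURE_ENABLED_SETTING` / `METRICS_FEATURE_ENABLED_SETTING` feature settings are off, the factories hand back no-op implementations, and `MockTelemetry` now returns `NoopCounter.INSTANCE` instead of `null` for up-down counters. A hedged sketch of what that buys call sites; the counter name and values are illustrative, and `Counter#add(double)` is assumed from the telemetry API:)

```java
import org.opensearch.telemetry.metrics.Counter;

// MockTelemetry's createUpDownCounter now returns NoopCounter.INSTANCE, so
// instrumented code can use the counter unconditionally (no null checks).
Counter inflight = metricsTelemetry.createUpDownCounter("requests.inflight", "in-flight requests", "1");
inflight.add(1.0);   // no-op under the mock; before this change the null return would NPE here
inflight.add(-1.0);  // decrement on completion, equally safe
```

The same pattern shows up in `testNullTracer` and `testNullMetricsTelemetry` above: a disabled feature yields `NoopTracer` / `NoopMetricsRegistry`, never `null`.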