From 197008599510dff66f1d515e8c224113427364bf Mon Sep 17 00:00:00 2001 From: "opensearch-trigger-bot[bot]" <98922864+opensearch-trigger-bot[bot]@users.noreply.github.com> Date: Wed, 18 Oct 2023 07:41:09 -0400 Subject: [PATCH 1/2] Add telemetry tracer/metric enable flag and integ test (#10395) (#10676) * Add telemetry tracer/metric enable flag and integ test * Add Changelog * Fix compilation issue * Empty-Commit * Add component flag to traceable wrappers * Address review comment * Address review comment * Address review comment * Address review comment * Address review comment * Address review comment --------- (cherry picked from commit a36ab39a3e7da385ccd3651313b1e953a87fae7a) Signed-off-by: Gagan Juneja Signed-off-by: Gagan Juneja Signed-off-by: github-actions[bot] Co-authored-by: github-actions[bot] Co-authored-by: Gagan Juneja --- CHANGELOG.md | 1 + .../telemetry/metrics/MetricsTelemetry.java | 4 +- .../telemetry/tracing/DefaultTracer.java | 5 + .../opensearch/telemetry/tracing/Tracer.java | 6 ++ .../telemetry/tracing/noop/NoopTracer.java | 5 + .../telemetry/tracing/DefaultTracerTests.java | 1 + .../IntegrationTestOTelTelemetryPlugin.java | 5 +- .../InMemorySingletonMetricsExporter.java | 65 ++++++++++++ .../TelemetryMetricsDisabledSanityIT.java | 62 ++++++++++++ .../TelemetryMetricsEnabledSanityIT.java | 99 +++++++++++++++++++ .../TelemetryTracerDisabledSanityIT.java | 1 + .../TelemetryTracerEnabledSanityIT.java | 5 +- .../telemetry/OTelTelemetryPlugin.java | 27 +++-- .../metrics/OTelMetricsTelemetry.java | 9 +- .../telemetry/tracing/OTelTelemetry.java | 23 +++-- .../tracing/OTelTracingTelemetry.java | 15 +-- .../metrics/OTelMetricsTelemetryTests.java | 20 +++- .../tracing/OTelTracingTelemetryTests.java | 21 +++- .../common/settings/ClusterSettings.java | 8 +- .../main/java/org/opensearch/node/Node.java | 21 +++- .../telemetry/TelemetrySettings.java | 27 +++++ .../telemetry/tracing/WrappedTracer.java | 5 + .../channels/TraceableHttpChannel.java | 3 +- .../channels/TraceableRestChannel.java | 3 +- .../TraceableTcpTransportChannel.java | 3 +- .../TraceableTransportResponseHandler.java | 3 +- .../listener/TraceableActionListener.java | 3 +- .../metrics/MetricsRegistryFactoryTests.java | 12 +++ .../telemetry/tracing/TracerFactoryTests.java | 12 +++ .../telemetry/tracing/WrappedTracerTests.java | 5 +- .../test/OpenSearchIntegTestCase.java | 1 + .../test/OpenSearchSingleNodeTestCase.java | 2 + .../test/telemetry/MockTelemetry.java | 5 +- 33 files changed, 429 insertions(+), 58 deletions(-) rename plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/{tracing => }/IntegrationTestOTelTelemetryPlugin.java (85%) create mode 100644 plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/InMemorySingletonMetricsExporter.java create mode 100644 plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsDisabledSanityIT.java create mode 100644 plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsEnabledSanityIT.java diff --git a/CHANGELOG.md b/CHANGELOG.md index cff073d266f68..208b611b49e37 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Backport the PR #9107 for updating CONCURRENT_SEGMENT_SEARCH_TARGET_MAX_SLICE_COUNT_KEY setting to a dynamic setting ([#10606](https://github.com/opensearch-project/OpenSearch/pull/10606)) - [Remote Store] Add Remote Store backpressure rejection stats to `_nodes/stats` ([#10524](https://github.com/opensearch-project/OpenSearch/pull/10524)) - [BUG] Fix java.lang.SecurityException in repository-gcs plugin ([#10642](https://github.com/opensearch-project/OpenSearch/pull/10642)) +- Add telemetry tracer/metric enable flag and integ test. ([#10395](https://github.com/opensearch-project/OpenSearch/pull/10395)) ### Deprecated diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/MetricsTelemetry.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/MetricsTelemetry.java index 2f70c28efb1cd..fb3dec8152b4f 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/MetricsTelemetry.java +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/MetricsTelemetry.java @@ -10,14 +10,12 @@ import org.opensearch.common.annotation.ExperimentalApi; -import java.io.Closeable; - /** * Interface for metrics telemetry providers * * @opensearch.experimental */ @ExperimentalApi -public interface MetricsTelemetry extends MetricsRegistry, Closeable { +public interface MetricsTelemetry extends MetricsRegistry { } diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/DefaultTracer.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/DefaultTracer.java index 79b7e4aca6c2f..a3bb64ea392a9 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/DefaultTracer.java +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/DefaultTracer.java @@ -85,6 +85,11 @@ public SpanScope withSpanInScope(Span span) { return DefaultSpanScope.create(span, tracerContextStorage).attach(); } + @Override + public boolean isRecording() { + return true; + } + private Span createSpan(SpanCreationContext spanCreationContext, Span parentSpan) { return tracingTelemetry.createSpan(spanCreationContext, parentSpan); } diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/Tracer.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/Tracer.java index e6d4878a5e833..8257d251e9560 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/Tracer.java +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/Tracer.java @@ -53,4 +53,10 @@ public interface Tracer extends HttpTracer, Closeable { */ SpanScope withSpanInScope(Span span); + /** + * Tells if the traces are being recorded or not + * @return boolean + */ + boolean isRecording(); + } diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/noop/NoopTracer.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/noop/NoopTracer.java index c073e8d3e766f..50452ff5fe3b4 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/noop/NoopTracer.java +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/noop/NoopTracer.java @@ -54,6 +54,11 @@ public SpanScope withSpanInScope(Span span) { return SpanScope.NO_OP; } + @Override + public boolean isRecording() { + return false; + } + @Override public void close() { diff --git a/libs/telemetry/src/test/java/org/opensearch/telemetry/tracing/DefaultTracerTests.java b/libs/telemetry/src/test/java/org/opensearch/telemetry/tracing/DefaultTracerTests.java index 0a717e993cb81..2a791f1ae4164 100644 --- a/libs/telemetry/src/test/java/org/opensearch/telemetry/tracing/DefaultTracerTests.java +++ b/libs/telemetry/src/test/java/org/opensearch/telemetry/tracing/DefaultTracerTests.java @@ -62,6 +62,7 @@ public void testCreateSpan() { String spanName = defaultTracer.getCurrentSpan().getSpan().getSpanName(); assertEquals("span_name", spanName); + assertTrue(defaultTracer.isRecording()); } @SuppressWarnings("unchecked") diff --git a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/IntegrationTestOTelTelemetryPlugin.java b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/IntegrationTestOTelTelemetryPlugin.java similarity index 85% rename from plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/IntegrationTestOTelTelemetryPlugin.java rename to plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/IntegrationTestOTelTelemetryPlugin.java index ed4d13f3abb7d..45caf8bf5f60b 100644 --- a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/IntegrationTestOTelTelemetryPlugin.java +++ b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/IntegrationTestOTelTelemetryPlugin.java @@ -6,12 +6,9 @@ * compatible open source license. */ -package org.opensearch.telemetry.tracing; +package org.opensearch.telemetry; import org.opensearch.common.settings.Settings; -import org.opensearch.telemetry.OTelTelemetryPlugin; -import org.opensearch.telemetry.Telemetry; -import org.opensearch.telemetry.TelemetrySettings; import java.util.Optional; diff --git a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/InMemorySingletonMetricsExporter.java b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/InMemorySingletonMetricsExporter.java new file mode 100644 index 0000000000000..74fc872cb30e3 --- /dev/null +++ b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/InMemorySingletonMetricsExporter.java @@ -0,0 +1,65 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.telemetry.metrics; + +import java.util.Collection; +import java.util.List; + +import io.opentelemetry.sdk.common.CompletableResultCode; +import io.opentelemetry.sdk.metrics.InstrumentType; +import io.opentelemetry.sdk.metrics.data.AggregationTemporality; +import io.opentelemetry.sdk.metrics.data.MetricData; +import io.opentelemetry.sdk.metrics.export.MetricExporter; +import io.opentelemetry.sdk.testing.exporter.InMemoryMetricExporter; + +public class InMemorySingletonMetricsExporter implements MetricExporter { + + public static final InMemorySingletonMetricsExporter INSTANCE = new InMemorySingletonMetricsExporter(InMemoryMetricExporter.create()); + + private static InMemoryMetricExporter delegate; + + public static InMemorySingletonMetricsExporter create() { + return INSTANCE; + } + + private InMemorySingletonMetricsExporter(InMemoryMetricExporter delegate) { + InMemorySingletonMetricsExporter.delegate = delegate; + } + + @Override + public CompletableResultCode export(Collection metrics) { + return delegate.export(metrics); + } + + @Override + public CompletableResultCode flush() { + return delegate.flush(); + } + + @Override + public CompletableResultCode shutdown() { + return delegate.shutdown(); + } + + public List getFinishedMetricItems() { + return delegate.getFinishedMetricItems(); + } + + /** + * Clears the state. + */ + public void reset() { + delegate.reset(); + } + + @Override + public AggregationTemporality getAggregationTemporality(InstrumentType instrumentType) { + return delegate.getAggregationTemporality(instrumentType); + } +} diff --git a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsDisabledSanityIT.java b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsDisabledSanityIT.java new file mode 100644 index 0000000000000..bcdcb657c4f42 --- /dev/null +++ b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsDisabledSanityIT.java @@ -0,0 +1,62 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.telemetry.metrics; + +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.plugins.Plugin; +import org.opensearch.telemetry.IntegrationTestOTelTelemetryPlugin; +import org.opensearch.telemetry.OTelTelemetrySettings; +import org.opensearch.telemetry.TelemetrySettings; +import org.opensearch.telemetry.metrics.noop.NoopCounter; +import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.util.Arrays; +import java.util.Collection; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, minNumDataNodes = 1) +public class TelemetryMetricsDisabledSanityIT extends OpenSearchIntegTestCase { + + @Override + protected Settings nodeSettings(int nodeOrdinal) { + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal)) + .put(TelemetrySettings.METRICS_FEATURE_ENABLED_SETTING.getKey(), false) + .put( + OTelTelemetrySettings.OTEL_METRICS_EXPORTER_CLASS_SETTING.getKey(), + "org.opensearch.telemetry.metrics.InMemorySingletonMetricsExporter" + ) + .put(TelemetrySettings.METRICS_PUBLISH_INTERVAL_SETTING.getKey(), TimeValue.timeValueSeconds(1)) + .build(); + } + + @Override + protected Collection> nodePlugins() { + return Arrays.asList(IntegrationTestOTelTelemetryPlugin.class); + } + + @Override + protected boolean addMockTelemetryPlugin() { + return false; + } + + public void testSanityChecksWhenMetricsDisabled() throws Exception { + MetricsRegistry metricsRegistry = internalCluster().getInstance(MetricsRegistry.class); + + Counter counter = metricsRegistry.createCounter("test-counter", "test", "1"); + counter.add(1.0); + + Thread.sleep(2000); + + assertTrue(metricsRegistry instanceof NoopMetricsRegistry); + assertTrue(counter instanceof NoopCounter); + } + +} diff --git a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsEnabledSanityIT.java b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsEnabledSanityIT.java new file mode 100644 index 0000000000000..ed341595d327d --- /dev/null +++ b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsEnabledSanityIT.java @@ -0,0 +1,99 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.telemetry.metrics; + +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.plugins.Plugin; +import org.opensearch.telemetry.IntegrationTestOTelTelemetryPlugin; +import org.opensearch.telemetry.OTelTelemetrySettings; +import org.opensearch.telemetry.TelemetrySettings; +import org.opensearch.test.OpenSearchIntegTestCase; +import org.junit.After; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.stream.Collectors; + +import io.opentelemetry.sdk.metrics.data.DoublePointData; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.SUITE, minNumDataNodes = 1) +public class TelemetryMetricsEnabledSanityIT extends OpenSearchIntegTestCase { + + @Override + protected Settings nodeSettings(int nodeOrdinal) { + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal)) + .put(TelemetrySettings.METRICS_FEATURE_ENABLED_SETTING.getKey(), true) + .put( + OTelTelemetrySettings.OTEL_METRICS_EXPORTER_CLASS_SETTING.getKey(), + "org.opensearch.telemetry.metrics.InMemorySingletonMetricsExporter" + ) + .put(TelemetrySettings.METRICS_PUBLISH_INTERVAL_SETTING.getKey(), TimeValue.timeValueSeconds(1)) + .build(); + } + + @Override + protected Collection> nodePlugins() { + return Arrays.asList(IntegrationTestOTelTelemetryPlugin.class); + } + + @Override + protected boolean addMockTelemetryPlugin() { + return false; + } + + public void testCounter() throws Exception { + MetricsRegistry metricsRegistry = internalCluster().getInstance(MetricsRegistry.class); + InMemorySingletonMetricsExporter.INSTANCE.reset(); + + Counter counter = metricsRegistry.createCounter("test-counter", "test", "1"); + counter.add(1.0); + // Sleep for about 2s to wait for metrics to be published. + Thread.sleep(2000); + + InMemorySingletonMetricsExporter exporter = InMemorySingletonMetricsExporter.INSTANCE; + double value = ((DoublePointData) ((ArrayList) exporter.getFinishedMetricItems() + .stream() + .filter(a -> a.getName().equals("test-counter")) + .collect(Collectors.toList()) + .get(0) + .getDoubleSumData() + .getPoints()).get(0)).getValue(); + assertEquals(1.0, value, 0.0); + } + + public void testUpDownCounter() throws Exception { + + MetricsRegistry metricsRegistry = internalCluster().getInstance(MetricsRegistry.class); + InMemorySingletonMetricsExporter.INSTANCE.reset(); + + Counter counter = metricsRegistry.createUpDownCounter("test-up-down-counter", "test", "1"); + counter.add(1.0); + counter.add(-2.0); + // Sleep for about 2s to wait for metrics to be published. + Thread.sleep(2000); + + InMemorySingletonMetricsExporter exporter = InMemorySingletonMetricsExporter.INSTANCE; + double value = ((DoublePointData) ((ArrayList) exporter.getFinishedMetricItems() + .stream() + .filter(a -> a.getName().equals("test-up-down-counter")) + .collect(Collectors.toList()) + .get(0) + .getDoubleSumData() + .getPoints()).get(0)).getValue(); + assertEquals(-1.0, value, 0.0); + } + + @After + public void reset() { + InMemorySingletonMetricsExporter.INSTANCE.reset(); + } +} diff --git a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerDisabledSanityIT.java b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerDisabledSanityIT.java index 949a58f6cab41..45ed140e1be94 100644 --- a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerDisabledSanityIT.java +++ b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerDisabledSanityIT.java @@ -12,6 +12,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.plugins.Plugin; +import org.opensearch.telemetry.IntegrationTestOTelTelemetryPlugin; import org.opensearch.telemetry.OTelTelemetrySettings; import org.opensearch.telemetry.TelemetrySettings; import org.opensearch.test.OpenSearchIntegTestCase; diff --git a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerEnabledSanityIT.java b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerEnabledSanityIT.java index 8a49a0abf5512..f07f2b308e801 100644 --- a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerEnabledSanityIT.java +++ b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerEnabledSanityIT.java @@ -12,6 +12,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.plugins.Plugin; +import org.opensearch.telemetry.IntegrationTestOTelTelemetryPlugin; import org.opensearch.telemetry.OTelTelemetrySettings; import org.opensearch.telemetry.TelemetrySettings; import org.opensearch.telemetry.tracing.attributes.Attributes; @@ -88,9 +89,7 @@ public void testSanityChecksWhenTracingEnabled() throws Exception { ); InMemorySingletonSpanExporter exporter = InMemorySingletonSpanExporter.INSTANCE; - if (!exporter.getFinishedSpanItems().isEmpty()) { - validators.validate(exporter.getFinishedSpanItems(), 6); - } + validators.validate(exporter.getFinishedSpanItems(), 6); } private static void updateTelemetrySetting(Client client, boolean value) { diff --git a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/OTelTelemetryPlugin.java b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/OTelTelemetryPlugin.java index b57876c9310f3..297ae8873636f 100644 --- a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/OTelTelemetryPlugin.java +++ b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/OTelTelemetryPlugin.java @@ -8,14 +8,13 @@ package org.opensearch.telemetry; +import org.opensearch.common.concurrent.RefCountedReleasable; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.plugins.Plugin; import org.opensearch.plugins.TelemetryPlugin; -import org.opensearch.telemetry.metrics.OTelMetricsTelemetry; import org.opensearch.telemetry.tracing.OTelResourceProvider; import org.opensearch.telemetry.tracing.OTelTelemetry; -import org.opensearch.telemetry.tracing.OTelTracingTelemetry; import java.util.Arrays; import java.util.List; @@ -37,6 +36,8 @@ public class OTelTelemetryPlugin extends Plugin implements TelemetryPlugin { private final Settings settings; + private RefCountedReleasable refCountedOpenTelemetry; + /** * Creates Otel plugin * @param settings cluster settings @@ -58,20 +59,32 @@ public List> getSettings() { @Override public Optional getTelemetry(TelemetrySettings telemetrySettings) { + initializeOpenTelemetrySdk(telemetrySettings); return Optional.of(telemetry(telemetrySettings)); } + private void initializeOpenTelemetrySdk(TelemetrySettings telemetrySettings) { + if (refCountedOpenTelemetry != null) { + return; + } + OpenTelemetrySdk openTelemetrySdk = OTelResourceProvider.get(telemetrySettings, settings); + refCountedOpenTelemetry = new RefCountedReleasable<>("openTelemetry", openTelemetrySdk, openTelemetrySdk::close); + } + @Override public String getName() { return OTEL_TRACER_NAME; } private Telemetry telemetry(TelemetrySettings telemetrySettings) { - final OpenTelemetrySdk openTelemetry = OTelResourceProvider.get(telemetrySettings, settings); - return new OTelTelemetry( - new OTelTracingTelemetry<>(openTelemetry, openTelemetry.getSdkTracerProvider()), - new OTelMetricsTelemetry<>(openTelemetry.getSdkMeterProvider()) - ); + return new OTelTelemetry(refCountedOpenTelemetry); + } + + @Override + public void close() { + if (refCountedOpenTelemetry != null) { + refCountedOpenTelemetry.close(); + } } } diff --git a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetry.java b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetry.java index 8598e5976d20d..6160e5106c041 100644 --- a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetry.java +++ b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetry.java @@ -8,6 +8,7 @@ package org.opensearch.telemetry.metrics; +import org.opensearch.common.concurrent.RefCountedReleasable; import org.opensearch.telemetry.OTelTelemetryPlugin; import java.io.Closeable; @@ -19,19 +20,24 @@ import io.opentelemetry.api.metrics.DoubleUpDownCounter; import io.opentelemetry.api.metrics.Meter; import io.opentelemetry.api.metrics.MeterProvider; +import io.opentelemetry.sdk.OpenTelemetrySdk; /** * OTel implementation for {@link MetricsTelemetry} */ public class OTelMetricsTelemetry implements MetricsTelemetry { + private final RefCountedReleasable refCountedOpenTelemetry; private final Meter otelMeter; private final T meterProvider; /** * Creates OTel based {@link MetricsTelemetry}. + * @param openTelemetry open telemetry. * @param meterProvider {@link MeterProvider} instance */ - public OTelMetricsTelemetry(T meterProvider) { + public OTelMetricsTelemetry(RefCountedReleasable openTelemetry, T meterProvider) { + this.refCountedOpenTelemetry = openTelemetry; + this.refCountedOpenTelemetry.incRef(); this.meterProvider = meterProvider; this.otelMeter = meterProvider.get(OTelTelemetryPlugin.INSTRUMENTATION_SCOPE_NAME); } @@ -63,5 +69,6 @@ public Counter createUpDownCounter(String name, String description, String unit) @Override public void close() throws IOException { meterProvider.close(); + refCountedOpenTelemetry.close(); } } diff --git a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTelemetry.java b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTelemetry.java index 282fabd43346b..0c697d2cc5e8c 100644 --- a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTelemetry.java +++ b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTelemetry.java @@ -8,34 +8,39 @@ package org.opensearch.telemetry.tracing; +import org.opensearch.common.concurrent.RefCountedReleasable; import org.opensearch.telemetry.Telemetry; import org.opensearch.telemetry.metrics.MetricsTelemetry; +import org.opensearch.telemetry.metrics.OTelMetricsTelemetry; + +import io.opentelemetry.sdk.OpenTelemetrySdk; /** * Otel implementation of Telemetry */ public class OTelTelemetry implements Telemetry { - private final TracingTelemetry tracingTelemetry; - private final MetricsTelemetry metricsTelemetry; + private final RefCountedReleasable refCountedOpenTelemetry; /** * Creates Telemetry instance - * @param tracingTelemetry tracing telemetry - * @param metricsTelemetry metrics telemetry + + */ + /** + * Creates Telemetry instance + * @param refCountedOpenTelemetry open telemetry. */ - public OTelTelemetry(TracingTelemetry tracingTelemetry, MetricsTelemetry metricsTelemetry) { - this.tracingTelemetry = tracingTelemetry; - this.metricsTelemetry = metricsTelemetry; + public OTelTelemetry(RefCountedReleasable refCountedOpenTelemetry) { + this.refCountedOpenTelemetry = refCountedOpenTelemetry; } @Override public TracingTelemetry getTracingTelemetry() { - return tracingTelemetry; + return new OTelTracingTelemetry<>(refCountedOpenTelemetry, refCountedOpenTelemetry.get().getSdkTracerProvider()); } @Override public MetricsTelemetry getMetricsTelemetry() { - return metricsTelemetry; + return new OTelMetricsTelemetry<>(refCountedOpenTelemetry, refCountedOpenTelemetry.get().getSdkMeterProvider()); } } diff --git a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTracingTelemetry.java b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTracingTelemetry.java index f88afe623fd56..af39617a8c744 100644 --- a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTracingTelemetry.java +++ b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTracingTelemetry.java @@ -8,31 +8,33 @@ package org.opensearch.telemetry.tracing; +import org.opensearch.common.concurrent.RefCountedReleasable; import org.opensearch.telemetry.OTelAttributesConverter; import org.opensearch.telemetry.OTelTelemetryPlugin; import java.io.Closeable; import java.io.IOException; -import io.opentelemetry.api.OpenTelemetry; import io.opentelemetry.api.trace.TracerProvider; import io.opentelemetry.context.Context; +import io.opentelemetry.sdk.OpenTelemetrySdk; /** * OTel based Telemetry provider */ public class OTelTracingTelemetry implements TracingTelemetry { - private final OpenTelemetry openTelemetry; + private final RefCountedReleasable refCountedOpenTelemetry; private final T tracerProvider; private final io.opentelemetry.api.trace.Tracer otelTracer; /** * Creates OTel based {@link TracingTelemetry} - * @param openTelemetry OpenTelemetry instance + * @param refCountedOpenTelemetry OpenTelemetry instance * @param tracerProvider {@link TracerProvider} instance. */ - public OTelTracingTelemetry(OpenTelemetry openTelemetry, T tracerProvider) { - this.openTelemetry = openTelemetry; + public OTelTracingTelemetry(RefCountedReleasable refCountedOpenTelemetry, T tracerProvider) { + this.refCountedOpenTelemetry = refCountedOpenTelemetry; + this.refCountedOpenTelemetry.incRef(); this.tracerProvider = tracerProvider; this.otelTracer = tracerProvider.get(OTelTelemetryPlugin.INSTRUMENTATION_SCOPE_NAME); } @@ -40,6 +42,7 @@ public OTelTracingTelemetry(OpenTelemetry openTelemetry, T tracerProvider) { @Override public void close() throws IOException { tracerProvider.close(); + refCountedOpenTelemetry.close(); } @Override @@ -49,7 +52,7 @@ public Span createSpan(SpanCreationContext spanCreationContext, Span parentSpan) @Override public TracingContextPropagator getContextPropagator() { - return new OTelTracingContextPropagator(openTelemetry); + return new OTelTracingContextPropagator(refCountedOpenTelemetry.get()); } private Span createOtelSpan(SpanCreationContext spanCreationContext, Span parentSpan) { diff --git a/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetryTests.java b/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetryTests.java index 233c93e6b9a36..9de575b69774a 100644 --- a/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetryTests.java +++ b/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetryTests.java @@ -8,11 +8,13 @@ package org.opensearch.telemetry.metrics; +import org.opensearch.common.concurrent.RefCountedReleasable; import org.opensearch.telemetry.OTelAttributesConverter; import org.opensearch.telemetry.OTelTelemetryPlugin; import org.opensearch.telemetry.metrics.tags.Tags; import org.opensearch.test.OpenSearchTestCase; +import io.opentelemetry.api.OpenTelemetry; import io.opentelemetry.api.metrics.DoubleCounter; import io.opentelemetry.api.metrics.DoubleCounterBuilder; import io.opentelemetry.api.metrics.DoubleUpDownCounter; @@ -34,12 +36,16 @@ public void testCounter() { String description = "test"; String unit = "1"; Meter mockMeter = mock(Meter.class); + OpenTelemetry mockOpenTelemetry = mock(OpenTelemetry.class); DoubleCounter mockOTelDoubleCounter = mock(DoubleCounter.class); LongCounterBuilder mockOTelLongCounterBuilder = mock(LongCounterBuilder.class); DoubleCounterBuilder mockOTelDoubleCounterBuilder = mock(DoubleCounterBuilder.class); MeterProvider meterProvider = mock(MeterProvider.class); when(meterProvider.get(OTelTelemetryPlugin.INSTRUMENTATION_SCOPE_NAME)).thenReturn(mockMeter); - MetricsTelemetry metricsTelemetry = new OTelMetricsTelemetry(meterProvider); + MetricsTelemetry metricsTelemetry = new OTelMetricsTelemetry( + new RefCountedReleasable("telemetry", mockOpenTelemetry, () -> {}), + meterProvider + ); when(mockMeter.counterBuilder(counterName)).thenReturn(mockOTelLongCounterBuilder); when(mockOTelLongCounterBuilder.setDescription(description)).thenReturn(mockOTelLongCounterBuilder); when(mockOTelLongCounterBuilder.setUnit(unit)).thenReturn(mockOTelLongCounterBuilder); @@ -59,6 +65,7 @@ public void testCounterNegativeValue() { String counterName = "test-counter"; String description = "test"; String unit = "1"; + OpenTelemetry mockOpenTelemetry = mock(OpenTelemetry.class); Meter mockMeter = mock(Meter.class); DoubleCounter mockOTelDoubleCounter = mock(DoubleCounter.class); LongCounterBuilder mockOTelLongCounterBuilder = mock(LongCounterBuilder.class); @@ -66,7 +73,10 @@ public void testCounterNegativeValue() { MeterProvider meterProvider = mock(MeterProvider.class); when(meterProvider.get(OTelTelemetryPlugin.INSTRUMENTATION_SCOPE_NAME)).thenReturn(mockMeter); - MetricsTelemetry metricsTelemetry = new OTelMetricsTelemetry(meterProvider); + MetricsTelemetry metricsTelemetry = new OTelMetricsTelemetry( + new RefCountedReleasable("telemetry", mockOpenTelemetry, () -> {}), + meterProvider + ); when(mockMeter.counterBuilder(counterName)).thenReturn(mockOTelLongCounterBuilder); when(mockOTelLongCounterBuilder.setDescription(description)).thenReturn(mockOTelLongCounterBuilder); when(mockOTelLongCounterBuilder.setUnit(unit)).thenReturn(mockOTelLongCounterBuilder); @@ -83,6 +93,7 @@ public void testUpDownCounter() { String counterName = "test-counter"; String description = "test"; String unit = "1"; + OpenTelemetry mockOpenTelemetry = mock(OpenTelemetry.class); Meter mockMeter = mock(Meter.class); DoubleUpDownCounter mockOTelUpDownDoubleCounter = mock(DoubleUpDownCounter.class); LongUpDownCounterBuilder mockOTelLongUpDownCounterBuilder = mock(LongUpDownCounterBuilder.class); @@ -90,7 +101,10 @@ public void testUpDownCounter() { MeterProvider meterProvider = mock(MeterProvider.class); when(meterProvider.get(OTelTelemetryPlugin.INSTRUMENTATION_SCOPE_NAME)).thenReturn(mockMeter); - MetricsTelemetry metricsTelemetry = new OTelMetricsTelemetry(meterProvider); + MetricsTelemetry metricsTelemetry = new OTelMetricsTelemetry( + new RefCountedReleasable("telemetry", mockOpenTelemetry, () -> {}), + meterProvider + ); when(mockMeter.upDownCounterBuilder(counterName)).thenReturn(mockOTelLongUpDownCounterBuilder); when(mockOTelLongUpDownCounterBuilder.setDescription(description)).thenReturn(mockOTelLongUpDownCounterBuilder); when(mockOTelLongUpDownCounterBuilder.setUnit(unit)).thenReturn(mockOTelLongUpDownCounterBuilder); diff --git a/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/tracing/OTelTracingTelemetryTests.java b/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/tracing/OTelTracingTelemetryTests.java index 1a508ed252493..1f0c2f674e655 100644 --- a/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/tracing/OTelTracingTelemetryTests.java +++ b/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/tracing/OTelTracingTelemetryTests.java @@ -8,6 +8,7 @@ package org.opensearch.telemetry.tracing; +import org.opensearch.common.concurrent.RefCountedReleasable; import org.opensearch.telemetry.OTelTelemetryPlugin; import org.opensearch.telemetry.tracing.attributes.Attributes; import org.opensearch.test.OpenSearchTestCase; @@ -37,7 +38,10 @@ public void testCreateSpanWithoutParent() { when(mockSpanBuilder.startSpan()).thenReturn(mock(io.opentelemetry.api.trace.Span.class)); when(mockSpanBuilder.setSpanKind(any(io.opentelemetry.api.trace.SpanKind.class))).thenReturn(mockSpanBuilder); Attributes attributes = Attributes.create().addAttribute("name", "value"); - TracingTelemetry tracingTelemetry = new OTelTracingTelemetry(mockOpenTelemetry, mockTracerProvider); + TracingTelemetry tracingTelemetry = new OTelTracingTelemetry( + new RefCountedReleasable("telemetry", mockOpenTelemetry, () -> {}), + mockTracerProvider + ); Span span = tracingTelemetry.createSpan(SpanCreationContext.internal().name("span_name").attributes(attributes), null); verify(mockSpanBuilder, never()).setParent(any()); verify(mockSpanBuilder).setAllAttributes(createAttribute(attributes)); @@ -59,7 +63,10 @@ public void testCreateSpanWithParent() { Span parentSpan = new OTelSpan("parent_span", mock(io.opentelemetry.api.trace.Span.class), null); - TracingTelemetry tracingTelemetry = new OTelTracingTelemetry(mockOpenTelemetry, mockTracerProvider); + TracingTelemetry tracingTelemetry = new OTelTracingTelemetry( + new RefCountedReleasable("telemetry", mockOpenTelemetry, () -> {}), + mockTracerProvider + ); Attributes attributes = Attributes.create().addAttribute("name", 1l); Span span = tracingTelemetry.createSpan(SpanCreationContext.internal().name("span_name").attributes(attributes), parentSpan); @@ -85,7 +92,10 @@ public void testCreateSpanWithParentWithMultipleAttributes() { Span parentSpan = new OTelSpan("parent_span", mock(io.opentelemetry.api.trace.Span.class), null); - TracingTelemetry tracingTelemetry = new OTelTracingTelemetry(mockOpenTelemetry, mockTracerProvider); + TracingTelemetry tracingTelemetry = new OTelTracingTelemetry( + new RefCountedReleasable("telemetry", mockOpenTelemetry, () -> {}), + mockTracerProvider + ); Attributes attributes = Attributes.create() .addAttribute("key1", 1l) .addAttribute("key2", 2.0) @@ -125,7 +135,10 @@ public void testGetContextPropagator() { TracerProvider mockTracerProvider = mock(TracerProvider.class); when(mockTracerProvider.get(OTelTelemetryPlugin.INSTRUMENTATION_SCOPE_NAME)).thenReturn(mockTracer); - TracingTelemetry tracingTelemetry = new OTelTracingTelemetry(mockOpenTelemetry, mockTracerProvider); + TracingTelemetry tracingTelemetry = new OTelTracingTelemetry( + new RefCountedReleasable("telemetry", mockOpenTelemetry, () -> {}), + mockTracerProvider + ); assertTrue(tracingTelemetry.getContextPropagator() instanceof OTelTracingContextPropagator); } diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 08158d05e676b..dff1b0ca8b1a1 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -696,6 +696,12 @@ public void apply(Settings value, Settings current, Settings previous) { SearchService.CONCURRENT_SEGMENT_SEARCH_TARGET_MAX_SLICE_COUNT_SETTING ), List.of(FeatureFlags.TELEMETRY), - List.of(TelemetrySettings.TRACER_ENABLED_SETTING, TelemetrySettings.TRACER_SAMPLER_PROBABILITY) + List.of( + TelemetrySettings.TRACER_ENABLED_SETTING, + TelemetrySettings.TRACER_SAMPLER_PROBABILITY, + TelemetrySettings.METRICS_PUBLISH_INTERVAL_SETTING, + TelemetrySettings.TRACER_FEATURE_ENABLED_SETTING, + TelemetrySettings.METRICS_FEATURE_ENABLED_SETTING + ) ); } diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index ea31eadf24d44..92703a8c8c5b8 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -599,10 +599,23 @@ protected Node( MetricsRegistryFactory metricsRegistryFactory; if (FeatureFlags.isEnabled(TELEMETRY)) { final TelemetrySettings telemetrySettings = new TelemetrySettings(settings, clusterService.getClusterSettings()); - List telemetryPlugins = pluginsService.filterPlugins(TelemetryPlugin.class); - TelemetryModule telemetryModule = new TelemetryModule(telemetryPlugins, telemetrySettings); - tracerFactory = new TracerFactory(telemetrySettings, telemetryModule.getTelemetry(), threadPool.getThreadContext()); - metricsRegistryFactory = new MetricsRegistryFactory(telemetrySettings, telemetryModule.getTelemetry()); + if (telemetrySettings.isTracingFeatureEnabled() || telemetrySettings.isMetricsFeatureEnabled()) { + List telemetryPlugins = pluginsService.filterPlugins(TelemetryPlugin.class); + TelemetryModule telemetryModule = new TelemetryModule(telemetryPlugins, telemetrySettings); + if (telemetrySettings.isTracingFeatureEnabled()) { + tracerFactory = new TracerFactory(telemetrySettings, telemetryModule.getTelemetry(), threadPool.getThreadContext()); + } else { + tracerFactory = new NoopTracerFactory(); + } + if (telemetrySettings.isMetricsFeatureEnabled()) { + metricsRegistryFactory = new MetricsRegistryFactory(telemetrySettings, telemetryModule.getTelemetry()); + } else { + metricsRegistryFactory = new NoopMetricsRegistryFactory(); + } + } else { + tracerFactory = new NoopTracerFactory(); + metricsRegistryFactory = new NoopMetricsRegistryFactory(); + } } else { tracerFactory = new NoopTracerFactory(); metricsRegistryFactory = new NoopMetricsRegistryFactory(); diff --git a/server/src/main/java/org/opensearch/telemetry/TelemetrySettings.java b/server/src/main/java/org/opensearch/telemetry/TelemetrySettings.java index edb20cfa9dfc5..24dcab98c8870 100644 --- a/server/src/main/java/org/opensearch/telemetry/TelemetrySettings.java +++ b/server/src/main/java/org/opensearch/telemetry/TelemetrySettings.java @@ -28,6 +28,20 @@ public class TelemetrySettings { Setting.Property.Dynamic ); + public static final Setting TRACER_FEATURE_ENABLED_SETTING = Setting.boolSetting( + "telemetry.feature.tracer.enabled", + false, + Setting.Property.NodeScope, + Setting.Property.Final + ); + + public static final Setting METRICS_FEATURE_ENABLED_SETTING = Setting.boolSetting( + "telemetry.feature.metrics.enabled", + false, + Setting.Property.NodeScope, + Setting.Property.Final + ); + /** * Probability of sampler */ @@ -53,9 +67,14 @@ public class TelemetrySettings { private volatile boolean tracingEnabled; private volatile double samplingProbability; + private final boolean tracingFeatureEnabled; + private final boolean metricsFeatureEnabled; + public TelemetrySettings(Settings settings, ClusterSettings clusterSettings) { this.tracingEnabled = TRACER_ENABLED_SETTING.get(settings); this.samplingProbability = TRACER_SAMPLER_PROBABILITY.get(settings); + this.tracingFeatureEnabled = TRACER_FEATURE_ENABLED_SETTING.get(settings); + this.metricsFeatureEnabled = METRICS_FEATURE_ENABLED_SETTING.get(settings); clusterSettings.addSettingsUpdateConsumer(TRACER_ENABLED_SETTING, this::setTracingEnabled); clusterSettings.addSettingsUpdateConsumer(TRACER_SAMPLER_PROBABILITY, this::setSamplingProbability); @@ -83,4 +102,12 @@ public void setSamplingProbability(double samplingProbability) { public double getSamplingProbability() { return samplingProbability; } + + public boolean isTracingFeatureEnabled() { + return tracingFeatureEnabled; + } + + public boolean isMetricsFeatureEnabled() { + return metricsFeatureEnabled; + } } diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/WrappedTracer.java b/server/src/main/java/org/opensearch/telemetry/tracing/WrappedTracer.java index b2308402379ac..631fb8242d78e 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/WrappedTracer.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/WrappedTracer.java @@ -59,6 +59,11 @@ public SpanScope withSpanInScope(Span span) { return getDelegateTracer().withSpanInScope(span); } + @Override + public boolean isRecording() { + return getDelegateTracer().isRecording(); + } + @Override public void close() throws IOException { defaultTracer.close(); diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableHttpChannel.java b/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableHttpChannel.java index 0a9757310fe8b..e0fb690bd29be 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableHttpChannel.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableHttpChannel.java @@ -8,7 +8,6 @@ package org.opensearch.telemetry.tracing.channels; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.action.ActionListener; import org.opensearch.http.HttpChannel; import org.opensearch.http.HttpResponse; @@ -50,7 +49,7 @@ private TraceableHttpChannel(HttpChannel delegate, Span span, Tracer tracer) { * @return http channel */ public static HttpChannel create(HttpChannel delegate, Span span, Tracer tracer) { - if (FeatureFlags.isEnabled(FeatureFlags.TELEMETRY) == true) { + if (tracer.isRecording() == true) { return new TraceableHttpChannel(delegate, span, tracer); } else { return delegate; diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableRestChannel.java b/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableRestChannel.java index d256c9d4d0e53..32769dd1d848d 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableRestChannel.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableRestChannel.java @@ -9,7 +9,6 @@ package org.opensearch.telemetry.tracing.channels; import org.opensearch.common.io.stream.BytesStreamOutput; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.xcontent.MediaType; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.rest.RestChannel; @@ -53,7 +52,7 @@ private TraceableRestChannel(RestChannel delegate, Span span, Tracer tracer) { * @return rest channel */ public static RestChannel create(RestChannel delegate, Span span, Tracer tracer) { - if (FeatureFlags.isEnabled(FeatureFlags.TELEMETRY) == true) { + if (tracer.isRecording() == true) { return new TraceableRestChannel(delegate, span, tracer); } else { return delegate; diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableTcpTransportChannel.java b/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableTcpTransportChannel.java index bd60c35c3baac..45268b4807cd9 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableTcpTransportChannel.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableTcpTransportChannel.java @@ -9,7 +9,6 @@ package org.opensearch.telemetry.tracing.channels; import org.opensearch.Version; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.action.ActionListener; import org.opensearch.core.transport.TransportResponse; import org.opensearch.telemetry.tracing.Span; @@ -53,7 +52,7 @@ public TraceableTcpTransportChannel(TcpTransportChannel delegate, Span span, Tra * @return transport channel */ public static TransportChannel create(TcpTransportChannel delegate, final Span span, final Tracer tracer) { - if (FeatureFlags.isEnabled(FeatureFlags.TELEMETRY) == true) { + if (tracer.isRecording() == true) { delegate.getChannel().addCloseListener(new ActionListener() { @Override public void onResponse(Void unused) { diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/handler/TraceableTransportResponseHandler.java b/server/src/main/java/org/opensearch/telemetry/tracing/handler/TraceableTransportResponseHandler.java index 538bf82a1dbec..eb9d53d2df51b 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/handler/TraceableTransportResponseHandler.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/handler/TraceableTransportResponseHandler.java @@ -8,7 +8,6 @@ package org.opensearch.telemetry.tracing.handler; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.transport.TransportResponse; import org.opensearch.telemetry.tracing.Span; @@ -55,7 +54,7 @@ public static TransportResponseHandler create( Span span, Tracer tracer ) { - if (FeatureFlags.isEnabled(FeatureFlags.TELEMETRY) == true) { + if (tracer.isRecording() == true) { return new TraceableTransportResponseHandler(delegate, span, tracer); } else { return delegate; diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/listener/TraceableActionListener.java b/server/src/main/java/org/opensearch/telemetry/tracing/listener/TraceableActionListener.java index 3e201641a529b..0cb4ce71d05f8 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/listener/TraceableActionListener.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/listener/TraceableActionListener.java @@ -8,7 +8,6 @@ package org.opensearch.telemetry.tracing.listener; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.action.ActionListener; import org.opensearch.telemetry.tracing.Span; import org.opensearch.telemetry.tracing.SpanScope; @@ -47,7 +46,7 @@ private TraceableActionListener(ActionListener delegate, Span span, Tr * @return action listener */ public static ActionListener create(ActionListener delegate, Span span, Tracer tracer) { - if (FeatureFlags.isEnabled(FeatureFlags.TELEMETRY) == true) { + if (tracer.isRecording() == true) { return new TraceableActionListener(delegate, span, tracer); } else { return delegate; diff --git a/server/src/test/java/org/opensearch/telemetry/metrics/MetricsRegistryFactoryTests.java b/server/src/test/java/org/opensearch/telemetry/metrics/MetricsRegistryFactoryTests.java index 5d5ea62dd161e..80942123fd4fd 100644 --- a/server/src/test/java/org/opensearch/telemetry/metrics/MetricsRegistryFactoryTests.java +++ b/server/src/test/java/org/opensearch/telemetry/metrics/MetricsRegistryFactoryTests.java @@ -63,6 +63,18 @@ public void testGetMetricsWithAvailableMetricsTelemetry() { } + public void testNullMetricsTelemetry() { + Settings settings = Settings.builder().put(TelemetrySettings.METRICS_FEATURE_ENABLED_SETTING.getKey(), false).build(); + TelemetrySettings telemetrySettings = new TelemetrySettings(settings, new ClusterSettings(settings, getClusterSettings())); + Telemetry mockTelemetry = mock(Telemetry.class); + when(mockTelemetry.getMetricsTelemetry()).thenReturn(null); + metricsRegistryFactory = new MetricsRegistryFactory(telemetrySettings, Optional.of(mockTelemetry)); + + MetricsRegistry metricsRegistry = metricsRegistryFactory.getMetricsRegistry(); + assertTrue(metricsRegistry instanceof NoopMetricsRegistry); + + } + private Set> getClusterSettings() { Set> allTracerSettings = new HashSet<>(); ClusterSettings.FEATURE_FLAGGED_CLUSTER_SETTINGS.get(List.of(FeatureFlags.TELEMETRY)).stream().forEach((allTracerSettings::add)); diff --git a/server/src/test/java/org/opensearch/telemetry/tracing/TracerFactoryTests.java b/server/src/test/java/org/opensearch/telemetry/tracing/TracerFactoryTests.java index b27f888eaf502..3a388be22445e 100644 --- a/server/src/test/java/org/opensearch/telemetry/tracing/TracerFactoryTests.java +++ b/server/src/test/java/org/opensearch/telemetry/tracing/TracerFactoryTests.java @@ -83,6 +83,18 @@ public void testGetTracerWithAvailableTracingTelemetryReturnsWrappedTracer() { } + public void testNullTracer() { + Settings settings = Settings.builder().put(TelemetrySettings.TRACER_FEATURE_ENABLED_SETTING.getKey(), false).build(); + TelemetrySettings telemetrySettings = new TelemetrySettings(settings, new ClusterSettings(settings, getClusterSettings())); + Telemetry mockTelemetry = mock(Telemetry.class); + when(mockTelemetry.getTracingTelemetry()).thenReturn(null); + tracerFactory = new TracerFactory(telemetrySettings, Optional.of(mockTelemetry), new ThreadContext(Settings.EMPTY)); + + Tracer tracer = tracerFactory.getTracer(); + assertTrue(tracer instanceof NoopTracer); + + } + private Set> getClusterSettings() { Set> allTracerSettings = new HashSet<>(); ClusterSettings.FEATURE_FLAGGED_CLUSTER_SETTINGS.get(List.of(FeatureFlags.TELEMETRY)).stream().forEach((allTracerSettings::add)); diff --git a/server/src/test/java/org/opensearch/telemetry/tracing/WrappedTracerTests.java b/server/src/test/java/org/opensearch/telemetry/tracing/WrappedTracerTests.java index 43e0cb8e44439..8606104d26103 100644 --- a/server/src/test/java/org/opensearch/telemetry/tracing/WrappedTracerTests.java +++ b/server/src/test/java/org/opensearch/telemetry/tracing/WrappedTracerTests.java @@ -26,6 +26,7 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; public class WrappedTracerTests extends OpenSearchTestCase { @@ -38,6 +39,7 @@ public void testStartSpanWithTracingDisabledInvokesNoopTracer() throws Exception SpanCreationContext spanCreationContext = SpanCreationContext.internal().name("foo"); wrappedTracer.startSpan(spanCreationContext); assertTrue(wrappedTracer.getDelegateTracer() instanceof NoopTracer); + assertFalse(wrappedTracer.isRecording()); verify(mockDefaultTracer, never()).startSpan(SpanCreationContext.internal().name("foo")); } } @@ -46,12 +48,13 @@ public void testStartSpanWithTracingEnabledInvokesDefaultTracer() throws Excepti Settings settings = Settings.builder().put(TelemetrySettings.TRACER_ENABLED_SETTING.getKey(), true).build(); TelemetrySettings telemetrySettings = new TelemetrySettings(settings, new ClusterSettings(settings, getClusterSettings())); DefaultTracer mockDefaultTracer = mock(DefaultTracer.class); - + when(mockDefaultTracer.isRecording()).thenReturn(true); try (WrappedTracer wrappedTracer = new WrappedTracer(telemetrySettings, mockDefaultTracer)) { SpanCreationContext spanCreationContext = SpanCreationContext.internal().name("foo"); wrappedTracer.startSpan(spanCreationContext); assertTrue(wrappedTracer.getDelegateTracer() instanceof DefaultTracer); + assertTrue(wrappedTracer.isRecording()); verify(mockDefaultTracer).startSpan(eq(spanCreationContext)); } } diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java index 3310c76bdaa93..eeaead5465291 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java @@ -1972,6 +1972,7 @@ protected Settings nodeSettings(int nodeOrdinal) { } // Enable tracer only when Telemetry Setting is enabled if (featureFlagSettings().getAsBoolean(FeatureFlags.TELEMETRY_SETTING.getKey(), false)) { + builder.put(TelemetrySettings.TRACER_FEATURE_ENABLED_SETTING.getKey(), true); builder.put(TelemetrySettings.TRACER_ENABLED_SETTING.getKey(), true); } if (FeatureFlags.CONCURRENT_SEGMENT_SEARCH_SETTING.get(featureFlagSettings)) { diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchSingleNodeTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchSingleNodeTestCase.java index a623c3bd84117..13b824e3cd070 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchSingleNodeTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchSingleNodeTestCase.java @@ -252,6 +252,7 @@ private Node newNode() { .putList(DISCOVERY_SEED_HOSTS_SETTING.getKey()) // empty list disables a port scan for other nodes .putList(INITIAL_CLUSTER_MANAGER_NODES_SETTING.getKey(), nodeName) .put(TelemetrySettings.TRACER_ENABLED_SETTING.getKey(), true) + .put(TelemetrySettings.TRACER_FEATURE_ENABLED_SETTING.getKey(), true) .put(nodeSettings()) // allow test cases to provide their own settings or override these .put(featureFlagSettings()); @@ -270,6 +271,7 @@ private Node newNode() { plugins.add(MockHttpTransport.TestPlugin.class); } plugins.add(MockScriptService.TestPlugin.class); + plugins.add(MockTelemetryPlugin.class); Node node = new MockNode(settingsBuilder.build(), plugins, forbidPrivateIndexSettings()); try { diff --git a/test/framework/src/main/java/org/opensearch/test/telemetry/MockTelemetry.java b/test/framework/src/main/java/org/opensearch/test/telemetry/MockTelemetry.java index 95321a7009be9..dda413ce2818e 100644 --- a/test/framework/src/main/java/org/opensearch/test/telemetry/MockTelemetry.java +++ b/test/framework/src/main/java/org/opensearch/test/telemetry/MockTelemetry.java @@ -12,6 +12,7 @@ import org.opensearch.telemetry.TelemetrySettings; import org.opensearch.telemetry.metrics.Counter; import org.opensearch.telemetry.metrics.MetricsTelemetry; +import org.opensearch.telemetry.metrics.noop.NoopCounter; import org.opensearch.telemetry.tracing.TracingTelemetry; import org.opensearch.test.telemetry.tracing.MockTracingTelemetry; @@ -37,12 +38,12 @@ public MetricsTelemetry getMetricsTelemetry() { return new MetricsTelemetry() { @Override public Counter createCounter(String name, String description, String unit) { - return null; + return NoopCounter.INSTANCE; } @Override public Counter createUpDownCounter(String name, String description, String unit) { - return null; + return NoopCounter.INSTANCE; } @Override From f00103c4c5037cdc2a018a7525b23bd70b1332dc Mon Sep 17 00:00:00 2001 From: Bharathwaj G Date: Wed, 18 Oct 2023 20:17:39 +0530 Subject: [PATCH 2/2] [Backport 2.x] Enhancing FS stats to include read / write time, io time and queue size (#10696) * Enhancing FS stats to include read/write time, queue size and IO time (#10541) * Enhancing FS stats to include read / write time, io time and queue size Signed-off-by: Bharathwaj G * 2.x specific changes Signed-off-by: Bharathwaj G --------- Signed-off-by: Bharathwaj G --- CHANGELOG.md | 1 + .../org/opensearch/monitor/fs/FsInfo.java | 178 +++++++++++++++++- .../org/opensearch/monitor/fs/FsProbe.java | 29 +++ .../cluster/node/stats/NodeStatsTests.java | 12 ++ .../monitor/fs/DeviceStatsTests.java | 19 +- .../opensearch/monitor/fs/FsProbeTests.java | 51 +++++ 6 files changed, 285 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 208b611b49e37..739ff25b13e8b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased 2.x] ### Added +- [Admission control] Add enhancements to FS stats to include read/write time, queue size and IO time ([#10696](https://github.com/opensearch-project/OpenSearch/pull/10696)) ### Dependencies - Bumps jetty version to 9.4.52.v20230823 to fix GMS-2023-1857 ([#9822](https://github.com/opensearch-project/OpenSearch/pull/9822)) diff --git a/server/src/main/java/org/opensearch/monitor/fs/FsInfo.java b/server/src/main/java/org/opensearch/monitor/fs/FsInfo.java index cbde61925d834..73ae29920d2a0 100644 --- a/server/src/main/java/org/opensearch/monitor/fs/FsInfo.java +++ b/server/src/main/java/org/opensearch/monitor/fs/FsInfo.java @@ -237,6 +237,14 @@ public static class DeviceStats implements Writeable, ToXContentFragment { final long previousWritesCompleted; final long currentSectorsWritten; final long previousSectorsWritten; + final long currentReadTime; + final long previousReadTime; + final long currentWriteTime; + final long previousWriteTime; + final long currentQueueSize; + final long previousQueueSize; + final long currentIOTime; + final long previousIOTime; public DeviceStats( final int majorDeviceNumber, @@ -246,6 +254,10 @@ public DeviceStats( final long currentSectorsRead, final long currentWritesCompleted, final long currentSectorsWritten, + final long currentReadTime, + final long currentWriteTime, + final long currrentQueueSize, + final long currentIOTime, final DeviceStats previousDeviceStats ) { this( @@ -259,7 +271,15 @@ public DeviceStats( currentSectorsRead, previousDeviceStats != null ? previousDeviceStats.currentSectorsRead : -1, currentWritesCompleted, - previousDeviceStats != null ? previousDeviceStats.currentWritesCompleted : -1 + previousDeviceStats != null ? previousDeviceStats.currentWritesCompleted : -1, + currentReadTime, + previousDeviceStats != null ? previousDeviceStats.currentReadTime : -1, + currentWriteTime, + previousDeviceStats != null ? previousDeviceStats.currentWriteTime : -1, + currrentQueueSize, + previousDeviceStats != null ? previousDeviceStats.currentQueueSize : -1, + currentIOTime, + previousDeviceStats != null ? previousDeviceStats.currentIOTime : -1 ); } @@ -274,7 +294,15 @@ private DeviceStats( final long currentSectorsRead, final long previousSectorsRead, final long currentWritesCompleted, - final long previousWritesCompleted + final long previousWritesCompleted, + final long currentReadTime, + final long previousReadTime, + final long currentWriteTime, + final long previousWriteTime, + final long currentQueueSize, + final long previousQueueSize, + final long currentIOTime, + final long previousIOTime ) { this.majorDeviceNumber = majorDeviceNumber; this.minorDeviceNumber = minorDeviceNumber; @@ -287,6 +315,14 @@ private DeviceStats( this.previousSectorsRead = previousSectorsRead; this.currentSectorsWritten = currentSectorsWritten; this.previousSectorsWritten = previousSectorsWritten; + this.currentReadTime = currentReadTime; + this.previousReadTime = previousReadTime; + this.currentWriteTime = currentWriteTime; + this.previousWriteTime = previousWriteTime; + this.currentQueueSize = currentQueueSize; + this.previousQueueSize = previousQueueSize; + this.currentIOTime = currentIOTime; + this.previousIOTime = previousIOTime; } public DeviceStats(StreamInput in) throws IOException { @@ -301,6 +337,25 @@ public DeviceStats(StreamInput in) throws IOException { previousSectorsRead = in.readLong(); currentSectorsWritten = in.readLong(); previousSectorsWritten = in.readLong(); + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { + currentReadTime = in.readLong(); + previousReadTime = in.readLong(); + currentWriteTime = in.readLong(); + previousWriteTime = in.readLong(); + currentQueueSize = in.readLong(); + previousQueueSize = in.readLong(); + currentIOTime = in.readLong(); + previousIOTime = in.readLong(); + } else { + currentReadTime = 0; + previousReadTime = 0; + currentWriteTime = 0; + previousWriteTime = 0; + currentQueueSize = 0; + previousQueueSize = 0; + currentIOTime = 0; + previousIOTime = 0; + } } @Override @@ -316,6 +371,16 @@ public void writeTo(StreamOutput out) throws IOException { out.writeLong(previousSectorsRead); out.writeLong(currentSectorsWritten); out.writeLong(previousSectorsWritten); + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { + out.writeLong(currentReadTime); + out.writeLong(previousReadTime); + out.writeLong(currentWriteTime); + out.writeLong(previousWriteTime); + out.writeLong(currentQueueSize); + out.writeLong(previousQueueSize); + out.writeLong(currentIOTime); + out.writeLong(previousIOTime); + } } public long operations() { @@ -348,6 +413,39 @@ public long writeKilobytes() { return (currentSectorsWritten - previousSectorsWritten) / 2; } + /** + * Total time taken for all read operations + */ + public long readTime() { + if (previousReadTime == -1) return -1; + return currentReadTime - previousReadTime; + } + + /** + * Total time taken for all write operations + */ + public long writeTime() { + if (previousWriteTime == -1) return -1; + return currentWriteTime - previousWriteTime; + } + + /** + * Queue size based on weighted time spent doing I/Os + */ + public long queueSize() { + if (previousQueueSize == -1) return -1; + return currentQueueSize - previousQueueSize; + } + + /** + * Total time spent doing I/Os + */ + public long ioTimeInMillis() { + if (previousIOTime == -1) return -1; + + return (currentIOTime - previousIOTime); + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.field("device_name", deviceName); @@ -356,9 +454,12 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(IoStats.WRITE_OPERATIONS, writeOperations()); builder.field(IoStats.READ_KILOBYTES, readKilobytes()); builder.field(IoStats.WRITE_KILOBYTES, writeKilobytes()); + builder.field(IoStats.READ_TIME, readTime()); + builder.field(IoStats.WRITE_TIME, writeTime()); + builder.field(IoStats.QUEUE_SIZE, queueSize()); + builder.field(IoStats.IO_TIME_MS, ioTimeInMillis()); return builder; } - } /** @@ -373,6 +474,10 @@ public static class IoStats implements Writeable, ToXContentFragment { private static final String WRITE_OPERATIONS = "write_operations"; private static final String READ_KILOBYTES = "read_kilobytes"; private static final String WRITE_KILOBYTES = "write_kilobytes"; + private static final String READ_TIME = "read_time"; + private static final String WRITE_TIME = "write_time"; + private static final String QUEUE_SIZE = "queue_size"; + private static final String IO_TIME_MS = "io_time_in_millis"; final DeviceStats[] devicesStats; final long totalOperations; @@ -380,6 +485,10 @@ public static class IoStats implements Writeable, ToXContentFragment { final long totalWriteOperations; final long totalReadKilobytes; final long totalWriteKilobytes; + final long totalReadTime; + final long totalWriteTime; + final long totalQueueSize; + final long totalIOTimeInMillis; public IoStats(final DeviceStats[] devicesStats) { this.devicesStats = devicesStats; @@ -388,18 +497,30 @@ public IoStats(final DeviceStats[] devicesStats) { long totalWriteOperations = 0; long totalReadKilobytes = 0; long totalWriteKilobytes = 0; + long totalReadTime = 0; + long totalWriteTime = 0; + long totalQueueSize = 0; + long totalIOTimeInMillis = 0; for (DeviceStats deviceStats : devicesStats) { totalOperations += deviceStats.operations() != -1 ? deviceStats.operations() : 0; totalReadOperations += deviceStats.readOperations() != -1 ? deviceStats.readOperations() : 0; totalWriteOperations += deviceStats.writeOperations() != -1 ? deviceStats.writeOperations() : 0; totalReadKilobytes += deviceStats.readKilobytes() != -1 ? deviceStats.readKilobytes() : 0; totalWriteKilobytes += deviceStats.writeKilobytes() != -1 ? deviceStats.writeKilobytes() : 0; + totalReadTime += deviceStats.readTime() != -1 ? deviceStats.readTime() : 0; + totalWriteTime += deviceStats.writeTime() != -1 ? deviceStats.writeTime() : 0; + totalQueueSize += deviceStats.queueSize() != -1 ? deviceStats.queueSize() : 0; + totalIOTimeInMillis += deviceStats.ioTimeInMillis() != -1 ? deviceStats.ioTimeInMillis() : 0; } this.totalOperations = totalOperations; this.totalReadOperations = totalReadOperations; this.totalWriteOperations = totalWriteOperations; this.totalReadKilobytes = totalReadKilobytes; this.totalWriteKilobytes = totalWriteKilobytes; + this.totalReadTime = totalReadTime; + this.totalWriteTime = totalWriteTime; + this.totalQueueSize = totalQueueSize; + this.totalIOTimeInMillis = totalIOTimeInMillis; } public IoStats(StreamInput in) throws IOException { @@ -414,6 +535,17 @@ public IoStats(StreamInput in) throws IOException { this.totalWriteOperations = in.readLong(); this.totalReadKilobytes = in.readLong(); this.totalWriteKilobytes = in.readLong(); + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { + this.totalReadTime = in.readLong(); + this.totalWriteTime = in.readLong(); + this.totalQueueSize = in.readLong(); + this.totalIOTimeInMillis = in.readLong(); + } else { + this.totalReadTime = 0; + this.totalWriteTime = 0; + this.totalQueueSize = 0; + this.totalIOTimeInMillis = 0; + } } @Override @@ -427,6 +559,12 @@ public void writeTo(StreamOutput out) throws IOException { out.writeLong(totalWriteOperations); out.writeLong(totalReadKilobytes); out.writeLong(totalWriteKilobytes); + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { + out.writeLong(totalReadTime); + out.writeLong(totalWriteTime); + out.writeLong(totalQueueSize); + out.writeLong(totalIOTimeInMillis); + } } public DeviceStats[] getDevicesStats() { @@ -453,6 +591,34 @@ public long getTotalWriteKilobytes() { return totalWriteKilobytes; } + /** + * Sum of read time across all devices + */ + public long getTotalReadTime() { + return totalReadTime; + } + + /** + * Sum of write time across all devices + */ + public long getTotalWriteTime() { + return totalWriteTime; + } + + /** + * Sum of queue size across all devices + */ + public long getTotalQueueSize() { + return totalQueueSize; + } + + /** + * Sum of IO time across all devices + */ + public long getTotalIOTimeMillis() { + return totalIOTimeInMillis; + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { if (devicesStats.length > 0) { @@ -470,11 +636,15 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(WRITE_OPERATIONS, totalWriteOperations); builder.field(READ_KILOBYTES, totalReadKilobytes); builder.field(WRITE_KILOBYTES, totalWriteKilobytes); + + builder.field(READ_TIME, totalReadTime); + builder.field(WRITE_TIME, totalWriteTime); + builder.field(QUEUE_SIZE, totalQueueSize); + builder.field(IO_TIME_MS, totalIOTimeInMillis); builder.endObject(); } return builder; } - } private final long timestamp; diff --git a/server/src/main/java/org/opensearch/monitor/fs/FsProbe.java b/server/src/main/java/org/opensearch/monitor/fs/FsProbe.java index f39de883536be..0c3d412ec4218 100644 --- a/server/src/main/java/org/opensearch/monitor/fs/FsProbe.java +++ b/server/src/main/java/org/opensearch/monitor/fs/FsProbe.java @@ -109,6 +109,25 @@ final FsInfo.IoStats ioStats(final Set> devicesNumbers, List devicesStats = new ArrayList<>(); + /** + * The /proc/diskstats file displays the I/O statistics of block devices. + * Each line contains the following 14 fields: ( + additional fields ) + * + * 1 major number + * 2 minor number + * 3 device name + * 4 reads completed successfully + * 5 reads merged + * 6 sectors read + * 7 time spent reading (ms) + * 8 writes completed + * 9 writes merged + * 10 sectors written + * 11 time spent writing (ms) + * 12 I/Os currently in progress + * 13 time spent doing I/Os (ms) ---- IO use percent + * 14 weighted time spent doing I/Os (ms) ---- Queue size + */ List lines = readProcDiskStats(); if (!lines.isEmpty()) { for (String line : lines) { @@ -123,6 +142,12 @@ final FsInfo.IoStats ioStats(final Set> devicesNumbers, final long sectorsRead = Long.parseLong(fields[5]); final long writesCompleted = Long.parseLong(fields[7]); final long sectorsWritten = Long.parseLong(fields[9]); + // readTime and writeTime calculates the total read/write time taken for each request to complete + // ioTime calculates actual time queue and disks are busy + final long readTime = Long.parseLong(fields[6]); + final long writeTime = Long.parseLong(fields[10]); + final long ioTime = fields.length > 12 ? Long.parseLong(fields[12]) : 0; + final long queueSize = fields.length > 13 ? Long.parseLong(fields[13]) : 0; final FsInfo.DeviceStats deviceStats = new FsInfo.DeviceStats( majorDeviceNumber, minorDeviceNumber, @@ -131,6 +156,10 @@ final FsInfo.IoStats ioStats(final Set> devicesNumbers, sectorsRead, writesCompleted, sectorsWritten, + readTime, + writeTime, + queueSize, + ioTime, deviceMap.get(Tuple.tuple(majorDeviceNumber, minorDeviceNumber)) ); devicesStats.add(deviceStats); diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java index e0b35c69cc3c0..ac0ca205e754e 100644 --- a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -282,6 +282,10 @@ public void testSerialization() throws IOException { assertEquals(ioStats.getTotalReadOperations(), deserializedIoStats.getTotalReadOperations()); assertEquals(ioStats.getTotalWriteKilobytes(), deserializedIoStats.getTotalWriteKilobytes()); assertEquals(ioStats.getTotalWriteOperations(), deserializedIoStats.getTotalWriteOperations()); + assertEquals(ioStats.getTotalReadTime(), deserializedIoStats.getTotalReadTime()); + assertEquals(ioStats.getTotalWriteTime(), deserializedIoStats.getTotalWriteTime()); + assertEquals(ioStats.getTotalQueueSize(), deserializedIoStats.getTotalQueueSize()); + assertEquals(ioStats.getTotalIOTimeMillis(), deserializedIoStats.getTotalIOTimeMillis()); assertEquals(ioStats.getDevicesStats().length, deserializedIoStats.getDevicesStats().length); for (int i = 0; i < ioStats.getDevicesStats().length; i++) { FsInfo.DeviceStats deviceStats = ioStats.getDevicesStats()[i]; @@ -625,6 +629,10 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), null ); deviceStatsArray[i] = new FsInfo.DeviceStats( @@ -635,6 +643,10 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), previousDeviceStats ); } diff --git a/server/src/test/java/org/opensearch/monitor/fs/DeviceStatsTests.java b/server/src/test/java/org/opensearch/monitor/fs/DeviceStatsTests.java index 0fd039b84e887..0059f8e215f2e 100644 --- a/server/src/test/java/org/opensearch/monitor/fs/DeviceStatsTests.java +++ b/server/src/test/java/org/opensearch/monitor/fs/DeviceStatsTests.java @@ -46,7 +46,12 @@ public void testDeviceStats() { final int sectorsRead = randomIntBetween(8 * readsCompleted, 16 * readsCompleted); final int writesCompleted = randomIntBetween(1, 1 << 16); final int sectorsWritten = randomIntBetween(8 * writesCompleted, 16 * writesCompleted); - + final int readTime = randomIntBetween(1, 1 << 16); + ; + final int writeTime = randomIntBetween(1, 1 << 16); + ; + final int queueSize = randomIntBetween(1, 1 << 16); + final int ioTime = randomIntBetween(1, 1 << 16); FsInfo.DeviceStats previous = new FsInfo.DeviceStats( majorDeviceNumber, minorDeviceNumber, @@ -55,6 +60,10 @@ public void testDeviceStats() { sectorsRead, writesCompleted, sectorsWritten, + readTime, + writeTime, + queueSize, + ioTime, null ); FsInfo.DeviceStats current = new FsInfo.DeviceStats( @@ -65,6 +74,10 @@ public void testDeviceStats() { sectorsRead + 16384, writesCompleted + 2048, sectorsWritten + 32768, + readTime + 500, + writeTime + 100, + queueSize + 20, + ioTime + 8192, previous ); assertThat(current.operations(), equalTo(1024L + 2048L)); @@ -72,6 +85,10 @@ public void testDeviceStats() { assertThat(current.writeOperations(), equalTo(2048L)); assertThat(current.readKilobytes(), equalTo(16384L / 2)); assertThat(current.writeKilobytes(), equalTo(32768L / 2)); + assertEquals(500, current.readTime()); + assertEquals(100, current.writeTime()); + assertEquals(20, current.queueSize()); + assertEquals(8192, current.ioTimeInMillis()); } } diff --git a/server/src/test/java/org/opensearch/monitor/fs/FsProbeTests.java b/server/src/test/java/org/opensearch/monitor/fs/FsProbeTests.java index 686a624d988d7..59a888c665be7 100644 --- a/server/src/test/java/org/opensearch/monitor/fs/FsProbeTests.java +++ b/server/src/test/java/org/opensearch/monitor/fs/FsProbeTests.java @@ -91,6 +91,14 @@ public void testFsInfo() throws IOException { assertThat(deviceStats.previousWritesCompleted, equalTo(-1L)); assertThat(deviceStats.currentSectorsWritten, greaterThanOrEqualTo(0L)); assertThat(deviceStats.previousSectorsWritten, equalTo(-1L)); + assertThat(deviceStats.currentReadTime, greaterThanOrEqualTo(0L)); + assertThat(deviceStats.previousReadTime, greaterThanOrEqualTo(-1L)); + assertThat(deviceStats.currentWriteTime, greaterThanOrEqualTo(0L)); + assertThat(deviceStats.previousWriteTime, greaterThanOrEqualTo(-1L)); + assertThat(deviceStats.currentQueueSize, greaterThanOrEqualTo(0L)); + assertThat(deviceStats.previousQueueSize, greaterThanOrEqualTo(-1L)); + assertThat(deviceStats.currentIOTime, greaterThanOrEqualTo(0L)); + assertThat(deviceStats.previousIOTime, greaterThanOrEqualTo(-1L)); } } else { assertNull(stats.getIoStats()); @@ -243,6 +251,16 @@ List readProcDiskStats() throws IOException { assertThat(first.devicesStats[0].previousWritesCompleted, equalTo(-1L)); assertThat(first.devicesStats[0].currentSectorsWritten, equalTo(118857776L)); assertThat(first.devicesStats[0].previousSectorsWritten, equalTo(-1L)); + + assertEquals(33457, first.devicesStats[0].currentReadTime); + assertEquals(-1, first.devicesStats[0].previousReadTime); + assertEquals(18730966, first.devicesStats[0].currentWriteTime); + assertEquals(-1, first.devicesStats[0].previousWriteTime); + assertEquals(18767169, first.devicesStats[0].currentQueueSize); + assertEquals(-1, first.devicesStats[0].previousQueueSize); + assertEquals(1918440, first.devicesStats[0].currentIOTime); + assertEquals(-1, first.devicesStats[0].previousIOTime); + assertThat(first.devicesStats[1].majorDeviceNumber, equalTo(253)); assertThat(first.devicesStats[1].minorDeviceNumber, equalTo(2)); assertThat(first.devicesStats[1].deviceName, equalTo("dm-2")); @@ -255,6 +273,15 @@ List readProcDiskStats() throws IOException { assertThat(first.devicesStats[1].currentSectorsWritten, equalTo(64126096L)); assertThat(first.devicesStats[1].previousSectorsWritten, equalTo(-1L)); + assertEquals(49312, first.devicesStats[1].currentReadTime); + assertEquals(-1, first.devicesStats[1].previousReadTime); + assertEquals(33730596, first.devicesStats[1].currentWriteTime); + assertEquals(-1, first.devicesStats[1].previousWriteTime); + assertEquals(33781827, first.devicesStats[1].currentQueueSize); + assertEquals(-1, first.devicesStats[1].previousQueueSize); + assertEquals(1058193, first.devicesStats[1].currentIOTime); + assertEquals(-1, first.devicesStats[1].previousIOTime); + diskStats.set( Arrays.asList( " 259 0 nvme0n1 336870 0 7928397 82876 10264393 0 182986405 52451610 0 2971042 52536492", @@ -281,6 +308,16 @@ List readProcDiskStats() throws IOException { assertThat(second.devicesStats[0].previousWritesCompleted, equalTo(8398869L)); assertThat(second.devicesStats[0].currentSectorsWritten, equalTo(118857776L)); assertThat(second.devicesStats[0].previousSectorsWritten, equalTo(118857776L)); + + assertEquals(33464, second.devicesStats[0].currentReadTime); + assertEquals(33457, second.devicesStats[0].previousReadTime); + assertEquals(18730966, second.devicesStats[0].currentWriteTime); + assertEquals(18730966, second.devicesStats[0].previousWriteTime); + assertEquals(18767176, second.devicesStats[0].currentQueueSize); + assertEquals(18767169, second.devicesStats[0].previousQueueSize); + assertEquals(1918444, second.devicesStats[0].currentIOTime); + assertEquals(1918440, second.devicesStats[0].previousIOTime); + assertThat(second.devicesStats[1].majorDeviceNumber, equalTo(253)); assertThat(second.devicesStats[1].minorDeviceNumber, equalTo(2)); assertThat(second.devicesStats[1].deviceName, equalTo("dm-2")); @@ -293,11 +330,25 @@ List readProcDiskStats() throws IOException { assertThat(second.devicesStats[1].currentSectorsWritten, equalTo(64128568L)); assertThat(second.devicesStats[1].previousSectorsWritten, equalTo(64126096L)); + assertEquals(49369, second.devicesStats[1].currentReadTime); + assertEquals(49312, second.devicesStats[1].previousReadTime); + assertEquals(33730766, second.devicesStats[1].currentWriteTime); + assertEquals(33730596, second.devicesStats[1].previousWriteTime); + assertEquals(33781827, first.devicesStats[1].currentQueueSize); + assertEquals(-1L, first.devicesStats[1].previousQueueSize); + assertEquals(1058193, first.devicesStats[1].currentIOTime); + assertEquals(-1L, first.devicesStats[1].previousIOTime); + assertThat(second.totalOperations, equalTo(575L)); assertThat(second.totalReadOperations, equalTo(261L)); assertThat(second.totalWriteOperations, equalTo(314L)); assertThat(second.totalReadKilobytes, equalTo(2392L)); assertThat(second.totalWriteKilobytes, equalTo(1236L)); + + assertEquals(64, second.totalReadTime); + assertEquals(170, second.totalWriteTime); + assertEquals(236, second.totalQueueSize); + assertEquals(158, second.totalIOTimeInMillis); } public void testAdjustForHugeFilesystems() throws Exception {