From 2a8ffbaa766429b7effb3078f2c223c3bfb19871 Mon Sep 17 00:00:00 2001 From: gargharsh3134 <51459091+gargharsh3134@users.noreply.github.com> Date: Mon, 10 Jun 2024 22:46:35 +0530 Subject: [PATCH] [Backport] ClusterManager metrics instrumentation changes (#14118) --------- Signed-off-by: Harsh Garg Co-authored-by: Harsh Garg --- CHANGELOG.md | 2 + .../insights/QueryInsightsPluginTests.java | 4 +- .../listener/QueryInsightsListenerTests.java | 3 +- .../TransportTopQueriesActionTests.java | 3 +- .../cluster/ClusterManagerMetrics.java | 99 +++++++++++++++++++ .../org/opensearch/cluster/ClusterModule.java | 6 +- .../cluster/coordination/Coordinator.java | 16 ++- .../coordination/FollowersChecker.java | 7 +- .../cluster/coordination/LeaderChecker.java | 11 ++- .../routing/allocation/AllocationService.java | 28 ++++-- .../service/ClusterApplierService.java | 30 +++++- .../service/ClusterManagerService.java | 11 +++ .../cluster/service/ClusterService.java | 15 ++- .../cluster/service/MasterService.java | 22 +++++ .../opensearch/discovery/DiscoveryModule.java | 7 +- .../main/java/org/opensearch/node/Node.java | 38 ++++--- ...ActionIndicesThatCannotBeCreatedTests.java | 7 +- .../bulk/TransportBulkActionIngestTests.java | 7 +- .../search/SearchRequestSlowLogTests.java | 23 ++--- .../cluster/ClusterModuleTests.java | 26 +++-- .../coordination/FollowersCheckerTests.java | 65 +++++++++--- .../coordination/LeaderCheckerTests.java | 25 ++++- .../cluster/coordination/NodeJoinTests.java | 8 +- .../allocation/AllocationServiceTests.java | 33 ++++++- .../service/ClusterApplierServiceTests.java | 95 ++++++++++++++++-- .../cluster/service/MasterServiceTests.java | 85 +++++++++++++--- .../discovery/DiscoveryModuleTests.java | 5 +- .../gateway/ClusterStateUpdatersTests.java | 3 +- .../gateway/GatewayServiceTests.java | 3 +- .../index/IndexingPressureServiceTests.java | 3 +- ...exingPressureConcurrentExecutionTests.java | 3 +- ...ardIndexingPressureMemoryManagerTests.java | 4 +- .../ShardIndexingPressureSettingsTests.java | 3 +- .../ShardIndexingPressureStoreTests.java | 4 +- .../index/ShardIndexingPressureTests.java | 3 +- .../RemoteSegmentTransferTrackerTests.java | 3 +- .../RemoteStorePressureServiceTests.java | 3 +- .../RemoteStorePressureSettingsTests.java | 3 +- .../RemoteStoreStatsTrackerFactoryTests.java | 19 +++- .../RemoteStoreRefreshListenerTests.java | 5 +- .../node/ResponseCollectorServiceTests.java | 3 +- .../AdmissionControlServiceTests.java | 3 +- .../AdmissionControlSettingsTests.java | 3 +- .../CpuBasedAdmissionControllerTests.java | 3 +- .../IoBasedAdmissionControllerTests.java | 3 +- ...BasedAdmissionControllerSettingsTests.java | 3 +- ...BasedAdmissionControllerSettingsTests.java | 3 +- .../stats/AdmissionControlStatsTests.java | 3 +- .../stats/AdmissionControllerStatsTests.java | 3 +- .../snapshots/SnapshotResiliencyTests.java | 4 +- .../telemetry/TestInMemoryCounter.java | 52 ++++++++++ .../telemetry/TestInMemoryHistogram.java | 47 +++++++++ .../TestInMemoryMetricsRegistry.java | 71 +++++++++++++ .../AbstractCoordinatorTestCase.java | 13 ++- .../FakeThreadPoolClusterManagerService.java | 5 +- .../opensearch/test/ClusterServiceUtils.java | 26 ++++- 56 files changed, 848 insertions(+), 137 deletions(-) create mode 100644 server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java create mode 100644 server/src/test/java/org/opensearch/telemetry/TestInMemoryCounter.java create mode 100644 server/src/test/java/org/opensearch/telemetry/TestInMemoryHistogram.java create mode 100644 server/src/test/java/org/opensearch/telemetry/TestInMemoryMetricsRegistry.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 0eb375b14b6cc..2fd8af9ed1261 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased 2.x] ### Added +- Add leader and follower check failure counter metrics ([#12439](https://github.com/opensearch-project/OpenSearch/pull/12439)) +- Add latency metrics for instrumenting critical clusterManager code paths ([#12333](https://github.com/opensearch-project/OpenSearch/pull/12333)) - Add support for Azure Managed Identity in repository-azure ([#12423](https://github.com/opensearch-project/OpenSearch/issues/12423)) - Add useCompoundFile index setting ([#13478](https://github.com/opensearch-project/OpenSearch/pull/13478)) - Make outbound side of transport protocol dependent ([#13293](https://github.com/opensearch-project/OpenSearch/pull/13293)) diff --git a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsPluginTests.java b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsPluginTests.java index 15bf284652e13..8b8856e3e305c 100644 --- a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsPluginTests.java +++ b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsPluginTests.java @@ -21,6 +21,7 @@ import org.opensearch.plugin.insights.settings.QueryInsightsSettings; import org.opensearch.plugins.ActionPlugin; import org.opensearch.rest.RestHandler; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.ExecutorBuilder; import org.opensearch.threadpool.ScalingExecutorBuilder; @@ -51,8 +52,7 @@ public void setup() { clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_LATENCY_QUERIES_WINDOW_SIZE); clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_LATENCY_EXPORTER_SETTINGS); - clusterService = new ClusterService(settings, clusterSettings, threadPool); - + clusterService = ClusterServiceUtils.createClusterService(settings, clusterSettings, threadPool); } public void testGetSettings() { diff --git a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/listener/QueryInsightsListenerTests.java b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/listener/QueryInsightsListenerTests.java index 455bb8b46bf4c..b794a2e4b8608 100644 --- a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/listener/QueryInsightsListenerTests.java +++ b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/listener/QueryInsightsListenerTests.java @@ -28,6 +28,7 @@ import org.opensearch.search.aggregations.support.ValueType; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.tasks.Task; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.ThreadPool; import org.junit.Before; @@ -69,7 +70,7 @@ public void setup() { clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_LATENCY_QUERIES_ENABLED); clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_LATENCY_QUERIES_SIZE); clusterSettings.registerSetting(QueryInsightsSettings.TOP_N_LATENCY_QUERIES_WINDOW_SIZE); - clusterService = new ClusterService(settings, clusterSettings, null); + clusterService = ClusterServiceUtils.createClusterService(settings, clusterSettings, null); when(queryInsightsService.isCollectionEnabled(MetricType.LATENCY)).thenReturn(true); when(queryInsightsService.getTopQueriesService(MetricType.LATENCY)).thenReturn(topQueriesService); diff --git a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/rules/transport/top_queries/TransportTopQueriesActionTests.java b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/rules/transport/top_queries/TransportTopQueriesActionTests.java index a5f36b6e8cce0..d05cf7b6a636f 100644 --- a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/rules/transport/top_queries/TransportTopQueriesActionTests.java +++ b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/rules/transport/top_queries/TransportTopQueriesActionTests.java @@ -17,6 +17,7 @@ import org.opensearch.plugin.insights.rules.action.top_queries.TopQueriesResponse; import org.opensearch.plugin.insights.rules.model.MetricType; import org.opensearch.plugin.insights.settings.QueryInsightsSettings; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportService; @@ -33,7 +34,7 @@ public class TransportTopQueriesActionTests extends OpenSearchTestCase { private final Settings.Builder settingsBuilder = Settings.builder(); private final Settings settings = settingsBuilder.build(); private final ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - private final ClusterService clusterService = new ClusterService(settings, clusterSettings, threadPool); + private final ClusterService clusterService = ClusterServiceUtils.createClusterService(settings, clusterSettings, threadPool); private final TransportService transportService = mock(TransportService.class); private final QueryInsightsService topQueriesByLatencyService = mock(QueryInsightsService.class); private final ActionFilters actionFilters = mock(ActionFilters.class); diff --git a/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java b/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java new file mode 100644 index 0000000000000..a98349a4af5cd --- /dev/null +++ b/server/src/main/java/org/opensearch/cluster/ClusterManagerMetrics.java @@ -0,0 +1,99 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster; + +import org.opensearch.telemetry.metrics.Counter; +import org.opensearch.telemetry.metrics.Histogram; +import org.opensearch.telemetry.metrics.MetricsRegistry; +import org.opensearch.telemetry.metrics.tags.Tags; + +import java.util.Objects; +import java.util.Optional; + +/** + * Class containing metrics (counters/latency) specific to ClusterManager. + * + * @opensearch.internal + */ +public final class ClusterManagerMetrics { + + private static final String LATENCY_METRIC_UNIT_MS = "ms"; + private static final String COUNTER_METRICS_UNIT = "1"; + + public final Histogram clusterStateAppliersHistogram; + public final Histogram clusterStateListenersHistogram; + public final Histogram rerouteHistogram; + public final Histogram clusterStateComputeHistogram; + public final Histogram clusterStatePublishHistogram; + + public final Counter leaderCheckFailureCounter; + public final Counter followerChecksFailureCounter; + + public ClusterManagerMetrics(MetricsRegistry metricsRegistry) { + clusterStateAppliersHistogram = metricsRegistry.createHistogram( + "cluster.state.appliers.latency", + "Histogram for tracking the latency of cluster state appliers", + LATENCY_METRIC_UNIT_MS + ); + clusterStateListenersHistogram = metricsRegistry.createHistogram( + "cluster.state.listeners.latency", + "Histogram for tracking the latency of cluster state listeners", + LATENCY_METRIC_UNIT_MS + ); + rerouteHistogram = metricsRegistry.createHistogram( + "allocation.reroute.latency", + "Histogram for recording latency of shard re-routing", + LATENCY_METRIC_UNIT_MS + ); + clusterStateComputeHistogram = metricsRegistry.createHistogram( + "cluster.state.new.compute.latency", + "Histogram for recording time taken to compute new cluster state", + LATENCY_METRIC_UNIT_MS + ); + clusterStatePublishHistogram = metricsRegistry.createHistogram( + "cluster.state.publish.success.latency", + "Histogram for recording time taken to publish a new cluster state", + LATENCY_METRIC_UNIT_MS + ); + followerChecksFailureCounter = metricsRegistry.createCounter( + "followers.checker.failure.count", + "Counter for number of failed follower checks", + COUNTER_METRICS_UNIT + ); + leaderCheckFailureCounter = metricsRegistry.createCounter( + "leader.checker.failure.count", + "Counter for number of failed leader checks", + COUNTER_METRICS_UNIT + ); + } + + public void recordLatency(Histogram histogram, Double value) { + histogram.record(value); + } + + public void recordLatency(Histogram histogram, Double value, Optional tags) { + if (Objects.isNull(tags) || tags.isEmpty()) { + histogram.record(value); + return; + } + histogram.record(value, tags.get()); + } + + public void incrementCounter(Counter counter, Double value) { + incrementCounter(counter, value, Optional.empty()); + } + + public void incrementCounter(Counter counter, Double value, Optional tags) { + if (Objects.isNull(tags) || tags.isEmpty()) { + counter.add(value); + return; + } + counter.add(value, tags.get()); + } +} diff --git a/server/src/main/java/org/opensearch/cluster/ClusterModule.java b/server/src/main/java/org/opensearch/cluster/ClusterModule.java index 93850cc046088..a8771500ff573 100644 --- a/server/src/main/java/org/opensearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/opensearch/cluster/ClusterModule.java @@ -144,7 +144,8 @@ public ClusterModule( List clusterPlugins, ClusterInfoService clusterInfoService, SnapshotsInfoService snapshotsInfoService, - ThreadContext threadContext + ThreadContext threadContext, + ClusterManagerMetrics clusterManagerMetrics ) { this.clusterPlugins = clusterPlugins; this.deciderList = createAllocationDeciders(settings, clusterService.getClusterSettings(), clusterPlugins); @@ -157,7 +158,8 @@ public ClusterModule( shardsAllocator, clusterInfoService, snapshotsInfoService, - settings + settings, + clusterManagerMetrics ); } diff --git a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java index 5b2b2b36ea79e..4d371cba45e03 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/Coordinator.java @@ -37,6 +37,7 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.LegacyESVersion; import org.opensearch.cluster.ClusterChangedEvent; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateTaskConfig; @@ -208,7 +209,8 @@ public Coordinator( ElectionStrategy electionStrategy, NodeHealthService nodeHealthService, PersistedStateRegistry persistedStateRegistry, - RemoteStoreNodeService remoteStoreNodeService + RemoteStoreNodeService remoteStoreNodeService, + ClusterManagerMetrics clusterManagerMetrics ) { this.settings = settings; this.transportService = transportService; @@ -262,14 +264,22 @@ public Coordinator( this::handlePublishRequest, this::handleApplyCommit ); - this.leaderChecker = new LeaderChecker(settings, clusterSettings, transportService, this::onLeaderFailure, nodeHealthService); + this.leaderChecker = new LeaderChecker( + settings, + clusterSettings, + transportService, + this::onLeaderFailure, + nodeHealthService, + clusterManagerMetrics + ); this.followersChecker = new FollowersChecker( settings, clusterSettings, transportService, this::onFollowerCheckRequest, this::removeNode, - nodeHealthService + nodeHealthService, + clusterManagerMetrics ); this.nodeRemovalExecutor = new NodeRemovalClusterStateTaskExecutor(allocationService, logger); this.clusterApplier = clusterApplier; diff --git a/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java b/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java index 70bb0515bb022..2ec0dabd91786 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/FollowersChecker.java @@ -35,6 +35,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.coordination.Coordinator.Mode; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; @@ -127,6 +128,7 @@ public class FollowersChecker { private final TransportService transportService; private final NodeHealthService nodeHealthService; private volatile FastResponseState fastResponseState; + private ClusterManagerMetrics clusterManagerMetrics; public FollowersChecker( Settings settings, @@ -134,7 +136,8 @@ public FollowersChecker( TransportService transportService, Consumer handleRequestAndUpdateState, BiConsumer onNodeFailure, - NodeHealthService nodeHealthService + NodeHealthService nodeHealthService, + ClusterManagerMetrics clusterManagerMetrics ) { this.settings = settings; this.transportService = transportService; @@ -161,6 +164,7 @@ public void onNodeDisconnected(DiscoveryNode node, Transport.Connection connecti handleDisconnectedNode(node); } }); + this.clusterManagerMetrics = clusterManagerMetrics; } private void setFollowerCheckTimeout(TimeValue followerCheckTimeout) { @@ -413,6 +417,7 @@ public String executor() { } void failNode(String reason) { + clusterManagerMetrics.incrementCounter(clusterManagerMetrics.followerChecksFailureCounter, 1.0); transportService.getThreadPool().generic().execute(new Runnable() { @Override public void run() { diff --git a/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java b/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java index 8d4373b865f62..4fd2c0eb13073 100644 --- a/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java +++ b/server/src/main/java/org/opensearch/cluster/coordination/LeaderChecker.java @@ -36,6 +36,7 @@ import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.OpenSearchException; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.cluster.node.DiscoveryNodes; import org.opensearch.common.Nullable; @@ -119,17 +120,17 @@ public class LeaderChecker { private final TransportService transportService; private final Consumer onLeaderFailure; private final NodeHealthService nodeHealthService; - private AtomicReference currentChecker = new AtomicReference<>(); - private volatile DiscoveryNodes discoveryNodes; + private final ClusterManagerMetrics clusterManagerMetrics; LeaderChecker( final Settings settings, final ClusterSettings clusterSettings, final TransportService transportService, final Consumer onLeaderFailure, - NodeHealthService nodeHealthService + NodeHealthService nodeHealthService, + final ClusterManagerMetrics clusterManagerMetrics ) { this.settings = settings; leaderCheckInterval = LEADER_CHECK_INTERVAL_SETTING.get(settings); @@ -138,6 +139,7 @@ public class LeaderChecker { this.transportService = transportService; this.onLeaderFailure = onLeaderFailure; this.nodeHealthService = nodeHealthService; + this.clusterManagerMetrics = clusterManagerMetrics; clusterSettings.addSettingsUpdateConsumer(LEADER_CHECK_TIMEOUT_SETTING, this::setLeaderCheckTimeout); transportService.registerRequestHandler( @@ -293,7 +295,6 @@ public void handleResponse(Empty response) { logger.debug("closed check scheduler received a response, doing nothing"); return; } - failureCountSinceLastSuccess.set(0); scheduleNextWakeUp(); // logs trace message indicating success } @@ -304,7 +305,6 @@ public void handleException(TransportException exp) { logger.debug("closed check scheduler received a response, doing nothing"); return; } - if (exp instanceof ConnectTransportException || exp.getCause() instanceof ConnectTransportException) { logger.debug(new ParameterizedMessage("leader [{}] disconnected during check", leader), exp); leaderFailed(new ConnectTransportException(leader, "disconnected during check", exp)); @@ -355,6 +355,7 @@ public String executor() { void leaderFailed(Exception e) { if (isClosed.compareAndSet(false, true)) { + clusterManagerMetrics.incrementCounter(clusterManagerMetrics.leaderCheckFailureCounter, 1.0); transportService.getThreadPool().generic().execute(new Runnable() { @Override public void run() { diff --git a/server/src/main/java/org/opensearch/cluster/routing/allocation/AllocationService.java b/server/src/main/java/org/opensearch/cluster/routing/allocation/AllocationService.java index ae496e27ebf35..58b9753a11d73 100644 --- a/server/src/main/java/org/opensearch/cluster/routing/allocation/AllocationService.java +++ b/server/src/main/java/org/opensearch/cluster/routing/allocation/AllocationService.java @@ -37,6 +37,7 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.Version; import org.opensearch.cluster.ClusterInfoService; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.RestoreInProgress; import org.opensearch.cluster.health.ClusterHealthStatus; @@ -56,10 +57,12 @@ import org.opensearch.cluster.routing.allocation.decider.AllocationDeciders; import org.opensearch.cluster.routing.allocation.decider.Decision; import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; import org.opensearch.gateway.GatewayAllocator; import org.opensearch.gateway.PriorityComparator; import org.opensearch.gateway.ShardsBatchGatewayAllocator; import org.opensearch.snapshots.SnapshotsInfoService; +import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry; import java.util.ArrayList; import java.util.Collections; @@ -96,6 +99,7 @@ public class AllocationService { private final ShardsAllocator shardsAllocator; private final ClusterInfoService clusterInfoService; private SnapshotsInfoService snapshotsInfoService; + private final ClusterManagerMetrics clusterManagerMetrics; // only for tests that use the GatewayAllocator as the unique ExistingShardsAllocator public AllocationService( @@ -105,7 +109,13 @@ public AllocationService( ClusterInfoService clusterInfoService, SnapshotsInfoService snapshotsInfoService ) { - this(allocationDeciders, shardsAllocator, clusterInfoService, snapshotsInfoService); + this( + allocationDeciders, + shardsAllocator, + clusterInfoService, + snapshotsInfoService, + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) + ); setExistingShardsAllocators(Collections.singletonMap(GatewayAllocator.ALLOCATOR_NAME, gatewayAllocator)); } @@ -113,9 +123,10 @@ public AllocationService( AllocationDeciders allocationDeciders, ShardsAllocator shardsAllocator, ClusterInfoService clusterInfoService, - SnapshotsInfoService snapshotsInfoService + SnapshotsInfoService snapshotsInfoService, + ClusterManagerMetrics clusterManagerMetrics ) { - this(allocationDeciders, shardsAllocator, clusterInfoService, snapshotsInfoService, Settings.EMPTY); + this(allocationDeciders, shardsAllocator, clusterInfoService, snapshotsInfoService, Settings.EMPTY, clusterManagerMetrics); } public AllocationService( @@ -123,14 +134,15 @@ public AllocationService( ShardsAllocator shardsAllocator, ClusterInfoService clusterInfoService, SnapshotsInfoService snapshotsInfoService, - Settings settings - + Settings settings, + ClusterManagerMetrics clusterManagerMetrics ) { this.allocationDeciders = allocationDeciders; this.shardsAllocator = shardsAllocator; this.clusterInfoService = clusterInfoService; this.snapshotsInfoService = snapshotsInfoService; this.settings = settings; + this.clusterManagerMetrics = clusterManagerMetrics; } /** @@ -550,11 +562,15 @@ private void reroute(RoutingAllocation allocation) { assert AutoExpandReplicas.getAutoExpandReplicaChanges(allocation.metadata(), allocation).isEmpty() : "auto-expand replicas out of sync with number of nodes in the cluster"; assert assertInitialized(); - + long rerouteStartTimeNS = System.nanoTime(); removeDelayMarkers(allocation); allocateExistingUnassignedShards(allocation); // try to allocate existing shard copies first shardsAllocator.allocate(allocation); + clusterManagerMetrics.recordLatency( + clusterManagerMetrics.rerouteHistogram, + (double) Math.max(0, TimeValue.nsecToMSec(System.nanoTime() - rerouteStartTimeNS)) + ); assert RoutingNodes.assertShardStats(allocation.routingNodes()); } diff --git a/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java b/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java index a55721fb13cdc..6234427445754 100644 --- a/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java +++ b/server/src/main/java/org/opensearch/cluster/service/ClusterApplierService.java @@ -36,6 +36,7 @@ import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.cluster.ClusterChangedEvent; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateApplier; import org.opensearch.cluster.ClusterStateListener; @@ -61,6 +62,8 @@ import org.opensearch.common.util.concurrent.PrioritizedOpenSearchThreadPoolExecutor; import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException; +import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry; +import org.opensearch.telemetry.metrics.tags.Tags; import org.opensearch.threadpool.Scheduler; import org.opensearch.threadpool.ThreadPool; @@ -68,6 +71,7 @@ import java.util.Collection; import java.util.Map; import java.util.Objects; +import java.util.Optional; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.CountDownLatch; @@ -120,8 +124,19 @@ public class ClusterApplierService extends AbstractLifecycleComponent implements private final String nodeName; private NodeConnectionsService nodeConnectionsService; + private final ClusterManagerMetrics clusterManagerMetrics; public ClusterApplierService(String nodeName, Settings settings, ClusterSettings clusterSettings, ThreadPool threadPool) { + this(nodeName, settings, clusterSettings, threadPool, new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE)); + } + + public ClusterApplierService( + String nodeName, + Settings settings, + ClusterSettings clusterSettings, + ThreadPool threadPool, + ClusterManagerMetrics clusterManagerMetrics + ) { this.clusterSettings = clusterSettings; this.threadPool = threadPool; this.state = new AtomicReference<>(); @@ -132,6 +147,7 @@ public ClusterApplierService(String nodeName, Settings settings, ClusterSettings CLUSTER_SERVICE_SLOW_TASK_LOGGING_THRESHOLD_SETTING, this::setSlowTaskLoggingThreshold ); + this.clusterManagerMetrics = clusterManagerMetrics; } private void setSlowTaskLoggingThreshold(TimeValue slowTaskLoggingThreshold) { @@ -597,7 +613,7 @@ private void callClusterStateAppliers(ClusterChangedEvent clusterChangedEvent, S callClusterStateAppliers(clusterChangedEvent, stopWatch, lowPriorityStateAppliers); } - private static void callClusterStateAppliers( + private void callClusterStateAppliers( ClusterChangedEvent clusterChangedEvent, StopWatch stopWatch, Collection clusterStateAppliers @@ -605,7 +621,13 @@ private static void callClusterStateAppliers( for (ClusterStateApplier applier : clusterStateAppliers) { logger.trace("calling [{}] with change to version [{}]", applier, clusterChangedEvent.state().version()); try (TimingHandle ignored = stopWatch.timing("running applier [" + applier + "]")) { + long applierStartTimeNS = System.nanoTime(); applier.applyClusterState(clusterChangedEvent); + clusterManagerMetrics.recordLatency( + clusterManagerMetrics.clusterStateAppliersHistogram, + (double) Math.max(0, TimeValue.nsecToMSec(System.nanoTime() - applierStartTimeNS)), + Optional.of(Tags.create().addTag("Operation", applier.getClass().getSimpleName())) + ); } } } @@ -624,7 +646,13 @@ private void callClusterStateListener( try { logger.trace("calling [{}] with change to version [{}]", listener, clusterChangedEvent.state().version()); try (TimingHandle ignored = stopWatch.timing("notifying listener [" + listener + "]")) { + long listenerStartTimeNS = System.nanoTime(); listener.clusterChanged(clusterChangedEvent); + clusterManagerMetrics.recordLatency( + clusterManagerMetrics.clusterStateListenersHistogram, + (double) Math.max(0, TimeValue.nsecToMSec(System.nanoTime() - listenerStartTimeNS)), + Optional.of(Tags.create().addTag("Operation", listener.getClass().getSimpleName())) + ); } } catch (Exception ex) { logger.warn("failed to notify ClusterStateListener", ex); diff --git a/server/src/main/java/org/opensearch/cluster/service/ClusterManagerService.java b/server/src/main/java/org/opensearch/cluster/service/ClusterManagerService.java index e9224596e048d..fa8c965b4d538 100644 --- a/server/src/main/java/org/opensearch/cluster/service/ClusterManagerService.java +++ b/server/src/main/java/org/opensearch/cluster/service/ClusterManagerService.java @@ -8,6 +8,7 @@ package org.opensearch.cluster.service; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; @@ -20,7 +21,17 @@ */ @PublicApi(since = "2.2.0") public class ClusterManagerService extends MasterService { + public ClusterManagerService(Settings settings, ClusterSettings clusterSettings, ThreadPool threadPool) { super(settings, clusterSettings, threadPool); } + + public ClusterManagerService( + Settings settings, + ClusterSettings clusterSettings, + ThreadPool threadPool, + ClusterManagerMetrics clusterManagerMetrics + ) { + super(settings, clusterSettings, threadPool, clusterManagerMetrics); + } } diff --git a/server/src/main/java/org/opensearch/cluster/service/ClusterService.java b/server/src/main/java/org/opensearch/cluster/service/ClusterService.java index aa7766979e851..c3c48dd8b87ef 100644 --- a/server/src/main/java/org/opensearch/cluster/service/ClusterService.java +++ b/server/src/main/java/org/opensearch/cluster/service/ClusterService.java @@ -32,6 +32,7 @@ package org.opensearch.cluster.service; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateApplier; @@ -53,6 +54,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.index.IndexingPressureService; import org.opensearch.node.Node; +import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry; import org.opensearch.threadpool.ThreadPool; import java.util.Collections; @@ -92,11 +94,20 @@ public class ClusterService extends AbstractLifecycleComponent { private IndexingPressureService indexingPressureService; public ClusterService(Settings settings, ClusterSettings clusterSettings, ThreadPool threadPool) { + this(settings, clusterSettings, threadPool, new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE)); + } + + public ClusterService( + Settings settings, + ClusterSettings clusterSettings, + ThreadPool threadPool, + ClusterManagerMetrics clusterManagerMetrics + ) { this( settings, clusterSettings, - new ClusterManagerService(settings, clusterSettings, threadPool), - new ClusterApplierService(Node.NODE_NAME_SETTING.get(settings), settings, clusterSettings, threadPool) + new ClusterManagerService(settings, clusterSettings, threadPool, clusterManagerMetrics), + new ClusterApplierService(Node.NODE_NAME_SETTING.get(settings), settings, clusterSettings, threadPool, clusterManagerMetrics) ); } diff --git a/server/src/main/java/org/opensearch/cluster/service/MasterService.java b/server/src/main/java/org/opensearch/cluster/service/MasterService.java index af3e4f8437c43..686e9793a8fd3 100644 --- a/server/src/main/java/org/opensearch/cluster/service/MasterService.java +++ b/server/src/main/java/org/opensearch/cluster/service/MasterService.java @@ -39,6 +39,7 @@ import org.opensearch.action.support.PlainActionFuture; import org.opensearch.cluster.AckedClusterStateTaskListener; import org.opensearch.cluster.ClusterChangedEvent; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterState.Builder; import org.opensearch.cluster.ClusterStateTaskConfig; @@ -70,6 +71,8 @@ import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException; import org.opensearch.discovery.Discovery; import org.opensearch.node.Node; +import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry; +import org.opensearch.telemetry.metrics.tags.Tags; import org.opensearch.threadpool.Scheduler; import org.opensearch.threadpool.ThreadPool; @@ -79,6 +82,7 @@ import java.util.Locale; import java.util.Map; import java.util.Objects; +import java.util.Optional; import java.util.concurrent.TimeUnit; import java.util.function.Supplier; import java.util.stream.Collectors; @@ -135,8 +139,18 @@ public class MasterService extends AbstractLifecycleComponent { protected final ClusterManagerTaskThrottler clusterManagerTaskThrottler; private final ClusterManagerThrottlingStats throttlingStats; private final ClusterStateStats stateStats; + private final ClusterManagerMetrics clusterManagerMetrics; public MasterService(Settings settings, ClusterSettings clusterSettings, ThreadPool threadPool) { + this(settings, clusterSettings, threadPool, new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE)); + } + + public MasterService( + Settings settings, + ClusterSettings clusterSettings, + ThreadPool threadPool, + ClusterManagerMetrics clusterManagerMetrics + ) { this.nodeName = Objects.requireNonNull(Node.NODE_NAME_SETTING.get(settings)); this.slowTaskLoggingThreshold = CLUSTER_MANAGER_SERVICE_SLOW_TASK_LOGGING_THRESHOLD_SETTING.get(settings); @@ -154,6 +168,7 @@ public MasterService(Settings settings, ClusterSettings clusterSettings, ThreadP ); this.stateStats = new ClusterStateStats(); this.threadPool = threadPool; + this.clusterManagerMetrics = clusterManagerMetrics; } private void setSlowTaskLoggingThreshold(TimeValue slowTaskLoggingThreshold) { @@ -303,6 +318,12 @@ private void runTasks(TaskInputs taskInputs) { final TimeValue computationTime = getTimeSince(computationStartTime); logExecutionTime(computationTime, "compute cluster state update", summary); + clusterManagerMetrics.recordLatency( + clusterManagerMetrics.clusterStateComputeHistogram, + (double) computationTime.getMillis(), + Optional.of(Tags.create().addTag("Operation", taskInputs.executor.getClass().getSimpleName())) + ); + if (taskOutputs.clusterStateUnchanged()) { final long notificationStartTime = threadPool.preciseRelativeTimeInNanos(); taskOutputs.notifySuccessfulTasksOnUnchangedClusterState(); @@ -361,6 +382,7 @@ protected boolean blockingAllowed() { final long durationMillis = getTimeSince(startTimeNanos).millis(); stateStats.stateUpdateTook(durationMillis); stateStats.stateUpdated(); + clusterManagerMetrics.recordLatency(clusterManagerMetrics.clusterStatePublishHistogram, (double) durationMillis); } catch (Exception e) { stateStats.stateUpdateFailed(); onPublicationFailed(clusterChangedEvent, taskOutputs, startTimeNanos, e); diff --git a/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java b/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java index 288371aa240a0..538dea5b2e60b 100644 --- a/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java +++ b/server/src/main/java/org/opensearch/discovery/DiscoveryModule.java @@ -34,6 +34,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.coordination.Coordinator; import org.opensearch.cluster.coordination.ElectionStrategy; @@ -133,7 +134,8 @@ public DiscoveryModule( RerouteService rerouteService, NodeHealthService nodeHealthService, PersistedStateRegistry persistedStateRegistry, - RemoteStoreNodeService remoteStoreNodeService + RemoteStoreNodeService remoteStoreNodeService, + ClusterManagerMetrics clusterManagerMetrics ) { final Collection> joinValidators = new ArrayList<>(); final Map> hostProviders = new HashMap<>(); @@ -211,7 +213,8 @@ public DiscoveryModule( electionStrategy, nodeHealthService, persistedStateRegistry, - remoteStoreNodeService + remoteStoreNodeService, + clusterManagerMetrics ); } else { throw new IllegalArgumentException("Unknown discovery type [" + discoveryType + "]"); diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 3ae78a41bb634..35b8295f26a08 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -58,6 +58,7 @@ import org.opensearch.client.Client; import org.opensearch.client.node.NodeClient; import org.opensearch.cluster.ClusterInfoService; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterModule; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; @@ -606,21 +607,10 @@ protected Node( getCustomNameResolvers(pluginsService.filterPlugins(DiscoveryPlugin.class)) ); - List clusterPlugins = pluginsService.filterPlugins(ClusterPlugin.class); - final ClusterService clusterService = new ClusterService(settings, settingsModule.getClusterSettings(), threadPool); - clusterService.addStateApplier(scriptService); - resourcesToClose.add(clusterService); - final Set> consistentSettings = settingsModule.getConsistentSettings(); - if (consistentSettings.isEmpty() == false) { - clusterService.addLocalNodeMasterListener( - new ConsistentSettingsService(settings, clusterService, consistentSettings).newHashPublisher() - ); - } - TracerFactory tracerFactory; MetricsRegistryFactory metricsRegistryFactory; if (FeatureFlags.isEnabled(TELEMETRY)) { - final TelemetrySettings telemetrySettings = new TelemetrySettings(settings, clusterService.getClusterSettings()); + final TelemetrySettings telemetrySettings = new TelemetrySettings(settings, settingsModule.getClusterSettings()); if (telemetrySettings.isTracingFeatureEnabled() || telemetrySettings.isMetricsFeatureEnabled()) { List telemetryPlugins = pluginsService.filterPlugins(TelemetryPlugin.class); List telemetryPluginsImplementingTelemetryAware = telemetryPlugins.stream() @@ -660,6 +650,24 @@ protected Node( resourcesToClose.add(tracer::close); resourcesToClose.add(metricsRegistry::close); + final ClusterManagerMetrics clusterManagerMetrics = new ClusterManagerMetrics(metricsRegistry); + + List clusterPlugins = pluginsService.filterPlugins(ClusterPlugin.class); + final ClusterService clusterService = new ClusterService( + settings, + settingsModule.getClusterSettings(), + threadPool, + clusterManagerMetrics + ); + clusterService.addStateApplier(scriptService); + resourcesToClose.add(clusterService); + final Set> consistentSettings = settingsModule.getConsistentSettings(); + if (consistentSettings.isEmpty() == false) { + clusterService.addLocalNodeMasterListener( + new ConsistentSettingsService(settings, clusterService, consistentSettings).newHashPublisher() + ); + } + final ClusterInfoService clusterInfoService = newClusterInfoService(settings, clusterService, threadPool, client); final UsageService usageService = new UsageService(); @@ -687,7 +695,8 @@ protected Node( clusterPlugins, clusterInfoService, snapshotsInfoService, - threadPool.getThreadContext() + threadPool.getThreadContext(), + clusterManagerMetrics ); modules.add(clusterModule); IndicesModule indicesModule = new IndicesModule(pluginsService.filterPlugins(MapperPlugin.class)); @@ -1186,7 +1195,8 @@ protected Node( rerouteService, fsHealthService, persistedStateRegistry, - remoteStoreNodeService + remoteStoreNodeService, + clusterManagerMetrics ); final SearchPipelineService searchPipelineService = new SearchPipelineService( clusterService, diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java index cf7080ab2fc06..ff9e41ee7c784 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIndicesThatCannotBeCreatedTests.java @@ -55,6 +55,7 @@ import org.opensearch.indices.SystemIndices; import org.opensearch.tasks.Task; import org.opensearch.telemetry.tracing.noop.NoopTracer; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.VersionUtils; import org.opensearch.threadpool.ThreadPool; @@ -153,7 +154,11 @@ private void indicesThatCannotBeCreatedTestCase( null, new IndexingPressureService( Settings.EMPTY, - new ClusterService(Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null) + ClusterServiceUtils.createClusterService( + Settings.EMPTY, + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + null + ) ), null, new SystemIndices(emptyMap()), diff --git a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java index da9156ccdb71a..a94a5d60b3f5a 100644 --- a/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java +++ b/server/src/test/java/org/opensearch/action/bulk/TransportBulkActionIngestTests.java @@ -71,6 +71,7 @@ import org.opensearch.ingest.IngestService; import org.opensearch.tasks.Task; import org.opensearch.telemetry.tracing.noop.NoopTracer; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.VersionUtils; import org.opensearch.threadpool.ThreadPool; @@ -170,7 +171,11 @@ class TestTransportBulkAction extends TransportBulkAction { ), new IndexingPressureService( SETTINGS, - new ClusterService(SETTINGS, new ClusterSettings(SETTINGS, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null) + ClusterServiceUtils.createClusterService( + SETTINGS, + new ClusterSettings(SETTINGS, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + null + ) ), null, new SystemIndices(emptyMap()), diff --git a/server/src/test/java/org/opensearch/action/search/SearchRequestSlowLogTests.java b/server/src/test/java/org/opensearch/action/search/SearchRequestSlowLogTests.java index f009988ffae17..91a2552ac3f04 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchRequestSlowLogTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchRequestSlowLogTests.java @@ -45,6 +45,7 @@ import org.opensearch.common.unit.TimeValue; import org.opensearch.index.query.QueryBuilders; import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -90,7 +91,7 @@ public void testMultipleSlowLoggersUseSingleLog4jLogger() { LoggerContext context = (LoggerContext) LogManager.getContext(false); SearchPhaseContext searchPhaseContext1 = new MockSearchPhaseContext(1); - ClusterService clusterService1 = new ClusterService( + ClusterService clusterService1 = ClusterServiceUtils.createClusterService( Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null @@ -99,7 +100,7 @@ public void testMultipleSlowLoggersUseSingleLog4jLogger() { int numberOfLoggersBefore = context.getLoggers().size(); SearchPhaseContext searchPhaseContext2 = new MockSearchPhaseContext(1); - ClusterService clusterService2 = new ClusterService( + ClusterService clusterService2 = ClusterServiceUtils.createClusterService( Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null @@ -124,7 +125,7 @@ public void testOnRequestEnd() throws InterruptedException { settingsBuilder.put(SearchRequestSlowLog.CLUSTER_SEARCH_REQUEST_SLOWLOG_THRESHOLD_TRACE_SETTING.getKey(), "0ms"); Settings settings = settingsBuilder.build(); ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - ClusterService clusterService = new ClusterService(settings, clusterSettings, null); + ClusterService clusterService = ClusterServiceUtils.createClusterService(settings, clusterSettings, null); SearchRequestSlowLog searchRequestSlowLog = new SearchRequestSlowLog(clusterService, logger); final List searchListenersList = new ArrayList<>(List.of(searchRequestSlowLog)); @@ -157,7 +158,7 @@ public void testConcurrentOnRequestEnd() throws InterruptedException { settingsBuilder.put(SearchRequestSlowLog.CLUSTER_SEARCH_REQUEST_SLOWLOG_THRESHOLD_TRACE_SETTING.getKey(), "-1"); Settings settings = settingsBuilder.build(); ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - ClusterService clusterService = new ClusterService(settings, clusterSettings, null); + ClusterService clusterService = ClusterServiceUtils.createClusterService(settings, clusterSettings, null); SearchRequestSlowLog searchRequestSlowLog = new SearchRequestSlowLog(clusterService, logger); final List searchListenersList = new ArrayList<>(List.of(searchRequestSlowLog)); @@ -321,7 +322,7 @@ public void testLevelSettingWarn() { settingsBuilder.put(SearchRequestSlowLog.CLUSTER_SEARCH_REQUEST_SLOWLOG_LEVEL.getKey(), level); Settings settings = settingsBuilder.build(); ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - ClusterService clusterService = new ClusterService(settings, clusterSettings, null); + ClusterService clusterService = ClusterServiceUtils.createClusterService(settings, clusterSettings, null); SearchRequestSlowLog searchRequestSlowLog = new SearchRequestSlowLog(clusterService); assertEquals(level, searchRequestSlowLog.getLevel()); } @@ -332,7 +333,7 @@ public void testLevelSettingDebug() { settingsBuilder.put(SearchRequestSlowLog.CLUSTER_SEARCH_REQUEST_SLOWLOG_LEVEL.getKey(), level); Settings settings = settingsBuilder.build(); ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - ClusterService clusterService = new ClusterService(settings, clusterSettings, null); + ClusterService clusterService = ClusterServiceUtils.createClusterService(settings, clusterSettings, null); SearchRequestSlowLog searchRequestSlowLog = new SearchRequestSlowLog(clusterService); assertEquals(level, searchRequestSlowLog.getLevel().toString()); } @@ -343,7 +344,7 @@ public void testLevelSettingFail() { settingsBuilder.put(SearchRequestSlowLog.CLUSTER_SEARCH_REQUEST_SLOWLOG_LEVEL.getKey(), level); Settings settings = settingsBuilder.build(); ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - ClusterService clusterService = new ClusterService(settings, clusterSettings, null); + ClusterService clusterService = ClusterServiceUtils.createClusterService(settings, clusterSettings, null); try { new SearchRequestSlowLog(clusterService); @@ -363,7 +364,7 @@ public void testSetThresholds() { settingsBuilder.put(SearchRequestSlowLog.CLUSTER_SEARCH_REQUEST_SLOWLOG_THRESHOLD_TRACE_SETTING.getKey(), "100ms"); Settings settings = settingsBuilder.build(); ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - ClusterService clusterService = new ClusterService(settings, clusterSettings, null); + ClusterService clusterService = ClusterServiceUtils.createClusterService(settings, clusterSettings, null); SearchRequestSlowLog searchRequestSlowLog = new SearchRequestSlowLog(clusterService); assertEquals(TimeValue.timeValueMillis(400).nanos(), searchRequestSlowLog.getWarnThreshold()); assertEquals(TimeValue.timeValueMillis(300).nanos(), searchRequestSlowLog.getInfoThreshold()); @@ -380,7 +381,7 @@ public void testSetThresholdsUnits() { settingsBuilder.put(SearchRequestSlowLog.CLUSTER_SEARCH_REQUEST_SLOWLOG_THRESHOLD_TRACE_SETTING.getKey(), "100nanos"); Settings settings = settingsBuilder.build(); ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - ClusterService clusterService = new ClusterService(settings, clusterSettings, null); + ClusterService clusterService = ClusterServiceUtils.createClusterService(settings, clusterSettings, null); SearchRequestSlowLog searchRequestSlowLog = new SearchRequestSlowLog(clusterService); assertEquals(TimeValue.timeValueSeconds(400).nanos(), searchRequestSlowLog.getWarnThreshold()); assertEquals(TimeValue.timeValueMillis(300).nanos(), searchRequestSlowLog.getInfoThreshold()); @@ -395,7 +396,7 @@ public void testSetThresholdsDefaults() { settingsBuilder.put(SearchRequestSlowLog.CLUSTER_SEARCH_REQUEST_SLOWLOG_THRESHOLD_DEBUG_SETTING.getKey(), "200ms"); Settings settings = settingsBuilder.build(); ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - ClusterService clusterService = new ClusterService(settings, clusterSettings, null); + ClusterService clusterService = ClusterServiceUtils.createClusterService(settings, clusterSettings, null); SearchRequestSlowLog searchRequestSlowLog = new SearchRequestSlowLog(clusterService); assertEquals(TimeValue.timeValueMillis(400).nanos(), searchRequestSlowLog.getWarnThreshold()); assertEquals(TimeValue.timeValueMillis(-1).nanos(), searchRequestSlowLog.getInfoThreshold()); @@ -409,7 +410,7 @@ public void testSetThresholdsError() { settingsBuilder.put(SearchRequestSlowLog.CLUSTER_SEARCH_REQUEST_SLOWLOG_THRESHOLD_WARN_SETTING.getKey(), "NOT A TIME VALUE"); Settings settings = settingsBuilder.build(); ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - ClusterService clusterService = new ClusterService(settings, clusterSettings, null); + ClusterService clusterService = ClusterServiceUtils.createClusterService(settings, clusterSettings, null); try { new SearchRequestSlowLog(clusterService); diff --git a/server/src/test/java/org/opensearch/cluster/ClusterModuleTests.java b/server/src/test/java/org/opensearch/cluster/ClusterModuleTests.java index 33431c5c45958..f2d99a51f1c9a 100644 --- a/server/src/test/java/org/opensearch/cluster/ClusterModuleTests.java +++ b/server/src/test/java/org/opensearch/cluster/ClusterModuleTests.java @@ -71,6 +71,8 @@ import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.gateway.GatewayAllocator; import org.opensearch.plugins.ClusterPlugin; +import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.gateway.TestGatewayAllocator; import org.opensearch.test.gateway.TestShardBatchGatewayAllocator; @@ -91,7 +93,7 @@ public class ClusterModuleTests extends ModuleTestCase { public void setUp() throws Exception { super.setUp(); threadContext = new ThreadContext(Settings.EMPTY); - clusterService = new ClusterService( + clusterService = ClusterServiceUtils.createClusterService( Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null @@ -166,7 +168,7 @@ public void testRegisterAllocationDeciderDuplicate() { public Collection createAllocationDeciders(Settings settings, ClusterSettings clusterSettings) { return Collections.singletonList(new EnableAllocationDecider(settings, clusterSettings)); } - }), clusterInfoService, null, threadContext) + }), clusterInfoService, null, threadContext, new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE)) ); assertEquals(e.getMessage(), "Cannot specify allocation decider [" + EnableAllocationDecider.class.getName() + "] twice"); } @@ -177,7 +179,7 @@ public void testRegisterAllocationDecider() { public Collection createAllocationDeciders(Settings settings, ClusterSettings clusterSettings) { return Collections.singletonList(new FakeAllocationDecider()); } - }), clusterInfoService, null, threadContext); + }), clusterInfoService, null, threadContext, new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE)); assertTrue(module.deciderList.stream().anyMatch(d -> d.getClass().equals(FakeAllocationDecider.class))); } @@ -187,7 +189,7 @@ private ClusterModule newClusterModuleWithShardsAllocator(Settings settings, Str public Map> getShardsAllocators(Settings settings, ClusterSettings clusterSettings) { return Collections.singletonMap(name, supplier); } - }), clusterInfoService, null, threadContext); + }), clusterInfoService, null, threadContext, new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE)); } public void testRegisterShardsAllocator() { @@ -208,7 +210,15 @@ public void testUnknownShardsAllocator() { Settings settings = Settings.builder().put(ClusterModule.SHARDS_ALLOCATOR_TYPE_SETTING.getKey(), "dne").build(); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, - () -> new ClusterModule(settings, clusterService, Collections.emptyList(), clusterInfoService, null, threadContext) + () -> new ClusterModule( + settings, + clusterService, + Collections.emptyList(), + clusterInfoService, + null, + threadContext, + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) + ) ); assertEquals("Unknown ShardsAllocator [dne]", e.getMessage()); } @@ -292,7 +302,8 @@ public void testRejectsReservedExistingShardsAllocatorName() { Collections.singletonList(existingShardsAllocatorPlugin(GatewayAllocator.ALLOCATOR_NAME)), clusterInfoService, null, - threadContext + threadContext, + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) ); expectThrows( IllegalArgumentException.class, @@ -307,7 +318,8 @@ public void testRejectsDuplicateExistingShardsAllocatorName() { Arrays.asList(existingShardsAllocatorPlugin("duplicate"), existingShardsAllocatorPlugin("duplicate")), clusterInfoService, null, - threadContext + threadContext, + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) ); expectThrows( IllegalArgumentException.class, diff --git a/server/src/test/java/org/opensearch/cluster/coordination/FollowersCheckerTests.java b/server/src/test/java/org/opensearch/cluster/coordination/FollowersCheckerTests.java index a106706c00732..d0bc41b459cc3 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/FollowersCheckerTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/FollowersCheckerTests.java @@ -33,6 +33,7 @@ import org.opensearch.OpenSearchException; import org.opensearch.Version; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.coordination.Coordinator.Mode; import org.opensearch.cluster.coordination.FollowersChecker.FollowerCheckRequest; @@ -47,6 +48,9 @@ import org.opensearch.core.transport.TransportResponse.Empty; import org.opensearch.monitor.NodeHealthService; import org.opensearch.monitor.StatusInfo; +import org.opensearch.telemetry.TestInMemoryMetricsRegistry; +import org.opensearch.telemetry.metrics.MetricsRegistry; +import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry; import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.EqualsHashCodeTestUtils; import org.opensearch.test.EqualsHashCodeTestUtils.CopyFunction; @@ -131,6 +135,8 @@ protected void onSendRequest(long requestId, String action, TransportRequest req transportService.start(); transportService.acceptIncomingRequests(); + TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); + final FollowersChecker followersChecker = new FollowersChecker( settings, clusterSettings, @@ -139,7 +145,8 @@ protected void onSendRequest(long requestId, String action, TransportRequest req (node, reason) -> { assert false : node; }, - () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info") + () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info"), + new ClusterManagerMetrics(metricsRegistry) ); followersChecker.setCurrentNodes(discoveryNodesHolder[0]); @@ -193,35 +200,43 @@ protected void onSendRequest(long requestId, String action, TransportRequest req followersChecker.clearCurrentNodes(); deterministicTaskQueue.runAllTasks(); assertThat(checkedNodes, empty()); + assertEquals(Integer.valueOf(0), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue()); } public void testFailsNodeThatDoesNotRespond() { final Settings settings = randomSettings(); + TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); testBehaviourOfFailingNode( settings, () -> null, "followers check retry count exceeded", (FOLLOWER_CHECK_RETRY_COUNT_SETTING.get(settings) - 1) * FOLLOWER_CHECK_INTERVAL_SETTING.get(settings).millis() + FOLLOWER_CHECK_RETRY_COUNT_SETTING.get(settings) * FOLLOWER_CHECK_TIMEOUT_SETTING.get(settings).millis(), - () -> new StatusInfo(HEALTHY, "healthy-info") + () -> new StatusInfo(HEALTHY, "healthy-info"), + metricsRegistry ); + assertEquals(Integer.valueOf(2), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue()); } public void testFailsNodeThatRejectsCheck() { final Settings settings = randomSettings(); + TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); testBehaviourOfFailingNode( settings, () -> { throw new OpenSearchException("simulated exception"); }, "followers check retry count exceeded", (FOLLOWER_CHECK_RETRY_COUNT_SETTING.get(settings) - 1) * FOLLOWER_CHECK_INTERVAL_SETTING.get(settings).millis(), - () -> new StatusInfo(HEALTHY, "healthy-info") + () -> new StatusInfo(HEALTHY, "healthy-info"), + metricsRegistry ); + assertEquals(Integer.valueOf(2), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue()); } public void testFailureCounterResetsOnSuccess() { final Settings settings = randomSettings(); final int retryCount = FOLLOWER_CHECK_RETRY_COUNT_SETTING.get(settings); final int maxRecoveries = randomIntBetween(3, 10); + TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); // passes just enough checks to keep it alive, up to maxRecoveries, and then fails completely testBehaviourOfFailingNode(settings, new Supplier() { @@ -241,18 +256,23 @@ public Empty get() { "followers check retry count exceeded", (FOLLOWER_CHECK_RETRY_COUNT_SETTING.get(settings) * (maxRecoveries + 1) - 1) * FOLLOWER_CHECK_INTERVAL_SETTING.get(settings) .millis(), - () -> new StatusInfo(HEALTHY, "healthy-info") + () -> new StatusInfo(HEALTHY, "healthy-info"), + metricsRegistry ); + assertEquals(Integer.valueOf(2), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue()); } public void testFailsNodeThatIsDisconnected() { + TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); testBehaviourOfFailingNode( Settings.EMPTY, () -> { throw new ConnectTransportException(null, "simulated exception"); }, "disconnected", 0, - () -> new StatusInfo(HEALTHY, "healthy-info") + () -> new StatusInfo(HEALTHY, "healthy-info"), + metricsRegistry ); + assertEquals(Integer.valueOf(2), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue()); } public void testFailsNodeThatDisconnects() { @@ -297,6 +317,7 @@ public String toString() { transportService.acceptIncomingRequests(); final AtomicBoolean nodeFailed = new AtomicBoolean(); + TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); final FollowersChecker followersChecker = new FollowersChecker( settings, @@ -307,7 +328,8 @@ public String toString() { assertTrue(nodeFailed.compareAndSet(false, true)); assertThat(reason, equalTo("disconnected")); }, - () -> new StatusInfo(HEALTHY, "healthy-info") + () -> new StatusInfo(HEALTHY, "healthy-info"), + new ClusterManagerMetrics(metricsRegistry) ); DiscoveryNodes discoveryNodes = DiscoveryNodes.builder().add(localNode).add(otherNode).localNodeId(localNode.getId()).build(); @@ -318,16 +340,20 @@ public String toString() { deterministicTaskQueue.runAllRunnableTasks(); assertTrue(nodeFailed.get()); assertThat(followersChecker.getFaultyNodes(), contains(otherNode)); + assertEquals(Integer.valueOf(1), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue()); } public void testFailsNodeThatIsUnhealthy() { + TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); testBehaviourOfFailingNode( randomSettings(), () -> { throw new NodeHealthCheckFailureException("non writable exception"); }, "health check failed", 0, - () -> new StatusInfo(HEALTHY, "healthy-info") + () -> new StatusInfo(HEALTHY, "healthy-info"), + metricsRegistry ); + assertEquals(Integer.valueOf(2), metricsRegistry.getCounterStore().get("followers.checker.failure.count").getCounterValue()); } private void testBehaviourOfFailingNode( @@ -335,7 +361,8 @@ private void testBehaviourOfFailingNode( Supplier responder, String failureReason, long expectedFailureTime, - NodeHealthService nodeHealthService + NodeHealthService nodeHealthService, + MetricsRegistry metricsRegistry ) { final DiscoveryNode localNode = new DiscoveryNode("local-node", buildNewFakeTransportAddress(), Version.CURRENT); final DiscoveryNode otherNode = new DiscoveryNode("other-node", buildNewFakeTransportAddress(), Version.CURRENT); @@ -386,7 +413,6 @@ public String toString() { transportService.acceptIncomingRequests(); final AtomicBoolean nodeFailed = new AtomicBoolean(); - final FollowersChecker followersChecker = new FollowersChecker( settings, clusterSettings, @@ -396,7 +422,8 @@ public String toString() { assertTrue(nodeFailed.compareAndSet(false, true)); assertThat(reason, equalTo(failureReason)); }, - nodeHealthService + nodeHealthService, + new ClusterManagerMetrics(metricsRegistry) ); DiscoveryNodes discoveryNodes = DiscoveryNodes.builder().add(localNode).add(otherNode).localNodeId(localNode.getId()).build(); @@ -501,7 +528,11 @@ protected void onSendRequest(long requestId, String action, TransportRequest req if (exception != null) { throw exception; } - }, (node, reason) -> { assert false : node; }, () -> new StatusInfo(UNHEALTHY, "unhealthy-info")); + }, + (node, reason) -> { assert false : node; }, + () -> new StatusInfo(UNHEALTHY, "unhealthy-info"), + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) + ); final long leaderTerm = randomLongBetween(2, Long.MAX_VALUE); final long followerTerm = randomLongBetween(1, leaderTerm - 1); @@ -574,7 +605,11 @@ protected void onSendRequest(long requestId, String action, TransportRequest req if (exception != null) { throw exception; } - }, (node, reason) -> { assert false : node; }, () -> new StatusInfo(HEALTHY, "healthy-info")); + }, + (node, reason) -> { assert false : node; }, + () -> new StatusInfo(HEALTHY, "healthy-info"), + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) + ); { // Does not call into the coordinator in the normal case @@ -721,7 +756,11 @@ public void testPreferClusterManagerNodes() { ); final FollowersChecker followersChecker = new FollowersChecker(Settings.EMPTY, clusterSettings, transportService, fcr -> { assert false : fcr; - }, (node, reason) -> { assert false : node; }, () -> new StatusInfo(HEALTHY, "healthy-info")); + }, + (node, reason) -> { assert false : node; }, + () -> new StatusInfo(HEALTHY, "healthy-info"), + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) + ); followersChecker.setCurrentNodes(discoveryNodes); List followerTargets = Stream.of(capturingTransport.getCapturedRequestsAndClear()) .map(cr -> cr.node) diff --git a/server/src/test/java/org/opensearch/cluster/coordination/LeaderCheckerTests.java b/server/src/test/java/org/opensearch/cluster/coordination/LeaderCheckerTests.java index fe65058333116..decdeddfa37a1 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/LeaderCheckerTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/LeaderCheckerTests.java @@ -34,6 +34,7 @@ import org.opensearch.OpenSearchException; import org.opensearch.Version; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.coordination.LeaderChecker.LeaderCheckRequest; import org.opensearch.cluster.node.DiscoveryNode; @@ -44,6 +45,7 @@ import org.opensearch.core.transport.TransportResponse; import org.opensearch.core.transport.TransportResponse.Empty; import org.opensearch.monitor.StatusInfo; +import org.opensearch.telemetry.TestInMemoryMetricsRegistry; import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.EqualsHashCodeTestUtils; import org.opensearch.test.EqualsHashCodeTestUtils.CopyFunction; @@ -175,11 +177,13 @@ public String toString() { transportService.acceptIncomingRequests(); final AtomicBoolean leaderFailed = new AtomicBoolean(); + TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); + final ClusterManagerMetrics clusterManagerMetrics = new ClusterManagerMetrics(metricsRegistry); final LeaderChecker leaderChecker = new LeaderChecker(settings, clusterSettings, transportService, e -> { assertThat(e.getMessage(), matchesRegex("node \\[.*\\] failed \\[[1-9][0-9]*\\] consecutive checks")); assertTrue(leaderFailed.compareAndSet(false, true)); - }, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info")); + }, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info"), clusterManagerMetrics); logger.info("--> creating first checker"); leaderChecker.updateLeader(leader1); @@ -229,6 +233,7 @@ public String toString() { ); } leaderChecker.updateLeader(null); + assertEquals(Integer.valueOf(1), metricsRegistry.getCounterStore().get("leader.checker.failure.count").getCounterValue()); } enum Response { @@ -293,10 +298,13 @@ public String toString() { transportService.acceptIncomingRequests(); final AtomicBoolean leaderFailed = new AtomicBoolean(); + TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); + final ClusterManagerMetrics clusterManagerMetrics = new ClusterManagerMetrics(metricsRegistry); + final LeaderChecker leaderChecker = new LeaderChecker(settings, clusterSettings, transportService, e -> { assertThat(e.getMessage(), anyOf(endsWith("disconnected"), endsWith("disconnected during check"))); assertTrue(leaderFailed.compareAndSet(false, true)); - }, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info")); + }, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info"), clusterManagerMetrics); leaderChecker.updateLeader(leader); { @@ -351,6 +359,7 @@ public String toString() { deterministicTaskQueue.runAllRunnableTasks(); assertTrue(leaderFailed.get()); } + assertEquals(Integer.valueOf(3), metricsRegistry.getCounterStore().get("leader.checker.failure.count").getCounterValue()); } public void testFollowerFailsImmediatelyOnHealthCheckFailure() { @@ -407,10 +416,12 @@ public String toString() { transportService.acceptIncomingRequests(); final AtomicBoolean leaderFailed = new AtomicBoolean(); + TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); + final ClusterManagerMetrics clusterManagerMetrics = new ClusterManagerMetrics(metricsRegistry); final LeaderChecker leaderChecker = new LeaderChecker(settings, clusterSettings, transportService, e -> { assertThat(e.getMessage(), endsWith("failed health checks")); assertTrue(leaderFailed.compareAndSet(false, true)); - }, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info")); + }, () -> new StatusInfo(StatusInfo.Status.HEALTHY, "healthy-info"), clusterManagerMetrics); leaderChecker.updateLeader(leader); @@ -430,6 +441,8 @@ public String toString() { assertTrue(leaderFailed.get()); } + + assertEquals(Integer.valueOf(1), metricsRegistry.getCounterStore().get("leader.checker.failure.count").getCounterValue()); } public void testLeaderBehaviour() { @@ -453,12 +466,15 @@ public void testLeaderBehaviour() { transportService.start(); transportService.acceptIncomingRequests(); + final TestInMemoryMetricsRegistry metricsRegistry = new TestInMemoryMetricsRegistry(); + final ClusterManagerMetrics clusterManagerMetrics = new ClusterManagerMetrics(metricsRegistry); final LeaderChecker leaderChecker = new LeaderChecker( settings, clusterSettings, transportService, e -> fail("shouldn't be checking anything"), - () -> nodeHealthServiceStatus.get() + () -> nodeHealthServiceStatus.get(), + clusterManagerMetrics ); final DiscoveryNodes discoveryNodes = DiscoveryNodes.builder() @@ -523,6 +539,7 @@ public void testLeaderBehaviour() { equalTo("rejecting leader check from [" + otherNode + "] sent to a node that is no longer the cluster-manager") ); } + assertEquals(Integer.valueOf(0), metricsRegistry.getCounterStore().get("leader.checker.failure.count").getCounterValue()); } private class CapturingTransportResponseHandler implements TransportResponseHandler { diff --git a/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java b/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java index d94f3fb304fe2..f84f0326f4a9d 100644 --- a/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java +++ b/server/src/test/java/org/opensearch/cluster/coordination/NodeJoinTests.java @@ -33,6 +33,7 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.Version; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.OpenSearchAllocationTestCase; @@ -61,6 +62,7 @@ import org.opensearch.monitor.StatusInfo; import org.opensearch.node.Node; import org.opensearch.node.remotestore.RemoteStoreNodeService; +import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry; import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; @@ -179,7 +181,8 @@ private void setupRealClusterManagerServiceAndCoordinator(long term, ClusterStat ClusterManagerService clusterManagerService = new ClusterManagerService( Settings.builder().put(Node.NODE_NAME_SETTING.getKey(), "test_node").build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), - threadPool + threadPool, + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) ); AtomicReference clusterStateRef = new AtomicReference<>(initialState); clusterManagerService.setClusterStatePublisher((event, publishListener, ackListener) -> { @@ -270,7 +273,8 @@ protected void onSendRequest( ElectionStrategy.DEFAULT_INSTANCE, nodeHealthService, persistedStateRegistry, - Mockito.mock(RemoteStoreNodeService.class) + Mockito.mock(RemoteStoreNodeService.class), + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) ); transportService.start(); transportService.acceptIncomingRequests(); diff --git a/server/src/test/java/org/opensearch/cluster/routing/allocation/AllocationServiceTests.java b/server/src/test/java/org/opensearch/cluster/routing/allocation/AllocationServiceTests.java index 64d9c243304d8..cce75105dd33f 100644 --- a/server/src/test/java/org/opensearch/cluster/routing/allocation/AllocationServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/routing/allocation/AllocationServiceTests.java @@ -33,6 +33,7 @@ import org.opensearch.Version; import org.opensearch.cluster.ClusterInfo; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.EmptyClusterInfoService; @@ -56,6 +57,9 @@ import org.opensearch.common.settings.Settings; import org.opensearch.gateway.GatewayAllocator; import org.opensearch.snapshots.EmptySnapshotsInfoService; +import org.opensearch.telemetry.metrics.Histogram; +import org.opensearch.telemetry.metrics.MetricsRegistry; +import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.gateway.TestGatewayAllocator; @@ -77,6 +81,12 @@ import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.not; +import static org.mockito.ArgumentMatchers.anyDouble; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; public class AllocationServiceTests extends OpenSearchTestCase { @@ -137,6 +147,16 @@ public void testAssignsPrimariesInPriorityOrderThenReplicas() { .put(CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_OUTGOING_RECOVERIES_SETTING.getKey(), Integer.MAX_VALUE) .build(); final ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + final MetricsRegistry metricsRegistry = mock(MetricsRegistry.class); + final Histogram rerouteHistogram = mock(Histogram.class); + final Histogram mockedHistogram = mock(Histogram.class); + when(metricsRegistry.createHistogram(anyString(), anyString(), anyString())).thenAnswer(invocationOnMock -> { + String histogramName = (String) invocationOnMock.getArguments()[0]; + if (histogramName.contains("reroute.latency")) { + return rerouteHistogram; + } + return mockedHistogram; + }); final AllocationService allocationService = new AllocationService( new AllocationDeciders( Arrays.asList( @@ -158,7 +178,8 @@ public ShardAllocationDecision decideShardAllocation(ShardRouting shard, Routing } }, new EmptyClusterInfoService(), - EmptySnapshotsInfoService.INSTANCE + EmptySnapshotsInfoService.INSTANCE, + new ClusterManagerMetrics(metricsRegistry) ); final String unrealisticAllocatorName = "unrealistic"; @@ -258,10 +279,18 @@ public ShardAllocationDecision decideShardAllocation(ShardRouting shard, Routing assertThat(routingTable3.index("mediumPriority").shardsWithState(ShardRoutingState.STARTED).size(), equalTo(4)); assertTrue(routingTable3.index("lowPriority").allPrimaryShardsActive()); assertThat(routingTable3.index("invalid").shardsWithState(ShardRoutingState.STARTED), empty()); + + verify(rerouteHistogram, times(3)).record(anyDouble()); } public void testExplainsNonAllocationOfShardWithUnknownAllocator() { - final AllocationService allocationService = new AllocationService(null, null, null, null); + final AllocationService allocationService = new AllocationService( + null, + null, + null, + null, + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) + ); allocationService.setExistingShardsAllocators( Collections.singletonMap(GatewayAllocator.ALLOCATOR_NAME, new TestGatewayAllocator()) ); diff --git a/server/src/test/java/org/opensearch/cluster/service/ClusterApplierServiceTests.java b/server/src/test/java/org/opensearch/cluster/service/ClusterApplierServiceTests.java index c5ed505e6bbf2..be6057a391b2e 100644 --- a/server/src/test/java/org/opensearch/cluster/service/ClusterApplierServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/service/ClusterApplierServiceTests.java @@ -35,6 +35,7 @@ import org.apache.logging.log4j.Level; import org.apache.logging.log4j.LogManager; import org.opensearch.Version; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateObserver; @@ -51,6 +52,8 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.concurrent.ThreadContext; +import org.opensearch.telemetry.metrics.Histogram; +import org.opensearch.telemetry.metrics.MetricsRegistry; import org.opensearch.test.MockLogAppender; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.junit.annotations.TestLogging; @@ -64,6 +67,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; @@ -74,15 +78,30 @@ import static org.opensearch.test.ClusterServiceUtils.setState; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.is; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyDouble; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.atLeastOnce; +import static org.mockito.Mockito.clearInvocations; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoInteractions; +import static org.mockito.Mockito.when; public class ClusterApplierServiceTests extends OpenSearchTestCase { private static ThreadPool threadPool; private TimedClusterApplierService clusterApplierService; + private static MetricsRegistry metricsRegistry; + private static Histogram applierslatencyHistogram; + private static Histogram listenerslatencyHistogram; @BeforeClass public static void createThreadPool() { threadPool = new TestThreadPool(ClusterApplierServiceTests.class.getName()); + metricsRegistry = mock(MetricsRegistry.class); + applierslatencyHistogram = mock(Histogram.class); + listenerslatencyHistogram = mock(Histogram.class); } @AfterClass @@ -96,7 +115,14 @@ public static void stopThreadPool() { @Before public void setUp() throws Exception { super.setUp(); - clusterApplierService = createTimedClusterService(true); + when(metricsRegistry.createHistogram(anyString(), anyString(), anyString())).thenAnswer(invocationOnMock -> { + String histogramName = (String) invocationOnMock.getArguments()[0]; + if (histogramName.contains("appliers.latency")) { + return applierslatencyHistogram; + } + return listenerslatencyHistogram; + }); + clusterApplierService = createTimedClusterService(true, Optional.of(metricsRegistry)); } @After @@ -105,13 +131,26 @@ public void tearDown() throws Exception { super.tearDown(); } - private TimedClusterApplierService createTimedClusterService(boolean makeClusterManager) { + private TimedClusterApplierService createTimedClusterService( + boolean makeClusterManager, + Optional metricsRegistryOptional + ) { DiscoveryNode localNode = new DiscoveryNode("node1", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT); - TimedClusterApplierService timedClusterApplierService = new TimedClusterApplierService( - Settings.builder().put("cluster.name", "ClusterApplierServiceTests").build(), - new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), - threadPool - ); + TimedClusterApplierService timedClusterApplierService; + if (metricsRegistryOptional != null && metricsRegistryOptional.isPresent()) { + timedClusterApplierService = new TimedClusterApplierService( + Settings.builder().put("cluster.name", "ClusterApplierServiceTests").build(), + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + threadPool, + new ClusterManagerMetrics(metricsRegistry) + ); + } else { + timedClusterApplierService = new TimedClusterApplierService( + Settings.builder().put("cluster.name", "ClusterApplierServiceTests").build(), + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + threadPool + ); + } timedClusterApplierService.setNodeConnectionsService(createNoOpNodeConnectionsService()); timedClusterApplierService.setInitialState( ClusterState.builder(new ClusterName("ClusterApplierServiceTests")) @@ -194,6 +233,8 @@ public void onFailure(String source, Exception e) { }); assertBusy(mockAppender::assertAllExpectationsMatched); } + verifyNoInteractions(applierslatencyHistogram); + verifyNoInteractions(listenerslatencyHistogram); } @TestLogging(value = "org.opensearch.cluster.service:WARN", reason = "to ensure that we log cluster state events on WARN level") @@ -291,10 +332,12 @@ public void onFailure(String source, Exception e) { latch.await(); mockAppender.assertAllExpectationsMatched(); } + verifyNoInteractions(applierslatencyHistogram); + verifyNoInteractions(listenerslatencyHistogram); } public void testLocalNodeClusterManagerListenerCallbacks() { - TimedClusterApplierService timedClusterApplierService = createTimedClusterService(false); + TimedClusterApplierService timedClusterApplierService = createTimedClusterService(false, Optional.empty()); AtomicBoolean isClusterManager = new AtomicBoolean(); timedClusterApplierService.addLocalNodeClusterManagerListener(new LocalNodeClusterManagerListener() { @@ -329,6 +372,8 @@ public void offClusterManager() { setState(timedClusterApplierService, state); assertThat(isClusterManager.get(), is(true)); + verifyNoInteractions(applierslatencyHistogram, listenerslatencyHistogram); + timedClusterApplierService.close(); } @@ -338,7 +383,7 @@ public void offClusterManager() { * To support inclusive language, LocalNodeMasterListener is deprecated in 2.2. */ public void testDeprecatedLocalNodeMasterListenerCallbacks() { - TimedClusterApplierService timedClusterApplierService = createTimedClusterService(false); + TimedClusterApplierService timedClusterApplierService = createTimedClusterService(false, Optional.empty()); AtomicBoolean isClusterManager = new AtomicBoolean(); timedClusterApplierService.addLocalNodeMasterListener(new LocalNodeMasterListener() { @@ -366,6 +411,8 @@ public void offMaster() { setState(timedClusterApplierService, state); assertThat(isClusterManager.get(), is(false)); + verifyNoInteractions(applierslatencyHistogram, listenerslatencyHistogram); + timedClusterApplierService.close(); } @@ -405,6 +452,10 @@ public void onFailure(String source, Exception e) { latch.await(); assertNull(error.get()); assertTrue(applierCalled.get()); + + verify(applierslatencyHistogram, atLeastOnce()).record(anyDouble(), any()); + clearInvocations(applierslatencyHistogram); + verifyNoInteractions(listenerslatencyHistogram); } public void testClusterStateApplierBubblesUpExceptionsInApplier() throws InterruptedException { @@ -435,6 +486,9 @@ public void onFailure(String source, Exception e) { latch.await(); assertNotNull(error.get()); assertThat(error.get().getMessage(), containsString("dummy exception")); + + verifyNoInteractions(applierslatencyHistogram); + verifyNoInteractions(listenerslatencyHistogram); } public void testClusterStateApplierBubblesUpExceptionsInSettingsApplier() throws InterruptedException { @@ -478,6 +532,9 @@ public void onFailure(String source, Exception e) { latch.await(); assertNotNull(error.get()); assertThat(error.get().getMessage(), containsString("illegal value can't update")); + + verifyNoInteractions(applierslatencyHistogram); + verifyNoInteractions(listenerslatencyHistogram); } public void testClusterStateApplierSwallowsExceptionInListener() throws InterruptedException { @@ -509,6 +566,9 @@ public void onFailure(String source, Exception e) { latch.await(); assertNull(error.get()); assertTrue(applierCalled.get()); + + verifyNoInteractions(applierslatencyHistogram); + verifyNoInteractions(listenerslatencyHistogram); } public void testClusterStateApplierCanCreateAnObserver() throws InterruptedException { @@ -565,6 +625,10 @@ public void onFailure(String source, Exception e) { latch.await(); assertNull(error.get()); assertTrue(applierCalled.get()); + + verify(applierslatencyHistogram, atLeastOnce()).record(anyDouble(), any()); + clearInvocations(applierslatencyHistogram); + verifyNoInteractions(listenerslatencyHistogram); } public void testThreadContext() throws InterruptedException { @@ -609,6 +673,9 @@ public void onFailure(String source, Exception e) { } latch.await(); + + verifyNoInteractions(applierslatencyHistogram); + verifyNoInteractions(listenerslatencyHistogram); } static class TimedClusterApplierService extends ClusterApplierService { @@ -622,6 +689,16 @@ static class TimedClusterApplierService extends ClusterApplierService { this.clusterSettings = clusterSettings; } + TimedClusterApplierService( + Settings settings, + ClusterSettings clusterSettings, + ThreadPool threadPool, + ClusterManagerMetrics clusterManagerMetrics + ) { + super("test_node", settings, clusterSettings, threadPool, clusterManagerMetrics); + this.clusterSettings = clusterSettings; + } + @Override protected long currentTimeInMillis() { if (currentTimeOverride != null) { diff --git a/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java b/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java index 85f6c129944fa..8c84ac365dfd1 100644 --- a/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java +++ b/server/src/test/java/org/opensearch/cluster/service/MasterServiceTests.java @@ -40,6 +40,7 @@ import org.opensearch.Version; import org.opensearch.cluster.AckedClusterStateUpdateTask; import org.opensearch.cluster.ClusterChangedEvent; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateTaskConfig; @@ -61,6 +62,9 @@ import org.opensearch.common.util.concurrent.BaseFuture; import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.node.Node; +import org.opensearch.telemetry.metrics.Histogram; +import org.opensearch.telemetry.metrics.MetricsRegistry; +import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry; import org.opensearch.test.MockLogAppender; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.junit.annotations.TestLogging; @@ -76,6 +80,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.ConcurrentHashMap; @@ -95,6 +100,13 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasKey; import static org.hamcrest.Matchers.is; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyDouble; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; public class MasterServiceTests extends OpenSearchTestCase { @@ -125,15 +137,36 @@ public void randomizeCurrentTime() { } private ClusterManagerService createClusterManagerService(boolean makeClusterManager) { + return createClusterManagerService(makeClusterManager, Optional.empty()); + } + + private ClusterManagerService createClusterManagerService( + boolean makeClusterManager, + Optional metricsRegistryOptional + ) { final DiscoveryNode localNode = new DiscoveryNode("node1", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT); - final ClusterManagerService clusterManagerService = new ClusterManagerService( - Settings.builder() - .put(ClusterName.CLUSTER_NAME_SETTING.getKey(), MasterServiceTests.class.getSimpleName()) - .put(Node.NODE_NAME_SETTING.getKey(), "test_node") - .build(), - new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), - threadPool - ); + final ClusterManagerService clusterManagerService; + if (metricsRegistryOptional != null && metricsRegistryOptional.isPresent()) { + clusterManagerService = new ClusterManagerService( + Settings.builder() + .put(ClusterName.CLUSTER_NAME_SETTING.getKey(), MasterServiceTests.class.getSimpleName()) + .put(Node.NODE_NAME_SETTING.getKey(), "test_node") + .build(), + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + threadPool, + new ClusterManagerMetrics(metricsRegistryOptional.get()) + ); + } else { + clusterManagerService = new ClusterManagerService( + Settings.builder() + .put(ClusterName.CLUSTER_NAME_SETTING.getKey(), MasterServiceTests.class.getSimpleName()) + .put(Node.NODE_NAME_SETTING.getKey(), "test_node") + .build(), + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + threadPool + ); + } + final ClusterState initialClusterState = ClusterState.builder(new ClusterName(MasterServiceTests.class.getSimpleName())) .nodes( DiscoveryNodes.builder() @@ -154,7 +187,18 @@ private ClusterManagerService createClusterManagerService(boolean makeClusterMan } public void testClusterManagerAwareExecution() throws Exception { - final ClusterManagerService nonClusterManager = createClusterManagerService(false); + final MetricsRegistry metricsRegistry = mock(MetricsRegistry.class); + final Histogram clusterStateComputeHistogram = mock(Histogram.class); + final Histogram clusterStatePublishHistogram = mock(Histogram.class); + when(metricsRegistry.createHistogram(anyString(), anyString(), anyString())).thenAnswer(invocationOnMock -> { + String histogramName = (String) invocationOnMock.getArguments()[0]; + if (histogramName.contains("cluster.state.new.compute.latency")) { + return clusterStateComputeHistogram; + } + return clusterStatePublishHistogram; + }); + + final ClusterManagerService nonClusterManager = createClusterManagerService(false, Optional.of(metricsRegistry)); final boolean[] taskFailed = { false }; final CountDownLatch latch1 = new CountDownLatch(1); @@ -194,6 +238,8 @@ public void onFailure(String source, Exception e) { assertFalse("non-cluster-manager cluster state update task was not executed", taskFailed[0]); nonClusterManager.close(); + + verify(clusterStateComputeHistogram, times(1)).record(anyDouble(), any()); } public void testThreadContext() throws InterruptedException { @@ -1070,7 +1116,8 @@ public void testLongClusterStateUpdateLogging() throws Exception { .put(Node.NODE_NAME_SETTING.getKey(), "test_node") .build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), - threadPool + threadPool, + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) ) ) { @@ -1246,6 +1293,18 @@ public void testAcking() throws InterruptedException { final DiscoveryNode node1 = new DiscoveryNode("node1", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT); final DiscoveryNode node2 = new DiscoveryNode("node2", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT); final DiscoveryNode node3 = new DiscoveryNode("node3", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT); + + final MetricsRegistry metricsRegistry = mock(MetricsRegistry.class); + final Histogram clusterStateComputeHistogram = mock(Histogram.class); + final Histogram clusterStatePublishHistogram = mock(Histogram.class); + when(metricsRegistry.createHistogram(anyString(), anyString(), anyString())).thenAnswer(invocationOnMock -> { + String histogramName = (String) invocationOnMock.getArguments()[0]; + if (histogramName.contains("cluster.state.new.compute.latency")) { + return clusterStateComputeHistogram; + } + return clusterStatePublishHistogram; + }); + try ( ClusterManagerService clusterManagerService = new ClusterManagerService( Settings.builder() @@ -1253,7 +1312,8 @@ public void testAcking() throws InterruptedException { .put(Node.NODE_NAME_SETTING.getKey(), "test_node") .build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), - threadPool + threadPool, + new ClusterManagerMetrics(metricsRegistry) ) ) { @@ -1372,6 +1432,9 @@ public void onAckTimeout() { latch.await(); } } + + verify(clusterStateComputeHistogram, times(2)).record(anyDouble(), any()); + verify(clusterStatePublishHistogram, times(1)).record(anyDouble()); } public void testDeprecatedMasterServiceUpdateTaskThreadName() { diff --git a/server/src/test/java/org/opensearch/discovery/DiscoveryModuleTests.java b/server/src/test/java/org/opensearch/discovery/DiscoveryModuleTests.java index b33ebf8333b36..5539b3237c2bf 100644 --- a/server/src/test/java/org/opensearch/discovery/DiscoveryModuleTests.java +++ b/server/src/test/java/org/opensearch/discovery/DiscoveryModuleTests.java @@ -32,6 +32,7 @@ package org.opensearch.discovery; import org.opensearch.Version; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.coordination.Coordinator; import org.opensearch.cluster.coordination.PersistedStateRegistry; @@ -48,6 +49,7 @@ import org.opensearch.gateway.GatewayMetaState; import org.opensearch.node.remotestore.RemoteStoreNodeService; import org.opensearch.plugins.DiscoveryPlugin; +import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry; import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.transport.MockTransportService; @@ -128,7 +130,8 @@ private DiscoveryModule newModule(Settings settings, List plugi mock(RerouteService.class), null, new PersistedStateRegistry(), - remoteStoreNodeService + remoteStoreNodeService, + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) ); } diff --git a/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java b/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java index 1c43bb565ef69..dd2fb51151a5b 100644 --- a/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java +++ b/server/src/test/java/org/opensearch/gateway/ClusterStateUpdatersTests.java @@ -55,6 +55,7 @@ import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; import org.opensearch.repositories.IndexId; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import java.util.Arrays; @@ -123,7 +124,7 @@ public String getValue(final String value) { }) ); - final ClusterService clusterService = new ClusterService(Settings.EMPTY, clusterSettings, null); + final ClusterService clusterService = ClusterServiceUtils.createClusterService(Settings.EMPTY, clusterSettings, null); final Metadata.Builder builder = Metadata.builder(); final Settings settings = Settings.builder().put("foo.old", randomAlphaOfLength(8)).build(); applySettingsToBuilder.accept(builder, settings); diff --git a/server/src/test/java/org/opensearch/gateway/GatewayServiceTests.java b/server/src/test/java/org/opensearch/gateway/GatewayServiceTests.java index c448c4b07e03b..59fb7df5428e2 100644 --- a/server/src/test/java/org/opensearch/gateway/GatewayServiceTests.java +++ b/server/src/test/java/org/opensearch/gateway/GatewayServiceTests.java @@ -53,6 +53,7 @@ import org.opensearch.common.unit.TimeValue; import org.opensearch.core.common.transport.TransportAddress; import org.opensearch.snapshots.EmptySnapshotsInfoService; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.gateway.TestGatewayAllocator; import org.hamcrest.Matchers; @@ -68,7 +69,7 @@ public class GatewayServiceTests extends OpenSearchTestCase { private GatewayService createService(final Settings.Builder settings) { - final ClusterService clusterService = new ClusterService( + final ClusterService clusterService = ClusterServiceUtils.createClusterService( Settings.builder().put("cluster.name", "GatewayServiceTests").build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), null diff --git a/server/src/test/java/org/opensearch/index/IndexingPressureServiceTests.java b/server/src/test/java/org/opensearch/index/IndexingPressureServiceTests.java index 0b657c1c9745f..8db7c58bf9503 100644 --- a/server/src/test/java/org/opensearch/index/IndexingPressureServiceTests.java +++ b/server/src/test/java/org/opensearch/index/IndexingPressureServiceTests.java @@ -24,6 +24,7 @@ import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.stats.IndexingPressurePerShardStats; import org.opensearch.index.stats.IndexingPressureStats; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.junit.Before; @@ -44,7 +45,7 @@ public class IndexingPressureServiceTests extends OpenSearchTestCase { @Before public void beforeTest() { clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - clusterService = new ClusterService(settings, clusterSettings, null); + clusterService = ClusterServiceUtils.createClusterService(settings, clusterSettings, null); } public void testCoordinatingOperationForShardIndexingPressure() { diff --git a/server/src/test/java/org/opensearch/index/ShardIndexingPressureConcurrentExecutionTests.java b/server/src/test/java/org/opensearch/index/ShardIndexingPressureConcurrentExecutionTests.java index ce719a18898f8..c5ad1370ac75a 100644 --- a/server/src/test/java/org/opensearch/index/ShardIndexingPressureConcurrentExecutionTests.java +++ b/server/src/test/java/org/opensearch/index/ShardIndexingPressureConcurrentExecutionTests.java @@ -19,6 +19,7 @@ import org.opensearch.index.stats.IndexingPressurePerShardStats; import org.opensearch.index.stats.IndexingPressureStats; import org.opensearch.index.stats.ShardIndexingPressureStats; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.hamcrest.Matcher; import org.hamcrest.MatcherAssert; @@ -42,7 +43,7 @@ public class ShardIndexingPressureConcurrentExecutionTests extends OpenSearchTes .build(); private final ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - private final ClusterService clusterService = new ClusterService(settings, clusterSettings, null); + private final ClusterService clusterService = ClusterServiceUtils.createClusterService(settings, clusterSettings, null); public enum OperationType { COORDINATING, diff --git a/server/src/test/java/org/opensearch/index/ShardIndexingPressureMemoryManagerTests.java b/server/src/test/java/org/opensearch/index/ShardIndexingPressureMemoryManagerTests.java index 023063c7d6e03..31ecad7c8d701 100644 --- a/server/src/test/java/org/opensearch/index/ShardIndexingPressureMemoryManagerTests.java +++ b/server/src/test/java/org/opensearch/index/ShardIndexingPressureMemoryManagerTests.java @@ -8,11 +8,11 @@ package org.opensearch.index; -import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import java.util.concurrent.TimeUnit; @@ -27,7 +27,7 @@ public class ShardIndexingPressureMemoryManagerTests extends OpenSearchTestCase .build(); private final ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); private final ShardIndexingPressureSettings shardIndexingPressureSettings = new ShardIndexingPressureSettings( - new ClusterService(settings, clusterSettings, null), + ClusterServiceUtils.createClusterService(settings, clusterSettings, null), settings, IndexingPressure.MAX_INDEXING_BYTES.get(settings).getBytes() ); diff --git a/server/src/test/java/org/opensearch/index/ShardIndexingPressureSettingsTests.java b/server/src/test/java/org/opensearch/index/ShardIndexingPressureSettingsTests.java index c555d8f9c489d..5e84a76b2250a 100644 --- a/server/src/test/java/org/opensearch/index/ShardIndexingPressureSettingsTests.java +++ b/server/src/test/java/org/opensearch/index/ShardIndexingPressureSettingsTests.java @@ -11,6 +11,7 @@ import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; public class ShardIndexingPressureSettingsTests extends OpenSearchTestCase { @@ -24,7 +25,7 @@ public class ShardIndexingPressureSettingsTests extends OpenSearchTestCase { .build(); final ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - final ClusterService clusterService = new ClusterService(settings, clusterSettings, null); + final ClusterService clusterService = ClusterServiceUtils.createClusterService(settings, clusterSettings, null); public void testFromSettings() { ShardIndexingPressureSettings shardIndexingPressureSettings = new ShardIndexingPressureSettings( diff --git a/server/src/test/java/org/opensearch/index/ShardIndexingPressureStoreTests.java b/server/src/test/java/org/opensearch/index/ShardIndexingPressureStoreTests.java index 46f9801035ac3..d97eec4cc001d 100644 --- a/server/src/test/java/org/opensearch/index/ShardIndexingPressureStoreTests.java +++ b/server/src/test/java/org/opensearch/index/ShardIndexingPressureStoreTests.java @@ -8,10 +8,10 @@ package org.opensearch.index; -import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.core.index.shard.ShardId; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.junit.Before; @@ -22,7 +22,7 @@ public class ShardIndexingPressureStoreTests extends OpenSearchTestCase { private final Settings settings = Settings.builder().put(ShardIndexingPressureStore.MAX_COLD_STORE_SIZE.getKey(), 200).build(); private final ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); private final ShardIndexingPressureSettings shardIndexingPressureSettings = new ShardIndexingPressureSettings( - new ClusterService(settings, clusterSettings, null), + ClusterServiceUtils.createClusterService(settings, clusterSettings, null), settings, IndexingPressure.MAX_INDEXING_BYTES.get(settings).getBytes() ); diff --git a/server/src/test/java/org/opensearch/index/ShardIndexingPressureTests.java b/server/src/test/java/org/opensearch/index/ShardIndexingPressureTests.java index e7600b1d4c41a..ddc3592511de4 100644 --- a/server/src/test/java/org/opensearch/index/ShardIndexingPressureTests.java +++ b/server/src/test/java/org/opensearch/index/ShardIndexingPressureTests.java @@ -17,6 +17,7 @@ import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.stats.IndexingPressurePerShardStats; import org.opensearch.index.stats.IndexingPressureStats; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; public class ShardIndexingPressureTests extends OpenSearchTestCase { @@ -30,7 +31,7 @@ public class ShardIndexingPressureTests extends OpenSearchTestCase { .build(); final ClusterSettings clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - final ClusterService clusterService = new ClusterService(settings, clusterSettings, null); + final ClusterService clusterService = ClusterServiceUtils.createClusterService(settings, clusterSettings, null); public void testMemoryBytesMarkedAndReleased() { ShardIndexingPressure shardIndexingPressure = new ShardIndexingPressure(settings, clusterService); diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteSegmentTransferTrackerTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteSegmentTransferTrackerTests.java index ccdd1fe4ab609..280598c516c3c 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteSegmentTransferTrackerTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteSegmentTransferTrackerTests.java @@ -15,6 +15,7 @@ import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.store.DirectoryFileTransferTracker; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; @@ -42,7 +43,7 @@ public class RemoteSegmentTransferTrackerTests extends OpenSearchTestCase { public void setUp() throws Exception { super.setUp(); threadPool = new TestThreadPool("remote_refresh_segment_pressure_settings_test"); - clusterService = new ClusterService( + clusterService = ClusterServiceUtils.createClusterService( Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), threadPool diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureServiceTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureServiceTests.java index 9d00cf9f2be46..18d18f2dc30b1 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureServiceTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureServiceTests.java @@ -14,6 +14,7 @@ import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException; import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.shard.IndexShard; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; @@ -45,7 +46,7 @@ public class RemoteStorePressureServiceTests extends OpenSearchTestCase { public void setUp() throws Exception { super.setUp(); threadPool = new TestThreadPool("remote_refresh_segment_pressure_settings_test"); - clusterService = new ClusterService( + clusterService = ClusterServiceUtils.createClusterService( Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), threadPool diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureSettingsTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureSettingsTests.java index 064c6c10eba02..7c1ef0de91887 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureSettingsTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteStorePressureSettingsTests.java @@ -11,6 +11,7 @@ import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; @@ -27,7 +28,7 @@ public class RemoteStorePressureSettingsTests extends OpenSearchTestCase { public void setUp() throws Exception { super.setUp(); threadPool = new TestThreadPool("remote_refresh_segment_pressure_settings_test"); - clusterService = new ClusterService( + clusterService = ClusterServiceUtils.createClusterService( Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), threadPool diff --git a/server/src/test/java/org/opensearch/index/remote/RemoteStoreStatsTrackerFactoryTests.java b/server/src/test/java/org/opensearch/index/remote/RemoteStoreStatsTrackerFactoryTests.java index c300f316ac633..2bc4792a9c31c 100644 --- a/server/src/test/java/org/opensearch/index/remote/RemoteStoreStatsTrackerFactoryTests.java +++ b/server/src/test/java/org/opensearch/index/remote/RemoteStoreStatsTrackerFactoryTests.java @@ -13,6 +13,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.shard.IndexShard; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; @@ -39,7 +40,11 @@ public void setUp() throws Exception { RemoteStoreStatsTrackerFactory.Defaults.MOVING_AVERAGE_WINDOW_SIZE_MIN_VALUE ) .build(); - clusterService = new ClusterService(settings, new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), threadPool); + clusterService = ClusterServiceUtils.createClusterService( + settings, + new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + threadPool + ); remoteStoreStatsTrackerFactory = new RemoteStoreStatsTrackerFactory(clusterService, settings); } @@ -85,7 +90,11 @@ public void testInvalidMovingAverageWindowSize() { "Failed to parse value", IllegalArgumentException.class, () -> new RemoteStoreStatsTrackerFactory( - new ClusterService(settings, new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), threadPool), + ClusterServiceUtils.createClusterService( + settings, + new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + threadPool + ), settings ) ); @@ -107,7 +116,11 @@ public void testUpdateAfterGetConfiguredSettings() { public void testGetDefaultSettings() { remoteStoreStatsTrackerFactory = new RemoteStoreStatsTrackerFactory( - new ClusterService(Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), threadPool), + ClusterServiceUtils.createClusterService( + Settings.EMPTY, + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), + threadPool + ), Settings.EMPTY ); // Check moving average window size updated diff --git a/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java b/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java index d1516a6973d62..94269de9349fe 100644 --- a/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java +++ b/server/src/test/java/org/opensearch/index/shard/RemoteStoreRefreshListenerTests.java @@ -41,6 +41,7 @@ import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.indices.replication.checkpoint.SegmentReplicationCheckpointPublisher; import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.threadpool.ThreadPool; import org.junit.After; @@ -87,7 +88,7 @@ public void setup(boolean primary, int numberOfDocs) throws IOException { indexShard.refresh("test"); } - clusterService = new ClusterService( + clusterService = ClusterServiceUtils.createClusterService( Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), threadPool @@ -804,7 +805,7 @@ private Tuple mockIn return null; }).when(emptyCheckpointPublisher).publish(any(), any()); - clusterService = new ClusterService( + clusterService = ClusterServiceUtils.createClusterService( Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), threadPool diff --git a/server/src/test/java/org/opensearch/node/ResponseCollectorServiceTests.java b/server/src/test/java/org/opensearch/node/ResponseCollectorServiceTests.java index 7ca1f1e864b99..7567224f16ac3 100644 --- a/server/src/test/java/org/opensearch/node/ResponseCollectorServiceTests.java +++ b/server/src/test/java/org/opensearch/node/ResponseCollectorServiceTests.java @@ -41,6 +41,7 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.core.common.transport.TransportAddress; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; @@ -63,7 +64,7 @@ public class ResponseCollectorServiceTests extends OpenSearchTestCase { public void setUp() throws Exception { super.setUp(); threadpool = new TestThreadPool("response_collector_tests"); - clusterService = new ClusterService( + clusterService = ClusterServiceUtils.createClusterService( Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), threadpool diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlServiceTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlServiceTests.java index 4f615290f1805..fbb083a3ae419 100644 --- a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlServiceTests.java +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlServiceTests.java @@ -17,6 +17,7 @@ import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlActionType; import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; import org.opensearch.ratelimitting.admissioncontrol.settings.CpuBasedAdmissionControllerSettings; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; @@ -33,7 +34,7 @@ public class AdmissionControlServiceTests extends OpenSearchTestCase { public void setUp() throws Exception { super.setUp(); threadPool = new TestThreadPool("admission_controller_settings_test"); - clusterService = new ClusterService( + clusterService = ClusterServiceUtils.createClusterService( Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), threadPool diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettingsTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettingsTests.java index c11ee1cc608f6..fbadcad804b31 100644 --- a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettingsTests.java +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/AdmissionControlSettingsTests.java @@ -13,6 +13,7 @@ import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; @@ -28,7 +29,7 @@ public class AdmissionControlSettingsTests extends OpenSearchTestCase { public void setUp() throws Exception { super.setUp(); threadPool = new TestThreadPool("admission_controller_settings_test"); - clusterService = new ClusterService( + clusterService = ClusterServiceUtils.createClusterService( Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), threadPool diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CpuBasedAdmissionControllerTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CpuBasedAdmissionControllerTests.java index e72c0cd58ed64..f2cb45a033460 100644 --- a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CpuBasedAdmissionControllerTests.java +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/CpuBasedAdmissionControllerTests.java @@ -15,6 +15,7 @@ import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlActionType; import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; import org.opensearch.ratelimitting.admissioncontrol.settings.CpuBasedAdmissionControllerSettings; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; @@ -31,7 +32,7 @@ public class CpuBasedAdmissionControllerTests extends OpenSearchTestCase { public void setUp() throws Exception { super.setUp(); threadPool = new TestThreadPool("admission_controller_settings_test"); - clusterService = new ClusterService( + clusterService = ClusterServiceUtils.createClusterService( Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), threadPool diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/IoBasedAdmissionControllerTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/IoBasedAdmissionControllerTests.java index c5a2208f49ce6..54cb438e14ce6 100644 --- a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/IoBasedAdmissionControllerTests.java +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/controllers/IoBasedAdmissionControllerTests.java @@ -15,6 +15,7 @@ import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlActionType; import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; import org.opensearch.ratelimitting.admissioncontrol.settings.IoBasedAdmissionControllerSettings; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; @@ -31,7 +32,7 @@ public class IoBasedAdmissionControllerTests extends OpenSearchTestCase { public void setUp() throws Exception { super.setUp(); threadPool = new TestThreadPool("admission_controller_settings_test"); - clusterService = new ClusterService( + clusterService = ClusterServiceUtils.createClusterService( Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), threadPool diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControllerSettingsTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControllerSettingsTests.java index 6836ecb3d615f..f5686f33e7f50 100644 --- a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControllerSettingsTests.java +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/settings/CPUBasedAdmissionControllerSettingsTests.java @@ -13,6 +13,7 @@ import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; @@ -28,7 +29,7 @@ public class CPUBasedAdmissionControllerSettingsTests extends OpenSearchTestCase public void setUp() throws Exception { super.setUp(); threadPool = new TestThreadPool("admission_controller_settings_test"); - clusterService = new ClusterService( + clusterService = ClusterServiceUtils.createClusterService( Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), threadPool diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/settings/IoBasedAdmissionControllerSettingsTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/settings/IoBasedAdmissionControllerSettingsTests.java index c462f9700264d..3f157531f6c9a 100644 --- a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/settings/IoBasedAdmissionControllerSettingsTests.java +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/settings/IoBasedAdmissionControllerSettingsTests.java @@ -21,6 +21,7 @@ import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; @@ -36,7 +37,7 @@ public class IoBasedAdmissionControllerSettingsTests extends OpenSearchTestCase public void setUp() throws Exception { super.setUp(); threadPool = new TestThreadPool("io_based_admission_controller_settings_test"); - clusterService = new ClusterService( + clusterService = ClusterServiceUtils.createClusterService( Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), threadPool diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/stats/AdmissionControlStatsTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/stats/AdmissionControlStatsTests.java index 7b4db5f787d6e..da57ef9f06a1a 100644 --- a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/stats/AdmissionControlStatsTests.java +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/stats/AdmissionControlStatsTests.java @@ -20,6 +20,7 @@ import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlActionType; import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; import org.opensearch.ratelimitting.admissioncontrol.settings.CpuBasedAdmissionControllerSettings; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; @@ -46,7 +47,7 @@ public void setUp() throws Exception { ) .build(); threadPool = new TestThreadPool("admission_controller_settings_test"); - ClusterService clusterService = new ClusterService( + ClusterService clusterService = ClusterServiceUtils.createClusterService( Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), threadPool diff --git a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/stats/AdmissionControllerStatsTests.java b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/stats/AdmissionControllerStatsTests.java index fe0399e79a5f4..0ef9aa61bb827 100644 --- a/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/stats/AdmissionControllerStatsTests.java +++ b/server/src/test/java/org/opensearch/ratelimitting/admissioncontrol/stats/AdmissionControllerStatsTests.java @@ -20,6 +20,7 @@ import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlActionType; import org.opensearch.ratelimitting.admissioncontrol.enums.AdmissionControlMode; import org.opensearch.ratelimitting.admissioncontrol.settings.CpuBasedAdmissionControllerSettings; +import org.opensearch.test.ClusterServiceUtils; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.TestThreadPool; import org.opensearch.threadpool.ThreadPool; @@ -43,7 +44,7 @@ public void setUp() throws Exception { ) .build(); threadPool = new TestThreadPool("admission_controller_settings_test"); - ClusterService clusterService = new ClusterService( + ClusterService clusterService = ClusterServiceUtils.createClusterService( Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), threadPool diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index d57abcf45a789..3848dce5a14ec 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -112,6 +112,7 @@ import org.opensearch.cluster.ClusterChangedEvent; import org.opensearch.cluster.ClusterInfo; import org.opensearch.cluster.ClusterInfoService; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterModule; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; @@ -2552,7 +2553,8 @@ public void start(ClusterState initialState) { ElectionStrategy.DEFAULT_INSTANCE, () -> new StatusInfo(HEALTHY, "healthy-info"), persistedStateRegistry, - remoteStoreNodeService + remoteStoreNodeService, + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) ); clusterManagerService.setClusterStatePublisher(coordinator); coordinator.start(); diff --git a/server/src/test/java/org/opensearch/telemetry/TestInMemoryCounter.java b/server/src/test/java/org/opensearch/telemetry/TestInMemoryCounter.java new file mode 100644 index 0000000000000..d9aee5ebfa941 --- /dev/null +++ b/server/src/test/java/org/opensearch/telemetry/TestInMemoryCounter.java @@ -0,0 +1,52 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.telemetry; + +import org.opensearch.telemetry.metrics.Counter; +import org.opensearch.telemetry.metrics.tags.Tags; + +import java.util.HashMap; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * This is a simple implementation of Counter which is utilized by TestInMemoryMetricsRegistry for + * Unit Tests. It initializes an atomic integer to add the values of counter which doesn't have any tags + * along with a map to store the values recorded against the tags. + * The map and atomic integer can then be used to get the added values. + */ +public class TestInMemoryCounter implements Counter { + + private AtomicInteger counterValue = new AtomicInteger(0); + private ConcurrentHashMap, Double> counterValueForTags = new ConcurrentHashMap<>(); + + public Integer getCounterValue() { + return this.counterValue.get(); + } + + public ConcurrentHashMap, Double> getCounterValueForTags() { + return this.counterValueForTags; + } + + @Override + public void add(double value) { + counterValue.addAndGet((int) value); + } + + @Override + public synchronized void add(double value, Tags tags) { + HashMap hashMap = (HashMap) tags.getTagsMap(); + if (counterValueForTags.get(hashMap) == null) { + counterValueForTags.put(hashMap, value); + } else { + value = counterValueForTags.get(hashMap) + value; + counterValueForTags.put(hashMap, value); + } + } +} diff --git a/server/src/test/java/org/opensearch/telemetry/TestInMemoryHistogram.java b/server/src/test/java/org/opensearch/telemetry/TestInMemoryHistogram.java new file mode 100644 index 0000000000000..ff28df2b6529d --- /dev/null +++ b/server/src/test/java/org/opensearch/telemetry/TestInMemoryHistogram.java @@ -0,0 +1,47 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.telemetry; + +import org.opensearch.telemetry.metrics.Histogram; +import org.opensearch.telemetry.metrics.tags.Tags; + +import java.util.HashMap; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * This is a simple implementation of Histogram which is utilized by TestInMemoryMetricsRegistry for + * Unit Tests. It initializes an atomic integer to record the value of histogram which doesn't have any tags + * along with a map to store the values recorded against the tags. + * The map and atomic integer can then be used to get the recorded values. + */ +public class TestInMemoryHistogram implements Histogram { + + private AtomicInteger histogramValue = new AtomicInteger(0); + private ConcurrentHashMap, Double> histogramValueForTags = new ConcurrentHashMap<>(); + + public Integer getHistogramValue() { + return this.histogramValue.get(); + } + + public ConcurrentHashMap, Double> getHistogramValueForTags() { + return this.histogramValueForTags; + } + + @Override + public void record(double value) { + histogramValue.addAndGet((int) value); + } + + @Override + public synchronized void record(double value, Tags tags) { + HashMap hashMap = (HashMap) tags.getTagsMap(); + histogramValueForTags.put(hashMap, value); + } +} diff --git a/server/src/test/java/org/opensearch/telemetry/TestInMemoryMetricsRegistry.java b/server/src/test/java/org/opensearch/telemetry/TestInMemoryMetricsRegistry.java new file mode 100644 index 0000000000000..6d395085b12ea --- /dev/null +++ b/server/src/test/java/org/opensearch/telemetry/TestInMemoryMetricsRegistry.java @@ -0,0 +1,71 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.telemetry; + +import org.opensearch.telemetry.metrics.Counter; +import org.opensearch.telemetry.metrics.Histogram; +import org.opensearch.telemetry.metrics.MetricsRegistry; +import org.opensearch.telemetry.metrics.tags.Tags; + +import java.io.Closeable; +import java.io.IOException; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Supplier; + +/** + * This is a simple implementation of MetricsRegistry which can be utilized by Unit Tests. + * It just initializes and stores counters/histograms within a map, once created. + * The maps can then be used to get the counters/histograms by their names. + */ +public class TestInMemoryMetricsRegistry implements MetricsRegistry { + + private ConcurrentHashMap counterStore = new ConcurrentHashMap<>(); + private ConcurrentHashMap histogramStore = new ConcurrentHashMap<>(); + + public ConcurrentHashMap getCounterStore() { + return this.counterStore; + } + + public ConcurrentHashMap getHistogramStore() { + return this.histogramStore; + } + + @Override + public Counter createCounter(String name, String description, String unit) { + TestInMemoryCounter counter = new TestInMemoryCounter(); + counterStore.putIfAbsent(name, counter); + return counter; + } + + @Override + public Counter createUpDownCounter(String name, String description, String unit) { + /** + * ToDo: To be implemented when required. + */ + return null; + } + + @Override + public Histogram createHistogram(String name, String description, String unit) { + TestInMemoryHistogram histogram = new TestInMemoryHistogram(); + histogramStore.putIfAbsent(name, histogram); + return histogram; + } + + @Override + public Closeable createGauge(String name, String description, String unit, Supplier valueProvider, Tags tags) { + /** + * ToDo: To be implemented when required. + */ + return null; + } + + @Override + public void close() throws IOException {} +} diff --git a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java index 0754cc1793dc8..1c2270bab1260 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/cluster/coordination/AbstractCoordinatorTestCase.java @@ -39,6 +39,7 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.OpenSearchException; import org.opensearch.Version; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterModule; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateTaskListener; @@ -89,6 +90,7 @@ import org.opensearch.monitor.StatusInfo; import org.opensearch.node.remotestore.RemoteStoreNodeService; import org.opensearch.repositories.RepositoriesService; +import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry; import org.opensearch.telemetry.tracing.noop.NoopTracer; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.disruption.DisruptableMockTransport; @@ -1144,7 +1146,8 @@ protected Optional getDisruptableMockTransport(Transpo settings, clusterSettings, deterministicTaskQueue, - threadPool + threadPool, + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) ); clusterService = new ClusterService(settings, clusterSettings, clusterManagerService, clusterApplierService); clusterService.setNodeConnectionsService( @@ -1180,7 +1183,8 @@ protected Optional getDisruptableMockTransport(Transpo getElectionStrategy(), nodeHealthService, persistedStateRegistry, - remoteStoreNodeService + remoteStoreNodeService, + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) ); clusterManagerService.setClusterStatePublisher(coordinator); final GatewayService gatewayService = new GatewayService( @@ -1594,9 +1598,10 @@ static class DisruptableClusterApplierService extends ClusterApplierService { Settings settings, ClusterSettings clusterSettings, DeterministicTaskQueue deterministicTaskQueue, - ThreadPool threadPool + ThreadPool threadPool, + ClusterManagerMetrics clusterManagerMetrics ) { - super(nodeName, settings, clusterSettings, threadPool); + super(nodeName, settings, clusterSettings, threadPool, clusterManagerMetrics); this.nodeName = nodeName; this.deterministicTaskQueue = deterministicTaskQueue; addStateApplier(event -> { diff --git a/test/framework/src/main/java/org/opensearch/cluster/service/FakeThreadPoolClusterManagerService.java b/test/framework/src/main/java/org/opensearch/cluster/service/FakeThreadPoolClusterManagerService.java index 3ca938c99b5fd..53ef595c7931e 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/service/FakeThreadPoolClusterManagerService.java +++ b/test/framework/src/main/java/org/opensearch/cluster/service/FakeThreadPoolClusterManagerService.java @@ -34,6 +34,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.cluster.ClusterChangedEvent; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.coordination.ClusterStatePublisher.AckListener; import org.opensearch.common.UUIDs; @@ -45,6 +46,7 @@ import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.core.action.ActionListener; import org.opensearch.node.Node; +import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry; import org.opensearch.threadpool.ThreadPool; import java.util.ArrayList; @@ -74,7 +76,8 @@ public FakeThreadPoolClusterManagerService( super( Settings.builder().put(Node.NODE_NAME_SETTING.getKey(), nodeName).build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), - threadPool + threadPool, + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) ); this.name = serviceName; this.onTaskAvailableToRun = onTaskAvailableToRun; diff --git a/test/framework/src/main/java/org/opensearch/test/ClusterServiceUtils.java b/test/framework/src/main/java/org/opensearch/test/ClusterServiceUtils.java index 8f4f510da5ec3..f0c0e9bc2d589 100644 --- a/test/framework/src/main/java/org/opensearch/test/ClusterServiceUtils.java +++ b/test/framework/src/main/java/org/opensearch/test/ClusterServiceUtils.java @@ -34,6 +34,7 @@ import org.apache.logging.log4j.core.util.Throwables; import org.opensearch.OpenSearchException; import org.opensearch.Version; +import org.opensearch.cluster.ClusterManagerMetrics; import org.opensearch.cluster.ClusterName; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.ClusterStateUpdateTask; @@ -52,6 +53,8 @@ import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.node.Node; +import org.opensearch.telemetry.metrics.MetricsRegistry; +import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry; import org.opensearch.threadpool.ThreadPool; import java.util.Collections; @@ -66,7 +69,8 @@ public static ClusterManagerService createClusterManagerService(ThreadPool threa ClusterManagerService clusterManagerService = new ClusterManagerService( Settings.builder().put(Node.NODE_NAME_SETTING.getKey(), "test_cluster_manager_node").build(), new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS), - threadPool + threadPool, + new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE) ); AtomicReference clusterStateRef = new AtomicReference<>(initialClusterState); clusterManagerService.setClusterStatePublisher((event, publishListener, ackListener) -> { @@ -169,8 +173,22 @@ public static ClusterService createClusterService(ThreadPool threadPool, Discove } public static ClusterService createClusterService(ThreadPool threadPool, DiscoveryNode localNode, ClusterSettings clusterSettings) { + return createClusterService(threadPool, localNode, clusterSettings, NoopMetricsRegistry.INSTANCE); + } + + public static ClusterService createClusterService( + ThreadPool threadPool, + DiscoveryNode localNode, + ClusterSettings clusterSettings, + MetricsRegistry metricsRegistry + ) { Settings settings = Settings.builder().put("node.name", "test").put("cluster.name", "ClusterServiceTests").build(); - ClusterService clusterService = new ClusterService(settings, clusterSettings, threadPool); + ClusterService clusterService = new ClusterService( + settings, + clusterSettings, + threadPool, + new ClusterManagerMetrics(metricsRegistry) + ); clusterService.setNodeConnectionsService(createNoOpNodeConnectionsService()); ClusterState initialClusterState = ClusterState.builder(new ClusterName(ClusterServiceUtils.class.getSimpleName())) .nodes(DiscoveryNodes.builder().add(localNode).localNodeId(localNode.getId()).clusterManagerNodeId(localNode.getId())) @@ -184,6 +202,10 @@ public static ClusterService createClusterService(ThreadPool threadPool, Discove return clusterService; } + public static ClusterService createClusterService(Settings settings, ClusterSettings clusterSettings, ThreadPool threadPool) { + return new ClusterService(settings, clusterSettings, threadPool, new ClusterManagerMetrics(NoopMetricsRegistry.INSTANCE)); + } + public static NodeConnectionsService createNoOpNodeConnectionsService() { return new NodeConnectionsService(Settings.EMPTY, null, null) { @Override