From ca65467a9e2db690ef43ceb96d68a8bef3652331 Mon Sep 17 00:00:00 2001 From: Manoj Nagarajan Date: Thu, 14 Nov 2024 11:21:56 -0800 Subject: [PATCH] otel integration to venice-router --- build.gradle | 5 + .../AbstractVeniceAggVersionedStats.java | 6 +- .../stats/AggHostLevelIngestionStats.java | 4 +- .../stats/AggKafkaConsumerServiceStats.java | 4 +- .../davinci/stats/VeniceVersionedStats.java | 4 +- .../stats/VeniceVersionedStatsReporter.java | 4 +- .../heartbeat/HeartbeatVersionedStats.java | 4 +- internal/venice-client-common/build.gradle | 5 + .../com/linkedin/venice/read/RequestType.java | 13 +- .../venice/stats/AbstractVeniceAggStats.java | 61 +++- ...va => StatsSupplierMetricsRepository.java} | 2 +- .../StatsSupplierVeniceMetricsRepository.java | 19 ++ .../linkedin/venice/stats/TehutiUtils.java | 14 + .../venice/stats/VeniceMetricsConfig.java | 235 ++++++++++++++ .../venice/stats/VeniceMetricsRepository.java | 192 +++++++++++ .../VeniceOpenTelemetryConfigProperties.java | 284 +++++++++++++++++ .../VeniceOpenTelemetryMetricFormat.java | 34 ++ .../VeniceOpenTelemetryMetricsRepository.java | 297 ++++++++++++++++++ .../venice/utils/VeniceProperties.java | 4 + .../utils/metrics/MetricsRepositoryUtils.java | 19 ++ .../venice/stats/VeniceMetricsConfigTest.java | 107 +++++++ .../stats/VeniceMetricsRepositoryTest.java | 130 ++++++++ ...iceOpenTelemetryMetricsRepositoryTest.java | 141 +++++++++ .../stats/AbstractVeniceAggStoreStats.java | 7 +- .../VeniceHttpResponseStatusCodeCategory.java | 108 +++++++ .../dimensions/VeniceMetricsDimensions.java | 48 +++ .../VeniceRequestRetryAbortReason.java | 17 + .../dimensions/VeniceRequestRetryType.java | 15 + .../VeniceRequestValidationOutcome.java | 15 + .../VeniceResponseStatusCategory.java | 15 + ...iceHttpResponseStatusCodeCategoryTest.java | 56 ++++ .../VeniceMetricsDimensionsTest.java | 123 ++++++++ .../VeniceRequestRetryAbortReasonTest.java | 30 ++ .../VeniceRequestRetryTypeTest.java | 24 ++ 
.../VeniceRequestValidationOutcomeTest.java | 24 ++ .../VeniceResponseStatusCategoryTest.java | 33 ++ .../utils/VeniceRouterWrapper.java | 15 +- .../router/api/TestVeniceDispatcher.java | 4 +- services/venice-router/build.gradle | 4 + .../linkedin/venice/router/RouterServer.java | 49 ++- .../api/RouterExceptionAndTrackingUtils.java | 6 +- .../venice/router/api/VeniceDispatcher.java | 4 +- .../venice/router/api/VenicePathParser.java | 6 +- .../router/api/VeniceResponseAggregator.java | 19 +- .../router/api/VeniceVersionFinder.java | 6 +- .../api/routing/helix/HelixGroupSelector.java | 4 +- .../ApacheHttpAsyncStorageNodeClient.java | 4 +- .../router/stats/AdminOperationsStats.java | 4 +- .../router/stats/AggHostHealthStats.java | 11 +- .../stats/AggRouterHttpRequestStats.java | 79 +++-- .../venice/router/stats/HealthCheckStats.java | 4 +- .../venice/router/stats/HelixGroupStats.java | 4 +- .../venice/router/stats/HostHealthStats.java | 4 +- .../router/stats/RouteHttpRequestStats.java | 8 +- .../venice/router/stats/RouteHttpStats.java | 8 +- .../stats/RouterCurrentVersionStats.java | 4 +- .../router/stats/RouterHttpRequestStats.java | 272 +++++++++++++--- .../router/stats/RouterThrottleStats.java | 4 +- .../venice/router/stats/SecurityStats.java | 4 +- .../router/stats/StaleVersionStats.java | 4 +- .../router/AggRouterHttpRequestStatsTest.java | 29 +- .../router/RouteHttpRequestStatsTest.java | 15 +- .../router/api/TestVeniceDelegateMode.java | 10 +- .../router/api/TestVenicePathParser.java | 15 +- .../api/TestVeniceResponseAggregator.java | 2 +- .../router/api/TestVeniceVersionFinder.java | 18 +- .../api/path/TestVeniceMultiGetPath.java | 7 +- .../router/api/path/TestVenicePath.java | 6 +- .../stats/AdminOperationsStatsTest.java | 4 +- .../stats/AggServerHttpRequestStats.java | 2 +- 70 files changed, 2525 insertions(+), 217 deletions(-) rename internal/venice-client-common/src/main/java/com/linkedin/venice/stats/{StatsSupplier.java => 
StatsSupplierMetricsRepository.java} (89%) create mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplierVeniceMetricsRepository.java create mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java create mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java create mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryConfigProperties.java create mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricFormat.java create mode 100644 internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java create mode 100644 internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java create mode 100644 internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsRepositoryTest.java create mode 100644 internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java create mode 100644 internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategory.java create mode 100644 internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java create mode 100644 internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReason.java create mode 100644 internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryType.java create mode 100644 internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcome.java create mode 100644 internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategory.java create mode 100644 
internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategoryTest.java create mode 100644 internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java create mode 100644 internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReasonTest.java create mode 100644 internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryTypeTest.java create mode 100644 internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcomeTest.java create mode 100644 internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategoryTest.java diff --git a/build.gradle b/build.gradle index 9f4fa4e3fa..13249e4291 100644 --- a/build.gradle +++ b/build.gradle @@ -139,6 +139,11 @@ ext.libraries = [ zkclient: 'com.101tec:zkclient:0.7', // For Kafka AdminUtils zookeeper: 'org.apache.zookeeper:zookeeper:3.6.3', zstd: 'com.github.luben:zstd-jni:1.5.2-3', + opentelemetryApi: "io.opentelemetry:opentelemetry-api:1.43.0", + opentelemetrySdk: "io.opentelemetry:opentelemetry-sdk:1.43.0", + opentelemetryExporterLogging: "io.opentelemetry:opentelemetry-exporter-logging:1.43.0", + opentelemetryExporterOtlp: "io.opentelemetry:opentelemetry-exporter-otlp:1.43.0", + opentelemetryExporterCommon: "io.opentelemetry:opentelemetry-exporter-common:1.43.0" ] group = 'com.linkedin.venice' diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AbstractVeniceAggVersionedStats.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AbstractVeniceAggVersionedStats.java index 417da42f60..8bbd78c2bc 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AbstractVeniceAggVersionedStats.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AbstractVeniceAggVersionedStats.java @@ -7,7 +7,7 @@ import 
com.linkedin.venice.meta.StoreDataChangedListener; import com.linkedin.venice.meta.Version; import com.linkedin.venice.meta.VersionStatus; -import com.linkedin.venice.stats.StatsSupplier; +import com.linkedin.venice.stats.StatsSupplierMetricsRepository; import com.linkedin.venice.utils.Utils; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; import io.tehuti.metrics.MetricsRepository; @@ -25,7 +25,7 @@ public abstract class AbstractVeniceAggVersionedStats statsInitiator; - private final StatsSupplier reporterSupplier; + private final StatsSupplierMetricsRepository reporterSupplier; protected final ReadOnlyStoreRepository metadataRepository; private final MetricsRepository metricsRepository; @@ -37,7 +37,7 @@ public AbstractVeniceAggVersionedStats( MetricsRepository metricsRepository, ReadOnlyStoreRepository metadataRepository, Supplier statsInitiator, - StatsSupplier reporterSupplier, + StatsSupplierMetricsRepository reporterSupplier, boolean unregisterMetricForDeletedStoreEnabled) { this.metadataRepository = metadataRepository; this.metricsRepository = metricsRepository; diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggHostLevelIngestionStats.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggHostLevelIngestionStats.java index 463201f2b5..3ea802cb66 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggHostLevelIngestionStats.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggHostLevelIngestionStats.java @@ -5,7 +5,7 @@ import com.linkedin.venice.exceptions.VeniceException; import com.linkedin.venice.meta.ReadOnlyStoreRepository; import com.linkedin.venice.stats.AbstractVeniceAggStoreStats; -import com.linkedin.venice.stats.StatsSupplier; +import com.linkedin.venice.stats.StatsSupplierMetricsRepository; import com.linkedin.venice.utils.Time; import io.tehuti.metrics.MetricsRepository; import java.util.Map; @@ -29,7 +29,7 @@ public 
AggHostLevelIngestionStats( unregisterMetricForDeletedStoreEnabled); } - static class HostLevelStoreIngestionStatsSupplier implements StatsSupplier { + static class HostLevelStoreIngestionStatsSupplier implements StatsSupplierMetricsRepository { private final VeniceServerConfig serverConfig; private final Map ingestionTaskMap; private final Time time; diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggKafkaConsumerServiceStats.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggKafkaConsumerServiceStats.java index a0756ba318..d4e6055154 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggKafkaConsumerServiceStats.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/AggKafkaConsumerServiceStats.java @@ -3,7 +3,7 @@ import com.linkedin.venice.exceptions.VeniceException; import com.linkedin.venice.meta.ReadOnlyStoreRepository; import com.linkedin.venice.stats.AbstractVeniceAggStoreStats; -import com.linkedin.venice.stats.StatsSupplier; +import com.linkedin.venice.stats.StatsSupplierMetricsRepository; import com.linkedin.venice.utils.SystemTime; import io.tehuti.metrics.MetricsRepository; import java.util.function.LongSupplier; @@ -99,7 +99,7 @@ public void recordTotalLatestOffsetIsPresent() { totalStats.recordLatestOffsetIsPresent(); } - static class KafkaConsumerServiceStatsSupplier implements StatsSupplier { + static class KafkaConsumerServiceStatsSupplier implements StatsSupplierMetricsRepository { private final LongSupplier getMaxElapsedTimeSinceLastPollInConsumerPool; KafkaConsumerServiceStatsSupplier(LongSupplier getMaxElapsedTimeSinceLastPollInConsumerPool) { diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStats.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStats.java index ab2e88f53e..fcb1c24aa1 100644 --- 
a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStats.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStats.java @@ -1,6 +1,6 @@ package com.linkedin.davinci.stats; -import com.linkedin.venice.stats.StatsSupplier; +import com.linkedin.venice.stats.StatsSupplierMetricsRepository; import io.tehuti.metrics.MetricsRepository; import it.unimi.dsi.fastutil.ints.Int2ObjectMap; import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; @@ -25,7 +25,7 @@ public VeniceVersionedStats( MetricsRepository metricsRepository, String storeName, Supplier statsInitiator, - StatsSupplier reporterSupplier) { + StatsSupplierMetricsRepository reporterSupplier) { this.storeName = storeName; this.versionedStats = new Int2ObjectOpenHashMap<>(); this.reporters = new VeniceVersionedStatsReporter<>(metricsRepository, storeName, reporterSupplier); diff --git a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStatsReporter.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStatsReporter.java index dd112be658..4753733881 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStatsReporter.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/VeniceVersionedStatsReporter.java @@ -4,7 +4,7 @@ import com.linkedin.venice.common.VeniceSystemStoreUtils; import com.linkedin.venice.stats.AbstractVeniceStats; -import com.linkedin.venice.stats.StatsSupplier; +import com.linkedin.venice.stats.StatsSupplierMetricsRepository; import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.stats.AsyncGauge; @@ -22,7 +22,7 @@ public class VeniceVersionedStatsReporter statsSupplier) { + StatsSupplierMetricsRepository statsSupplier) { super(metricsRepository, storeName); this.isSystemStore = VeniceSystemStoreUtils.isSystemStore(storeName); diff --git 
a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/ingestion/heartbeat/HeartbeatVersionedStats.java b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/ingestion/heartbeat/HeartbeatVersionedStats.java index 1e303f3e8a..295129e281 100644 --- a/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/ingestion/heartbeat/HeartbeatVersionedStats.java +++ b/clients/da-vinci-client/src/main/java/com/linkedin/davinci/stats/ingestion/heartbeat/HeartbeatVersionedStats.java @@ -3,7 +3,7 @@ import com.linkedin.davinci.stats.AbstractVeniceAggVersionedStats; import com.linkedin.venice.meta.ReadOnlyStoreRepository; import com.linkedin.venice.meta.Store; -import com.linkedin.venice.stats.StatsSupplier; +import com.linkedin.venice.stats.StatsSupplierMetricsRepository; import io.tehuti.metrics.MetricsRepository; import java.util.Map; import java.util.function.Supplier; @@ -18,7 +18,7 @@ public HeartbeatVersionedStats( MetricsRepository metricsRepository, ReadOnlyStoreRepository metadataRepository, Supplier statsInitiator, - StatsSupplier reporterSupplier, + StatsSupplierMetricsRepository reporterSupplier, Map>>>> leaderMonitors, Map>>>> followerMonitors) { super(metricsRepository, metadataRepository, statsInitiator, reporterSupplier, true); diff --git a/internal/venice-client-common/build.gradle b/internal/venice-client-common/build.gradle index 3ac9a15dc7..9d62bb9260 100644 --- a/internal/venice-client-common/build.gradle +++ b/internal/venice-client-common/build.gradle @@ -39,6 +39,11 @@ dependencies { implementation libraries.log4j2api implementation libraries.zstd implementation libraries.conscrypt + implementation libraries.opentelemetryApi + implementation libraries.opentelemetrySdk + implementation libraries.opentelemetryExporterLogging + implementation libraries.opentelemetryExporterOtlp + implementation libraries.opentelemetryExporterCommon testImplementation project(':internal:venice-test-common') testImplementation 
project(':clients:venice-thin-client') diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/read/RequestType.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/read/RequestType.java index b426ee1aab..b734b017ab 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/read/RequestType.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/read/RequestType.java @@ -1,16 +1,23 @@ package com.linkedin.venice.read; public enum RequestType { - SINGLE_GET(""), MULTI_GET("multiget_"), MULTI_GET_STREAMING("multiget_streaming_"), COMPUTE("compute_"), - COMPUTE_STREAMING("compute_streaming_"); + SINGLE_GET("", "single_get"), MULTI_GET("multiget_", "multi_get"), + MULTI_GET_STREAMING("multiget_streaming_", "multi_get_streaming"), COMPUTE("compute_", "compute"), + COMPUTE_STREAMING("compute_streaming_", "compute_streaming"); private String metricPrefix; + private String requestTypeName; - RequestType(String metricPrefix) { + RequestType(String metricPrefix, String requestTypeName) { this.metricPrefix = metricPrefix; + this.requestTypeName = requestTypeName; } public String getMetricPrefix() { return this.metricPrefix; } + + public String getRequestTypeName() { + return this.requestTypeName; + } } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java index 03a87a4bfe..9255f18731 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStats.java @@ -10,40 +10,81 @@ public abstract class AbstractVeniceAggStats { protected T totalStats; protected final Map storeStats = new VeniceConcurrentHashMap<>(); - private StatsSupplier statsFactory; + private StatsSupplierMetricsRepository 
statsFactoryMetricsRepository; + private StatsSupplierVeniceMetricsRepository statsFactoryVeniceMetricsRepository; + private final MetricsRepository metricsRepository; + private String clusterName = null; - private AbstractVeniceAggStats(MetricsRepository metricsRepository, StatsSupplier statsSupplier, T totalStats) { + private AbstractVeniceAggStats( + MetricsRepository metricsRepository, + StatsSupplierMetricsRepository statsSupplier, + T totalStats) { + this.metricsRepository = metricsRepository; + this.statsFactoryMetricsRepository = statsSupplier; + this.totalStats = totalStats; + } + + private AbstractVeniceAggStats( + VeniceMetricsRepository metricsRepository, + StatsSupplierVeniceMetricsRepository statsSupplier, + String clusterName, + T totalStats) { this.metricsRepository = metricsRepository; - this.statsFactory = statsSupplier; + this.statsFactoryVeniceMetricsRepository = statsSupplier; + this.clusterName = clusterName; this.totalStats = totalStats; } - public AbstractVeniceAggStats(MetricsRepository metricsRepository, StatsSupplier statsSupplier) { + public AbstractVeniceAggStats(MetricsRepository metricsRepository, StatsSupplierMetricsRepository statsSupplier) { this(metricsRepository, statsSupplier, statsSupplier.get(metricsRepository, STORE_NAME_FOR_TOTAL_STAT, null)); } - public AbstractVeniceAggStats(MetricsRepository metricsRepository) { + public AbstractVeniceAggStats( + StatsSupplierVeniceMetricsRepository statsSupplier, + VeniceMetricsRepository metricsRepository, + String clusterName) { + this( + metricsRepository, + statsSupplier, + clusterName, + statsSupplier.get(metricsRepository, STORE_NAME_FOR_TOTAL_STAT, clusterName, null)); + } + + public AbstractVeniceAggStats(MetricsRepository metricsRepository, String clusterName) { this.metricsRepository = metricsRepository; + this.clusterName = clusterName; } - public void setStatsSupplier(StatsSupplier statsSupplier) { - this.statsFactory = statsSupplier; - this.totalStats = 
statsSupplier.get(metricsRepository, STORE_NAME_FOR_TOTAL_STAT, null); + public void setStatsSupplier(StatsSupplierVeniceMetricsRepository statsSupplier) { + this.statsFactoryVeniceMetricsRepository = statsSupplier; + if (metricsRepository instanceof VeniceMetricsRepository) { + this.totalStats = + statsSupplier.get((VeniceMetricsRepository) metricsRepository, STORE_NAME_FOR_TOTAL_STAT, clusterName, null); + } } public AbstractVeniceAggStats( String clusterName, MetricsRepository metricsRepository, - StatsSupplier statsSupplier) { + StatsSupplierMetricsRepository statsSupplier) { this( metricsRepository, statsSupplier, statsSupplier.get(metricsRepository, STORE_NAME_FOR_TOTAL_STAT + "." + clusterName, null)); + this.clusterName = clusterName; } public T getStoreStats(String storeName) { - return storeStats.computeIfAbsent(storeName, k -> statsFactory.get(metricsRepository, storeName, totalStats)); + if (metricsRepository instanceof VeniceMetricsRepository) { + return storeStats.computeIfAbsent( + storeName, + k -> statsFactoryVeniceMetricsRepository + .get((VeniceMetricsRepository) metricsRepository, storeName, clusterName, totalStats)); + } else { + return storeStats + .computeIfAbsent(storeName, k -> statsFactoryMetricsRepository.get(metricsRepository, storeName, totalStats)); + } } public T getNullableStoreStats(String storeName) { diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplier.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplierMetricsRepository.java similarity index 89% rename from internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplier.java rename to internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplierMetricsRepository.java index 9967d93c3f..208fc4470e 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplier.java +++ 
b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplierMetricsRepository.java @@ -3,7 +3,7 @@ import io.tehuti.metrics.MetricsRepository; -public interface StatsSupplier { +public interface StatsSupplierMetricsRepository { /** * Legacy function, for implementations that do not use total stats in their constructor. * diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplierVeniceMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplierVeniceMetricsRepository.java new file mode 100644 index 0000000000..c604515b89 --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/StatsSupplierVeniceMetricsRepository.java @@ -0,0 +1,19 @@ +package com.linkedin.venice.stats; + +/** copy of {@link StatsSupplierMetricsRepository} for {@link VeniceMetricsRepository} */ +public interface StatsSupplierVeniceMetricsRepository { + /** + * Legacy function, for implementations that do not use total stats in their constructor. + * + * @see #get(VeniceMetricsRepository, String, String, AbstractVeniceStats) which is the only caller. + */ + T get(VeniceMetricsRepository metricsRepository, String storeName, String clusterName); + + /** + * This is the function that gets called by {@link AbstractVeniceAggStats}, and concrete classes can + * optionally implement it in order to be provided with the total stats instance. 
+ */ + default T get(VeniceMetricsRepository metricsRepository, String storeName, String clusterName, T totalStats) { + return get(metricsRepository, storeName, clusterName); + } +} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/TehutiUtils.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/TehutiUtils.java index 7a5cf212cd..7ef5a7bdf5 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/TehutiUtils.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/TehutiUtils.java @@ -9,6 +9,7 @@ import io.tehuti.metrics.stats.Percentiles; import io.tehuti.metrics.stats.Rate; import java.util.Arrays; +import java.util.Map; /** @@ -129,6 +130,19 @@ public static MetricsRepository getMetricsRepository(String serviceName) { return metricsRepository; } + public static VeniceMetricsRepository getVeniceMetricsRepository( + String serviceName, + String metricPrefix, + Map configs) { + VeniceMetricsRepository metricsRepository = new VeniceMetricsRepository( + new VeniceMetricsConfig.VeniceMetricsConfigBuilder().setServiceName(serviceName) + .setMetricPrefix(metricPrefix) + .extractAndSetOtelConfigs(configs) + .build()); + metricsRepository.addReporter(new JmxReporter(serviceName)); + return metricsRepository; + } + /** * A valid metric name needs to pass the test in {@link javax.management.ObjectName}. 
This helper function will * try to fix all invalid character mentioned in the above function to avoid MalformedObjectNameException; besides, diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java new file mode 100644 index 0000000000..586143f276 --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsConfig.java @@ -0,0 +1,235 @@ +package com.linkedin.venice.stats; + +import com.linkedin.venice.exceptions.VeniceException; +import io.opentelemetry.sdk.metrics.export.MetricExporter; +import io.tehuti.metrics.MetricConfig; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + + +public class VeniceMetricsConfig { + private static final Logger LOGGER = LogManager.getLogger(VeniceMetricsConfig.class); + private final String serviceName; + private final String metricPrefix; + /** config to control whether to emit OpenTelemetry or tehuti metrics or both + * emitTehutiMetrics is not used for now */ + private final boolean emitOpenTelemetryMetrics; + private final boolean emitTehutiMetrics; + + /** extra configs for OpenTelemetry. Supports 2 exporter currently
+ * 1. {@link MetricExporter} for exporting to an HTTP/gRPC endpoint. Exporter details are supplied via configs; + * see {@link VeniceMetricsConfigBuilder#extractAndSetOtelConfigs} and {@link VeniceOpenTelemetryMetricsRepository#getOtlpHttpMetricExporter}
+ * 2. {@link VeniceOpenTelemetryMetricsRepository.LogBasedMetricExporter} for debug purposes + */ + private final Map otelConfigs; + private final boolean emitToHttpGrpcEndpoint; + private final boolean emitToLog; // for debug purposes + private final VeniceOpenTelemetryMetricFormat metricFormat; + private final boolean useExponentialHistogram; + private final int exponentialHistogramMaxScale; + private final int exponentialHistogramMaxBuckets; + + /** reusing tehuti's MetricConfig */ + private final MetricConfig tehutiMetricConfig; + + private VeniceMetricsConfig(VeniceMetricsConfigBuilder veniceMetricsConfigBuilder) { + this.serviceName = veniceMetricsConfigBuilder.serviceName; + this.metricPrefix = veniceMetricsConfigBuilder.metricPrefix; + this.emitOpenTelemetryMetrics = veniceMetricsConfigBuilder.emitOpenTelemetryMetrics; + this.emitTehutiMetrics = veniceMetricsConfigBuilder.emitTehutiMetrics; + this.emitToHttpGrpcEndpoint = veniceMetricsConfigBuilder.emitToHttpGrpcEndpoint; + this.emitToLog = veniceMetricsConfigBuilder.emitToLog; + this.metricFormat = veniceMetricsConfigBuilder.metricFormat; + this.useExponentialHistogram = veniceMetricsConfigBuilder.useExponentialHistogram; + this.exponentialHistogramMaxScale = veniceMetricsConfigBuilder.exponentialHistogramMaxScale; + this.exponentialHistogramMaxBuckets = veniceMetricsConfigBuilder.exponentialHistogramMaxBuckets; + this.otelConfigs = veniceMetricsConfigBuilder.otelConfigs; + this.tehutiMetricConfig = veniceMetricsConfigBuilder.tehutiMetricConfig; + } + + public static class VeniceMetricsConfigBuilder { + private String serviceName = "noop_service"; + private String metricPrefix = null; + private boolean emitOpenTelemetryMetrics = false; + private boolean emitTehutiMetrics = true; + private boolean emitToHttpGrpcEndpoint = false; + private boolean emitToLog = false; + private VeniceOpenTelemetryMetricFormat metricFormat = VeniceOpenTelemetryMetricFormat.SNAKE_CASE; + private boolean useExponentialHistogram 
= true; + private int exponentialHistogramMaxScale = 3; + private int exponentialHistogramMaxBuckets = 250; + private Map otelConfigs = new HashMap<>(); + private MetricConfig tehutiMetricConfig = null; + + public VeniceMetricsConfigBuilder setServiceName(String serviceName) { + this.serviceName = serviceName; + return this; + } + + public VeniceMetricsConfigBuilder setMetricPrefix(String metricPrefix) { + this.metricPrefix = metricPrefix; + return this; + } + + public VeniceMetricsConfigBuilder setEmitOpenTelemetryMetrics(boolean emitOpenTelemetryMetrics) { + this.emitOpenTelemetryMetrics = emitOpenTelemetryMetrics; + return this; + } + + public VeniceMetricsConfigBuilder setEmitTehutiMetrics(boolean emitTehutiMetrics) { + this.emitTehutiMetrics = emitTehutiMetrics; + return this; + } + + public VeniceMetricsConfigBuilder setEmitToHttpGrpcEndpoint(boolean emitToHttpGrpcEndpoint) { + this.emitToHttpGrpcEndpoint = emitToHttpGrpcEndpoint; + return this; + } + + public VeniceMetricsConfigBuilder setEmitToLog(boolean emitToLog) { + this.emitToLog = emitToLog; + return this; + } + + public VeniceMetricsConfigBuilder setMetricFormat(VeniceOpenTelemetryMetricFormat metricFormat) { + this.metricFormat = metricFormat; + return this; + } + + public VeniceMetricsConfigBuilder extractAndSetOtelConfigs(Map configs) { + // copy only OpenTelemetry related configs + for (Map.Entry entry: configs.entrySet()) { + if (entry.getKey().startsWith("otel.")) { + otelConfigs.put(entry.getKey(), entry.getValue()); + } + } + LOGGER.info("OpenTelemetry configs: {}", otelConfigs); + return this; + } + + public VeniceMetricsConfigBuilder setTehutiMetricConfig(MetricConfig tehutiMetricConfig) { + this.tehutiMetricConfig = tehutiMetricConfig; + return this; + } + + /** get the last part of the service name + * For instance: if service name is "venice-router", return "router" + */ + public static String getMetricsPrefix(String input) { + String[] parts = input.split("[\\-\\._]"); + String lastPart 
= parts[parts.length - 1]; + return lastPart; + } + + // Validate required fields before building + private void checkAndSetDefaults() { + if (tehutiMetricConfig == null) { + setTehutiMetricConfig(new MetricConfig()); + } + if (metricPrefix == null) { + setMetricPrefix(getMetricsPrefix(serviceName)); + } + if (otelConfigs.containsKey("otel.venice.enabled")) { + String status = otelConfigs.get("otel.venice.enabled"); + if (status != null) { + setEmitOpenTelemetryMetrics(status.toLowerCase(Locale.ROOT).equals("true")); + } + } + // check otelConfigs and set defaults + if (emitOpenTelemetryMetrics) { + if (otelConfigs.containsKey("otel.venice.export.to.log")) { + String emitStatus = otelConfigs.get("otel.venice.export.to.log"); + if (emitStatus != null) { + setEmitToLog(emitStatus.toLowerCase(Locale.ROOT).equals("true")); + } + } + if (otelConfigs.containsKey("otel.venice.export.to.http.grpc.endpoint")) { + String emitStatus = otelConfigs.get("otel.venice.export.to.http.grpc.endpoint"); + if (emitStatus != null) { + setEmitToHttpGrpcEndpoint(emitStatus.toLowerCase(Locale.ROOT).equals("true")); + } + } + if (otelConfigs.containsKey("otel.venice.metrics.format")) { + String format = otelConfigs.get("otel.venice.metrics.format"); + if (format != null) { + try { + setMetricFormat(VeniceOpenTelemetryMetricFormat.valueOf(format.toUpperCase(Locale.ROOT))); + } catch (IllegalArgumentException e) { + LOGGER.warn("Invalid metric format: {}, setting to default: {}", format, metricFormat); + } + } + } + if (emitToHttpGrpcEndpoint) { + if (!otelConfigs.containsKey("otel.exporter.otlp.metrics.protocol") + || !otelConfigs.containsKey("otel.exporter.otlp.metrics.endpoint")) { + throw new VeniceException( + "otel settings missing for otel.exporter.otlp.metrics.protocol and otel.exporter.otlp.metrics.endpoint"); + } + } + } + } + + public VeniceMetricsConfig build() { + checkAndSetDefaults(); + return new VeniceMetricsConfig(this); + } + } + + // all getters + public String 
getServiceName() { + return this.serviceName; + } + + public String getMetricPrefix() { + return this.metricPrefix; + } + + public boolean isEmitOpenTelemetryMetrics() { + return emitOpenTelemetryMetrics; + } + + public boolean isEmitToHttpGrpcEndpoint() { + return emitToHttpGrpcEndpoint; + } + + public boolean isEmitToLog() { + return emitToLog; + } + + public VeniceOpenTelemetryMetricFormat getMetricFormat() { + return metricFormat; + } + + public boolean isUseExponentialHistogram() { + return useExponentialHistogram; + } + + public int getExponentialHistogramMaxScale() { + return exponentialHistogramMaxScale; + } + + public int getExponentialHistogramMaxBuckets() { + return exponentialHistogramMaxBuckets; + } + + public Map getOtelConfigs() { + return otelConfigs; + } + + public MetricConfig getTehutiMetricConfig() { + return tehutiMetricConfig; + } + + @Override + public String toString() { + return "VeniceMetricsConfig{" + "serviceName='" + serviceName + '\'' + ", metricPrefix='" + metricPrefix + '\'' + + ", emitOpenTelemetryMetrics=" + emitOpenTelemetryMetrics + ", emitTehutiMetrics=" + emitTehutiMetrics + + ", otelConfigs=" + otelConfigs + ", emitToHttpGrpcEndpoint=" + emitToHttpGrpcEndpoint + ", emitToLog=" + + emitToLog + ", metricFormat=" + metricFormat + ", useExponentialHistogram=" + useExponentialHistogram + + ", exponentialHistogramMaxScale=" + exponentialHistogramMaxScale + ", exponentialHistogramMaxBuckets=" + + exponentialHistogramMaxBuckets + ", tehutiMetricConfig=" + tehutiMetricConfig + '}'; + } +} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java new file mode 100644 index 0000000000..18110e69be --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceMetricsRepository.java @@ -0,0 +1,192 @@ +package com.linkedin.venice.stats; + +import 
io.tehuti.Metric; +import io.tehuti.metrics.Measurable; +import io.tehuti.metrics.MetricConfig; +import io.tehuti.metrics.MetricsReporter; +import io.tehuti.metrics.MetricsRepository; +import io.tehuti.metrics.Sensor; +import io.tehuti.metrics.stats.AsyncGauge; +import java.io.Closeable; +import java.util.Map; +import java.util.Objects; + + +/** extends MetricsRepository to keep the changes to a minimum. Next step would be to create a MetricsRepository inside rather than extending it */ +public class VeniceMetricsRepository extends MetricsRepository implements Closeable { + private MetricsRepository delegate = null; + private VeniceMetricsConfig veniceMetricsConfig; + VeniceOpenTelemetryMetricsRepository openTelemetryMetricsRepository; + + public VeniceMetricsRepository() { + super(); + this.veniceMetricsConfig = new VeniceMetricsConfig.VeniceMetricsConfigBuilder().build(); + this.openTelemetryMetricsRepository = new VeniceOpenTelemetryMetricsRepository(veniceMetricsConfig); + } + + public VeniceMetricsRepository(VeniceMetricsConfig veniceMetricsConfig) { + super(veniceMetricsConfig.getTehutiMetricConfig()); + this.veniceMetricsConfig = veniceMetricsConfig; + this.openTelemetryMetricsRepository = new VeniceOpenTelemetryMetricsRepository(veniceMetricsConfig); + } + + public VeniceMetricsRepository( + VeniceMetricsConfig veniceMetricsConfig, + VeniceOpenTelemetryMetricsRepository openTelemetryMetricsRepository) { + super(veniceMetricsConfig.getTehutiMetricConfig()); + this.veniceMetricsConfig = veniceMetricsConfig; + this.openTelemetryMetricsRepository = openTelemetryMetricsRepository; + } + + /** if MetricsRepository is passed in, then use it as the delegate, can be removed after the migration */ + public VeniceMetricsRepository(MetricsRepository metricsRepository, VeniceMetricsConfig veniceMetricsConfig) { + this.delegate = Objects.requireNonNull(metricsRepository); + this.veniceMetricsConfig = veniceMetricsConfig; + this.openTelemetryMetricsRepository = new 
VeniceOpenTelemetryMetricsRepository(veniceMetricsConfig); + } + + public VeniceMetricsRepository( + MetricsRepository metricsRepository, + VeniceMetricsConfig veniceMetricsConfig, + VeniceOpenTelemetryMetricsRepository openTelemetryMetricsRepository) { + this.delegate = Objects.requireNonNull(metricsRepository); + this.veniceMetricsConfig = veniceMetricsConfig; + this.openTelemetryMetricsRepository = openTelemetryMetricsRepository; + } + + public VeniceOpenTelemetryMetricsRepository getOpenTelemetryMetricsRepository() { + return this.openTelemetryMetricsRepository; + } + + public VeniceMetricsConfig getVeniceMetricsConfig() { + return veniceMetricsConfig; + } + + @Override + public void close() { + if (delegate != null) { + delegate.close(); + } else { + super.close(); + } + openTelemetryMetricsRepository.close(); + } + + // all other overrides from MetricsRepository to use delegate + @Override + public Sensor getSensor(String name) { + if (delegate != null) { + return delegate.getSensor(name); + } else { + return super.getSensor(name); + } + } + + @Override + public Sensor sensor(String name) { + if (delegate != null) { + return delegate.sensor(name); + } else { + return super.sensor(name); + } + } + + @Override + public Sensor sensor(String name, Sensor... parents) { + if (delegate != null) { + return delegate.sensor(name, parents); + } else { + return super.sensor(name, parents); + } + } + + @Override + public synchronized Sensor sensor(String name, MetricConfig config, Sensor... 
parents) { + if (delegate != null) { + return delegate.sensor(name, config, parents); + } else { + return super.sensor(name, config, parents); + } + } + + @Override + public synchronized void removeSensor(String name) { + if (delegate != null) { + delegate.removeSensor(name); + } else { + super.removeSensor(name); + } + } + + @Override + public Metric addMetric(String name, Measurable measurable) { + if (delegate != null) { + return delegate.addMetric(name, measurable); + } else { + return super.addMetric(name, measurable); + } + } + + @Override + public Metric addMetric(String name, String description, Measurable measurable) { + if (delegate != null) { + return delegate.addMetric(name, description, measurable); + } else { + return super.addMetric(name, description, measurable); + } + } + + @Override + public Metric addMetric(String name, MetricConfig config, Measurable measurable) { + if (delegate != null) { + return delegate.addMetric(name, config, measurable); + } else { + return super.addMetric(name, config, measurable); + } + } + + @Override + public synchronized Metric addMetric(String name, String description, MetricConfig config, Measurable measurable) { + if (delegate != null) { + return delegate.addMetric(name, description, config, measurable); + } else { + return super.addMetric(name, description, config, measurable); + } + } + + @Override + public synchronized void addReporter(MetricsReporter reporter) { + if (delegate != null) { + delegate.addReporter(reporter); + } else { + super.addReporter(reporter); + } + } + + @Override + public Map metrics() { + if (delegate != null) { + return delegate.metrics(); + } else { + return super.metrics(); + } + } + + @Override + public Metric getMetric(String name) { + if (delegate != null) { + return delegate.getMetric(name); + } else { + return super.getMetric(name); + } + } + + @Override + public AsyncGauge.AsyncGaugeExecutor getAsyncGaugeExecutor() { + if (delegate != null) { + return 
delegate.getAsyncGaugeExecutor(); + } else { + return super.getAsyncGaugeExecutor(); + } + } +} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryConfigProperties.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryConfigProperties.java new file mode 100644 index 0000000000..bc3e06c19d --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryConfigProperties.java @@ -0,0 +1,284 @@ +package com.linkedin.venice.stats; + +import static java.util.stream.Collectors.groupingBy; +import static java.util.stream.Collectors.joining; + +import io.opentelemetry.api.internal.ConfigUtil; +import io.opentelemetry.api.internal.StringUtils; +import io.opentelemetry.sdk.autoconfigure.spi.ConfigProperties; +import io.opentelemetry.sdk.autoconfigure.spi.ConfigurationException; +import java.time.Duration; +import java.util.AbstractMap; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.function.Function; +import java.util.stream.Collectors; +import javax.annotation.Nullable; + + +/** + * copy of {@link io.opentelemetry.sdk.autoconfigure.spi.internal.DefaultConfigProperties} with + * modification to {@link #getList} to support comma separated values for 1 key. + * + * In original class: comma separated values will be considered as different key values + * In new class: comma separated values will be considered as 1 key value + * + * Copied the entire class DefaultConfigProperties rather than extending it because it is final class. 
+ */ +public class VeniceOpenTelemetryConfigProperties implements ConfigProperties { + /** + * Properties are normalized to The properties for both of these will be normalized to be all lower + * case, dashses are replaces with periods, and environment variable underscores are replaces with + * periods. + * + *

This class is internal and is hence not for public use. Its APIs are unstable and can change + * at any time. + */ + + private final Map config; + + /** + * Creates a {@link VeniceOpenTelemetryConfigProperties} by merging system properties, environment variables, + * and the {@code defaultProperties}. + * + *

Environment variables take priority over {@code defaultProperties}. System properties take + * priority over environment variables. + */ + public static VeniceOpenTelemetryConfigProperties create(Map defaultProperties) { + return new VeniceOpenTelemetryConfigProperties(System.getProperties(), System.getenv(), defaultProperties); + } + + /** + * Create a {@link VeniceOpenTelemetryConfigProperties} from the {@code properties}, ignoring system + * properties and environment variables. + */ + public static VeniceOpenTelemetryConfigProperties createFromMap(Map properties) { + return new VeniceOpenTelemetryConfigProperties(properties, Collections.emptyMap(), Collections.emptyMap()); + } + + private VeniceOpenTelemetryConfigProperties( + Map systemProperties, + Map environmentVariables, + Map defaultProperties) { + Map config = new HashMap<>(); + defaultProperties.forEach((name, value) -> config.put(ConfigUtil.normalizePropertyKey(name), value)); + environmentVariables.forEach((name, value) -> config.put(ConfigUtil.normalizeEnvironmentVariableKey(name), value)); + systemProperties + .forEach((key, value) -> config.put(ConfigUtil.normalizePropertyKey(key.toString()), value.toString())); + + this.config = config; + } + + private VeniceOpenTelemetryConfigProperties( + VeniceOpenTelemetryConfigProperties previousProperties, + Map overrides) { + // previousProperties are already normalized, they can be copied as they are + Map config = new HashMap<>(previousProperties.config); + overrides.forEach((name, value) -> config.put(ConfigUtil.normalizePropertyKey(name), value)); + + this.config = config; + } + + @Override + @Nullable + public String getString(String name) { + return config.get(ConfigUtil.normalizePropertyKey(name)); + } + + @Override + @Nullable + public Boolean getBoolean(String name) { + String value = config.get(ConfigUtil.normalizePropertyKey(name)); + if (value == null || value.isEmpty()) { + return null; + } + return Boolean.parseBoolean(value); + } + + 
@Override + @Nullable + @SuppressWarnings("UnusedException") + public Integer getInt(String name) { + String value = config.get(ConfigUtil.normalizePropertyKey(name)); + if (value == null || value.isEmpty()) { + return null; + } + try { + return Integer.parseInt(value); + } catch (NumberFormatException ex) { + throw newInvalidPropertyException(name, value, "integer"); + } + } + + @Override + @Nullable + @SuppressWarnings("UnusedException") + public Long getLong(String name) { + String value = config.get(ConfigUtil.normalizePropertyKey(name)); + if (value == null || value.isEmpty()) { + return null; + } + try { + return Long.parseLong(value); + } catch (NumberFormatException ex) { + throw newInvalidPropertyException(name, value, "long"); + } + } + + @Override + @Nullable + @SuppressWarnings("UnusedException") + public Double getDouble(String name) { + String value = config.get(ConfigUtil.normalizePropertyKey(name)); + if (value == null || value.isEmpty()) { + return null; + } + try { + return Double.parseDouble(value); + } catch (NumberFormatException ex) { + throw newInvalidPropertyException(name, value, "double"); + } + } + + @Override + @Nullable + @SuppressWarnings("UnusedException") + public Duration getDuration(String name) { + String value = config.get(ConfigUtil.normalizePropertyKey(name)); + if (value == null || value.isEmpty()) { + return null; + } + String unitString = getUnitString(value); + // TODO: Environment variables have unknown encoding. `trim()` may cut codepoints oddly + // but likely we'll fail for malformed unit string either way. + String numberString = value.substring(0, value.length() - unitString.length()); + try { + long rawNumber = Long.parseLong(numberString.trim()); + TimeUnit unit = getDurationUnit(unitString.trim()); + return Duration.ofNanos(TimeUnit.NANOSECONDS.convert(rawNumber, unit)); + } catch (NumberFormatException ex) { + throw new ConfigurationException( + "Invalid duration property " + name + "=" + value + ". 
Expected number, found: " + numberString, + ex); + } catch (ConfigurationException ex) { + throw new ConfigurationException("Invalid duration property " + name + "=" + value + ". " + ex.getMessage()); + } + } + + @Override + public List getList(String name) { + String value = config.get(ConfigUtil.normalizePropertyKey(name)); + if (value == null) { + return Collections.emptyList(); + } + // TODO this is a bit of a hack to support comma separated value for 1 key to be passed in header + // return filterBlanksAndNulls(value.split(",")); + return filterBlanksAndNulls(new String[] { value }); + } + + /** + * Returns {@link ConfigProperties#getList(String)} as a {@link Set} after validating there are no + * duplicate entries. + * + * @throws ConfigurationException if {@code name} contains duplicate entries + */ + public static Set getSet(ConfigProperties config, String name) { + List list = config.getList(ConfigUtil.normalizePropertyKey(name)); + Set set = new HashSet<>(list); + if (set.size() != list.size()) { + String duplicates = list.stream() + .collect(groupingBy(Function.identity(), Collectors.counting())) + .entrySet() + .stream() + .filter(entry -> entry.getValue() > 1) + .map(Map.Entry::getKey) + .collect(joining(",", "[", "]")); + throw new ConfigurationException(name + " contains duplicates: " + duplicates); + } + return set; + } + + @Override + public Map getMap(String name) { + return getList(ConfigUtil.normalizePropertyKey(name)).stream().map(entry -> { + String[] split = entry.split("=", 2); + if (split.length != 2 || StringUtils.isNullOrEmpty(split[0])) { + throw new ConfigurationException("Invalid map property: " + name + "=" + config.get(name)); + } + return filterBlanksAndNulls(split); + }) + // Filter entries with an empty value, i.e. 
"foo=" + .filter(splitKeyValuePairs -> splitKeyValuePairs.size() == 2) + .map( + splitKeyValuePairs -> new AbstractMap.SimpleImmutableEntry<>( + splitKeyValuePairs.get(0), + splitKeyValuePairs.get(1))) + // If duplicate keys, prioritize later ones similar to duplicate system properties on a + // Java command line. + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (first, next) -> next, LinkedHashMap::new)); + } + + /** + * Return a new {@link VeniceOpenTelemetryConfigProperties} by overriding the {@code previousProperties} with + * the {@code overrides}. + */ + public VeniceOpenTelemetryConfigProperties withOverrides(Map overrides) { + return new VeniceOpenTelemetryConfigProperties(this, overrides); + } + + private static ConfigurationException newInvalidPropertyException(String name, String value, String type) { + throw new ConfigurationException("Invalid value for property " + name + "=" + value + ". Must be a " + type + "."); + } + + private static List filterBlanksAndNulls(String[] values) { + return Arrays.stream(values).map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList()); + } + + /** Returns the TimeUnit associated with a unit string. Defaults to milliseconds. */ + private static TimeUnit getDurationUnit(String unitString) { + switch (unitString) { + case "us": + return TimeUnit.MICROSECONDS; + case "ns": + return TimeUnit.NANOSECONDS; + case "": // Fallthrough expected + case "ms": + return TimeUnit.MILLISECONDS; + case "s": + return TimeUnit.SECONDS; + case "m": + return TimeUnit.MINUTES; + case "h": + return TimeUnit.HOURS; + case "d": + return TimeUnit.DAYS; + default: + throw new ConfigurationException("Invalid duration string, found: " + unitString); + } + } + + /** + * Fragments the 'units' portion of a config value from the 'value' portion. + * + *

E.g. "1ms" would return the string "ms". + */ + private static String getUnitString(String rawValue) { + int lastDigitIndex = rawValue.length() - 1; + while (lastDigitIndex >= 0) { + char c = rawValue.charAt(lastDigitIndex); + if (Character.isDigit(c)) { + break; + } + lastDigitIndex -= 1; + } + // Pull everything after the last digit. + return rawValue.substring(lastDigitIndex + 1); + } +} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricFormat.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricFormat.java new file mode 100644 index 0000000000..27bb283eba --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricFormat.java @@ -0,0 +1,34 @@ +package com.linkedin.venice.stats; + +import com.linkedin.venice.utils.VeniceEnumValue; + + +public enum VeniceOpenTelemetryMetricFormat implements VeniceEnumValue { + /** + * Default format if not configured, names are defined as per this. + * should use snake case as per https://opentelemetry.io/docs/specs/semconv/general/attribute-naming/ + * For example: http.response.status_code + */ + SNAKE_CASE(0), + /** + * Alternate format for attribute names. If configured, defined names in snake_case will be + * transformed to either one of below formats. 
+ * + * camel case: For example, http.response.statusCode + * pascal case: For example, Http.Response.StatusCode + */ + CAMEL_CASE(1), PASCAL_CASE(2); + + private final int value; + + VeniceOpenTelemetryMetricFormat(int value) { + this.value = value; + } + + public static final int SIZE = values().length; + + @Override + public int getValue() { + return value; + } +} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java new file mode 100644 index 0000000000..8a8b26c1f3 --- /dev/null +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepository.java @@ -0,0 +1,297 @@ +package com.linkedin.venice.stats; + +import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricFormat.PASCAL_CASE; +import static io.opentelemetry.sdk.metrics.data.AggregationTemporality.DELTA; + +import com.linkedin.venice.exceptions.VeniceException; +import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; +import io.opentelemetry.api.OpenTelemetry; +import io.opentelemetry.api.metrics.DoubleHistogram; +import io.opentelemetry.api.metrics.DoubleHistogramBuilder; +import io.opentelemetry.api.metrics.LongCounter; +import io.opentelemetry.api.metrics.LongCounterBuilder; +import io.opentelemetry.api.metrics.Meter; +import io.opentelemetry.exporter.otlp.internal.OtlpMetricExporterProvider; +import io.opentelemetry.sdk.OpenTelemetrySdk; +import io.opentelemetry.sdk.common.CompletableResultCode; +import io.opentelemetry.sdk.metrics.Aggregation; +import io.opentelemetry.sdk.metrics.InstrumentSelector; +import io.opentelemetry.sdk.metrics.InstrumentType; +import io.opentelemetry.sdk.metrics.SdkMeterProvider; +import io.opentelemetry.sdk.metrics.SdkMeterProviderBuilder; +import io.opentelemetry.sdk.metrics.View; +import 
io.opentelemetry.sdk.metrics.data.AggregationTemporality; +import io.opentelemetry.sdk.metrics.data.MetricData; +import io.opentelemetry.sdk.metrics.export.MetricExporter; +import io.opentelemetry.sdk.metrics.export.PeriodicMetricReader; +import io.opentelemetry.sdk.resources.Resource; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + + +public class VeniceOpenTelemetryMetricsRepository { + private static final Logger LOGGER = LogManager.getLogger(VeniceOpenTelemetryMetricsRepository.class); + private OpenTelemetry openTelemetry = null; + private SdkMeterProvider sdkMeterProvider = null; + private boolean emitOpenTelemetryMetrics; + private VeniceOpenTelemetryMetricFormat metricFormat; + private Meter meter; + + private String metricPrefix; + + /** Below Maps are to create only one metric per name and type: Venice code will try to initialize the same metric multiple times as it will get + * called from per store path and per request type path. This will ensure that we only have one metric per name and + * use dimensions to differentiate between them. 
+ */ + private final VeniceConcurrentHashMap histogramMap = new VeniceConcurrentHashMap<>(); + private final VeniceConcurrentHashMap counterMap = new VeniceConcurrentHashMap<>(); + + MetricExporter getOtlpHttpMetricExporter(VeniceMetricsConfig metricsConfig) { + OtlpMetricExporterProvider otlpMetricExporterProvider = new OtlpMetricExporterProvider(); + VeniceOpenTelemetryConfigProperties config = + VeniceOpenTelemetryConfigProperties.createFromMap(metricsConfig.getOtelConfigs()); + return otlpMetricExporterProvider.createExporter(config); + } + + public VeniceOpenTelemetryMetricsRepository(VeniceMetricsConfig metricsConfig) { + emitOpenTelemetryMetrics = metricsConfig.isEmitOpenTelemetryMetrics(); + metricFormat = metricsConfig.getMetricFormat(); + if (!emitOpenTelemetryMetrics) { + LOGGER.info("OpenTelemetry metrics are disabled"); + return; + } + LOGGER.info( + "OpenTelemetry initialization for {} started with config: {}", + metricsConfig.getServiceName(), + metricsConfig.toString()); + this.metricPrefix = transformMetricName("venice." + metricsConfig.getMetricPrefix()); + + try { + SdkMeterProviderBuilder builder = SdkMeterProvider.builder(); + if (metricsConfig.isEmitToHttpGrpcEndpoint()) { + MetricExporter httpExporter = getOtlpHttpMetricExporter(metricsConfig); + builder.registerMetricReader(PeriodicMetricReader.builder(httpExporter).build()); + } + if (metricsConfig.isEmitToLog()) { + // internal to test: Disabled by default + builder.registerMetricReader(PeriodicMetricReader.builder(new LogBasedMetricExporter()).build()); + } + if (metricsConfig.isUseExponentialHistogram()) { + /** + * {@link io.opentelemetry.exporter.internal.ExporterBuilderUtil#configureHistogramDefaultAggregation} + * doesn't take in buckets and scale configs. 
so using the below for now rather than passing these as + * configs to {@link #getOtlpHttpMetricExporter} + */ + builder.registerView( + InstrumentSelector.builder().setName("*").setType(InstrumentType.HISTOGRAM).build(), + View.builder() + .setAggregation( + Aggregation.base2ExponentialBucketHistogram( + metricsConfig.getExponentialHistogramMaxBuckets(), + metricsConfig.getExponentialHistogramMaxScale())) + .build()); + } + + builder.setResource(Resource.empty()); + sdkMeterProvider = builder.build(); + + // Register MeterProvider with OpenTelemetry instance + openTelemetry = OpenTelemetrySdk.builder().setMeterProvider(sdkMeterProvider).build(); + + this.meter = openTelemetry.getMeter(getMetricPrefix()); + LOGGER.info( + "OpenTelemetry initialization for {} completed with config: {}", + metricsConfig.getServiceName(), + metricsConfig.toString()); + } catch (Exception e) { + LOGGER.info( + "OpenTelemetry initialization for {} failed with config: {}", + metricsConfig.getServiceName(), + metricsConfig.toString(), + e); + throw new VeniceException("OpenTelemetry initialization for " + metricsConfig.getServiceName() + " failed", e); + } + } + + /** + * validate whether the metric name is a valid {@link VeniceOpenTelemetryMetricFormat#SNAKE_CASE} + */ + public static void validateMetricName(String name) { + if (name == null || name.isEmpty()) { + throw new IllegalArgumentException("Metric name cannot be null or empty. Input name: " + name); + } + if (name.contains(" ")) { + throw new IllegalArgumentException("Metric name cannot contain spaces. Input name: " + name); + } + // name should not contain any capital or special characters except for underscore and dot + if (!name.matches("^[a-z0-9_.]*$")) { + throw new IllegalArgumentException( + "Metric name can only contain lowercase alphabets, numbers, underscore and dot. Input name: " + name); + } + } + + String getFullMetricName(String metricPrefix, String name) { + String fullMetricName = metricPrefix + "." 
+ name; + validateMetricName(fullMetricName); + return transformMetricName(fullMetricName); + } + + private String getMetricPrefix() { + return metricPrefix; + } + + /** + * Input should already be in {@link VeniceOpenTelemetryMetricFormat#SNAKE_CASE} as validated + * in {@link #validateMetricName}. + * + * If configured a different format, return the transformed format + */ + private String transformMetricName(String input) { + switch (metricFormat) { + case SNAKE_CASE: + return input; // input should be already in snake_case + case PASCAL_CASE: + case CAMEL_CASE: + return transformMetricName(input, metricFormat); + default: + throw new IllegalArgumentException("Unsupported metric format: " + metricFormat); + } + } + + public static String transformMetricName(String input, VeniceOpenTelemetryMetricFormat metricFormat) { + String[] words = input.split("\\."); + for (int i = 0; i < words.length; i++) { + if (!words[i].isEmpty()) { + String[] partWords = words[i].split("_"); + for (int j = 0; j < partWords.length; j++) { + if (metricFormat == PASCAL_CASE || j > 0) { + // either pascal case or camel case except for the first word + partWords[j] = capitalizeFirstLetter(partWords[j]); + } + } + StringBuilder sb = new StringBuilder(); + for (String partWord: partWords) { + sb.append(partWord); + } + words[i] = sb.toString(); + } + } + StringBuilder finalName = new StringBuilder(); + for (String word: words) { + finalName.append(word); + finalName.append("."); + } + // remove the last dot + if (finalName.length() > 0) { + finalName.deleteCharAt(finalName.length() - 1); + } + return finalName.toString(); + } + + private static String capitalizeFirstLetter(String word) { + if (word.isEmpty()) { + return word; + } + return Character.toUpperCase(word.charAt(0)) + word.substring(1); + } + + public DoubleHistogram getHistogram(String name, String unit, String description) { + if (emitOpenTelemetryMetrics) { + String fullMetricName = getFullMetricName(getMetricPrefix(), name); + 
if (openTelemetry != null) { + return histogramMap.computeIfAbsent(name, key -> { + DoubleHistogramBuilder builder = + meter.histogramBuilder(fullMetricName).setUnit(unit).setDescription(description); + return builder.build(); + }); + } else { + LOGGER.error("Metric instrument creation failed for metric {} because OpenTelemetry is not initialized", name); + return null; + } + } else { + return null; + } + } + + public DoubleHistogram getHistogramWithoutBuckets(String name, String unit, String description) { + if (emitOpenTelemetryMetrics) { + String fullMetricName = getFullMetricName(getMetricPrefix(), name); + if (openTelemetry != null) { + return histogramMap.computeIfAbsent(name, key -> { + DoubleHistogramBuilder builder = meter.histogramBuilder(fullMetricName) + .setExplicitBucketBoundariesAdvice(new ArrayList<>()) + .setUnit(unit) + .setDescription(description); + return builder.build(); + }); + } else { + LOGGER.error("Metric instrument creation failed for metric {} because OpenTelemetry is not initialized", name); + return null; + } + } else { + return null; + } + } + + public LongCounter getCounter(String name, String unit, String description) { + if (emitOpenTelemetryMetrics) { + String fullMetricName = getFullMetricName(getMetricPrefix(), name); + if (openTelemetry != null) { + return counterMap.computeIfAbsent(name, key -> { + LongCounterBuilder builder = meter.counterBuilder(fullMetricName).setUnit(unit).setDescription(description); + return builder.build(); + }); + } else { + LOGGER.error("Metric instrument creation failed for metric {} because OpenTelemetry is not initialized", name); + return null; + } + } else { + return null; + } + } + + public void close() { + LOGGER.info("OpenTelemetry close"); + sdkMeterProvider.shutdown(); + sdkMeterProvider = null; + } + + static class LogBasedMetricExporter implements MetricExporter { + @Override + public AggregationTemporality getAggregationTemporality(InstrumentType instrumentType) { + return DELTA; + } + + 
@Override + public CompletableResultCode export(Collection metrics) { + LOGGER.info("Logging OpenTelemetry metrics for debug purpose: {}", Arrays.toString(metrics.toArray())); + return CompletableResultCode.ofSuccess(); + } + + @Override + public CompletableResultCode flush() { + return CompletableResultCode.ofSuccess(); + } + + @Override + public CompletableResultCode shutdown() { + return CompletableResultCode.ofSuccess(); + } + } + + // for testing purpose + public SdkMeterProvider getSdkMeterProvider() { + return sdkMeterProvider; + } + + public OpenTelemetry getOpenTelemetry() { + return openTelemetry; + } + + public Meter getMeter() { + return meter; + } +} diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/VeniceProperties.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/VeniceProperties.java index e9855ece93..06bdb5d03c 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/VeniceProperties.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/VeniceProperties.java @@ -458,4 +458,8 @@ public Properties toProperties() { public boolean isEmpty() { return this.props.isEmpty(); } + + public Map getPropsMap() { + return props; + } } diff --git a/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/metrics/MetricsRepositoryUtils.java b/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/metrics/MetricsRepositoryUtils.java index b95502ab31..022e547ff7 100644 --- a/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/metrics/MetricsRepositoryUtils.java +++ b/internal/venice-client-common/src/main/java/com/linkedin/venice/utils/metrics/MetricsRepositoryUtils.java @@ -1,5 +1,7 @@ package com.linkedin.venice.utils.metrics; +import com.linkedin.venice.stats.VeniceMetricsConfig; +import com.linkedin.venice.stats.VeniceMetricsRepository; import io.tehuti.metrics.MetricConfig; import 
io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.stats.AsyncGauge; @@ -20,6 +22,10 @@ public static MetricsRepository createSingleThreadedMetricsRepository() { return createSingleThreadedMetricsRepository(TimeUnit.MINUTES.toMillis(1), 100); } + public static VeniceMetricsRepository createSingleThreadedVeniceMetricsRepository() { + return createSingleThreadedVeniceMetricsRepository(TimeUnit.MINUTES.toMillis(1), 100); + } + public static MetricsRepository createSingleThreadedMetricsRepository( long maxMetricsMeasurementTimeoutMs, long initialMetricsMeasurementTimeoutMs) { @@ -31,4 +37,17 @@ public static MetricsRepository createSingleThreadedMetricsRepository( .setMaxMetricsMeasurementTimeoutInMs(maxMetricsMeasurementTimeoutMs) .build())); } + + public static VeniceMetricsRepository createSingleThreadedVeniceMetricsRepository( + long maxMetricsMeasurementTimeoutMs, + long initialMetricsMeasurementTimeoutMs) { + MetricConfig tehutiMetricsConfig = new MetricConfig( + new AsyncGauge.AsyncGaugeExecutor.Builder().setMetricMeasurementThreadCount(1) + .setSlowMetricMeasurementThreadCount(1) + .setInitialMetricsMeasurementTimeoutInMs(initialMetricsMeasurementTimeoutMs) + .setMaxMetricsMeasurementTimeoutInMs(maxMetricsMeasurementTimeoutMs) + .build()); + return new VeniceMetricsRepository( + new VeniceMetricsConfig.VeniceMetricsConfigBuilder().setTehutiMetricConfig(tehutiMetricsConfig).build()); + } } diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java new file mode 100644 index 0000000000..1c1bd06b2b --- /dev/null +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsConfigTest.java @@ -0,0 +1,107 @@ +package com.linkedin.venice.stats; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static 
org.testng.Assert.assertThrows; +import static org.testng.Assert.assertTrue; + +import com.linkedin.venice.exceptions.VeniceException; +import com.linkedin.venice.stats.VeniceMetricsConfig.VeniceMetricsConfigBuilder; +import io.tehuti.metrics.MetricConfig; +import java.util.HashMap; +import java.util.Map; +import org.testng.annotations.Test; + + +public class VeniceMetricsConfigTest { + @Test + public void testDefaultValues() { + VeniceMetricsConfig config = new VeniceMetricsConfigBuilder().build(); + assertEquals(config.getServiceName(), "noop_service"); + assertEquals(config.getMetricPrefix(), "service"); + assertFalse(config.isEmitOpenTelemetryMetrics()); + assertFalse(config.isEmitToHttpGrpcEndpoint()); + assertFalse(config.isEmitToLog()); + assertTrue(config.isUseExponentialHistogram()); + assertEquals(config.getExponentialHistogramMaxScale(), 3); + assertEquals(config.getExponentialHistogramMaxBuckets(), 250); + } + + @Test + public void testCustomValues() { + Map otelConfigs = new HashMap<>(); + otelConfigs.put("otel.venice.enabled", "true"); + otelConfigs.put("otel.venice.export.to.log", "true"); + + MetricConfig metricConfig = new MetricConfig(); + + VeniceMetricsConfig config = new VeniceMetricsConfigBuilder().setServiceName("TestService") + .setMetricPrefix("TestPrefix") + .setTehutiMetricConfig(metricConfig) + .extractAndSetOtelConfigs(otelConfigs) + .build(); + + assertEquals(config.getServiceName(), "TestService"); + assertEquals(config.getMetricPrefix(), "TestPrefix"); + assertTrue(config.isEmitOpenTelemetryMetrics()); + assertTrue(config.getOtelConfigs().containsKey("otel.venice.enabled")); + assertTrue(config.isEmitToLog()); + assertEquals(config.getTehutiMetricConfig(), metricConfig); + } + + @Test + public void testOtelMissingConfigs() { + Map invalidOtelConfigs = new HashMap<>(); + invalidOtelConfigs.put("otel.venice.enabled", "true"); + invalidOtelConfigs.put("otel.venice.export.to.http.grpc.endpoint", "true"); + + VeniceMetricsConfigBuilder 
builder = new VeniceMetricsConfigBuilder().extractAndSetOtelConfigs(invalidOtelConfigs); + + // should throw exception because required configs are missing + assertThrows(VeniceException.class, builder::build); + } + + @Test + public void testGetMetricsPrefix() { + assertEquals(VeniceMetricsConfigBuilder.getMetricsPrefix("venice-router"), "router"); + assertEquals(VeniceMetricsConfigBuilder.getMetricsPrefix("service_name"), "name"); + assertEquals(VeniceMetricsConfigBuilder.getMetricsPrefix("com.linkedin.service"), "service"); + } + + @Test + public void testOtelConfigWithInvalidMetricFormat() { + Map otelConfigs = new HashMap<>(); + otelConfigs.put("otel.venice.metrics.format", "INVALID_FORMAT"); + + VeniceMetricsConfig config = new VeniceMetricsConfigBuilder().extractAndSetOtelConfigs(otelConfigs).build(); + + assertEquals( + config.getMetricFormat(), + VeniceOpenTelemetryMetricFormat.SNAKE_CASE, + "Invalid metric format should fall back to default."); + } + + @Test + public void testOtelConfigWithValidMetricFormat() { + Map otelConfigs = new HashMap<>(); + otelConfigs.put("otel.venice.enabled", "true"); + otelConfigs.put("otel.venice.metrics.format", "CAMEL_CASE"); + + VeniceMetricsConfig config = new VeniceMetricsConfigBuilder().extractAndSetOtelConfigs(otelConfigs).build(); + + assertEquals(config.getMetricFormat(), VeniceOpenTelemetryMetricFormat.CAMEL_CASE); + } + + @Test + public void testEnableHttpGrpcEndpointConfigWithRequiredFields() { + Map otelConfigs = new HashMap<>(); + otelConfigs.put("otel.venice.enabled", "true"); + otelConfigs.put("otel.venice.export.to.http.grpc.endpoint", "true"); + otelConfigs.put("otel.exporter.otlp.metrics.protocol", "http/protobuf"); + otelConfigs.put("otel.exporter.otlp.metrics.endpoint", "http://localhost"); + + VeniceMetricsConfig config = new VeniceMetricsConfigBuilder().extractAndSetOtelConfigs(otelConfigs).build(); + + assertTrue(config.isEmitToHttpGrpcEndpoint()); + } +} diff --git 
a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsRepositoryTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsRepositoryTest.java new file mode 100644 index 0000000000..d2de313da6 --- /dev/null +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceMetricsRepositoryTest.java @@ -0,0 +1,130 @@ +package com.linkedin.venice.stats; + +import static org.testng.Assert.assertEquals; + +import io.tehuti.Metric; +import io.tehuti.metrics.Measurable; +import io.tehuti.metrics.MetricsReporter; +import io.tehuti.metrics.MetricsRepository; +import io.tehuti.metrics.Sensor; +import io.tehuti.metrics.stats.AsyncGauge; +import java.util.Map; +import org.mockito.Mockito; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + + +public class VeniceMetricsRepositoryTest { + private VeniceMetricsRepository metricsRepository; + private VeniceOpenTelemetryMetricsRepository mockOpenTelemetryMetricsRepository; + private MetricsRepository mockDelegate; + + @BeforeMethod + public void setUp() { + VeniceMetricsConfig config = new VeniceMetricsConfig.VeniceMetricsConfigBuilder().build(); + mockOpenTelemetryMetricsRepository = Mockito.mock(VeniceOpenTelemetryMetricsRepository.class); + mockDelegate = Mockito.mock(MetricsRepository.class); + metricsRepository = new VeniceMetricsRepository(mockDelegate, config, mockOpenTelemetryMetricsRepository); + } + + @AfterMethod + public void tearDown() { + metricsRepository.close(); + } + + @Test + public void testConstructorWithDelegateAndConfig() { + VeniceMetricsConfig config = new VeniceMetricsConfig.VeniceMetricsConfigBuilder().build(); + VeniceMetricsRepository repo = + new VeniceMetricsRepository(mockDelegate, config, mockOpenTelemetryMetricsRepository); + + assertEquals(repo.getVeniceMetricsConfig(), config); + assertEquals(repo.getOpenTelemetryMetricsRepository(), 
mockOpenTelemetryMetricsRepository); + } + + @Test + public void testCloseWithDelegate() { + metricsRepository.close(); + Mockito.verify(mockDelegate, Mockito.times(1)).close(); + Mockito.verify(mockOpenTelemetryMetricsRepository, Mockito.times(1)).close(); + } + + @Test + public void testAddMetricDelegation() { + Measurable measurable = Mockito.mock(Measurable.class); + Metric metric = Mockito.mock(Metric.class); + + Mockito.when(mockDelegate.addMetric("testMetric", measurable)).thenReturn(metric); + + Metric returnedMetric = metricsRepository.addMetric("testMetric", measurable); + assertEquals(returnedMetric, metric); + + Mockito.verify(mockDelegate, Mockito.times(1)).addMetric("testMetric", measurable); + } + + @Test + public void testGetSensorDelegation() { + Sensor sensor = Mockito.mock(Sensor.class); + Mockito.when(mockDelegate.getSensor("testSensor")).thenReturn(sensor); + + Sensor returnedSensor = metricsRepository.getSensor("testSensor"); + assertEquals(returnedSensor, sensor); + + Mockito.verify(mockDelegate, Mockito.times(1)).getSensor("testSensor"); + } + + @Test + public void testMetricsRetrieval() { + Map mockMetrics = Mockito.mock(Map.class); + Mockito.doReturn(mockMetrics).when(mockDelegate).metrics(); + + Map retrievedMetrics = metricsRepository.metrics(); + assertEquals(retrievedMetrics, mockMetrics); + + Mockito.verify(mockDelegate, Mockito.times(1)).metrics(); + } + + @Test + public void testGetMetricDelegation() { + Metric metric = Mockito.mock(Metric.class); + Mockito.when(mockDelegate.getMetric("testMetric")).thenReturn(metric); + + Metric retrievedMetric = metricsRepository.getMetric("testMetric"); + assertEquals(retrievedMetric, metric); + + Mockito.verify(mockDelegate, Mockito.times(1)).getMetric("testMetric"); + } + + @Test + public void testAddReporterDelegation() { + MetricsReporter mockReporter = Mockito.mock(MetricsReporter.class); + + metricsRepository.addReporter(mockReporter); + Mockito.verify(mockDelegate, 
Mockito.times(1)).addReporter(mockReporter); + } + + @Test + public void testAsyncGaugeExecutorDelegation() { + AsyncGauge.AsyncGaugeExecutor asyncGaugeExecutor = Mockito.mock(AsyncGauge.AsyncGaugeExecutor.class); + Mockito.when(mockDelegate.getAsyncGaugeExecutor()).thenReturn(asyncGaugeExecutor); + + AsyncGauge.AsyncGaugeExecutor executor = metricsRepository.getAsyncGaugeExecutor(); + assertEquals(executor, asyncGaugeExecutor); + + Mockito.verify(mockDelegate, Mockito.times(1)).getAsyncGaugeExecutor(); + } + + @Test + public void testSensorCreationAndDeletionWithDelegate() { + Sensor mockSensor = Mockito.mock(Sensor.class); + Mockito.when(mockDelegate.sensor("testSensor")).thenReturn(mockSensor); + + Sensor sensor = metricsRepository.sensor("testSensor"); + assertEquals(sensor, mockSensor); + + metricsRepository.removeSensor("testSensor"); + Mockito.verify(mockDelegate, Mockito.times(1)).sensor("testSensor"); + Mockito.verify(mockDelegate, Mockito.times(1)).removeSensor("testSensor"); + } +} diff --git a/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java new file mode 100644 index 0000000000..85b9d109cc --- /dev/null +++ b/internal/venice-client-common/src/test/java/com/linkedin/venice/stats/VeniceOpenTelemetryMetricsRepositoryTest.java @@ -0,0 +1,141 @@ +package com.linkedin.venice.stats; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNull; +import static org.testng.Assert.assertSame; +import static org.testng.Assert.fail; + +import io.opentelemetry.api.metrics.DoubleHistogram; +import io.opentelemetry.api.metrics.LongCounter; +import io.opentelemetry.sdk.metrics.export.MetricExporter; +import java.util.HashMap; +import org.mockito.Mock; +import org.mockito.Mockito; +import 
org.mockito.MockitoAnnotations; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + + +public class VeniceOpenTelemetryMetricsRepositoryTest { + private VeniceOpenTelemetryMetricsRepository metricsRepository; + + @Mock + private VeniceMetricsConfig mockMetricsConfig; + + @BeforeMethod + public void setUp() { + MockitoAnnotations.initMocks(this); + + Mockito.when(mockMetricsConfig.isEmitOpenTelemetryMetrics()).thenReturn(true); + Mockito.when(mockMetricsConfig.getMetricFormat()).thenReturn(VeniceOpenTelemetryMetricFormat.SNAKE_CASE); + Mockito.when(mockMetricsConfig.getMetricPrefix()).thenReturn("test_prefix"); + Mockito.when(mockMetricsConfig.getServiceName()).thenReturn("test_service"); + Mockito.when(mockMetricsConfig.isEmitToHttpGrpcEndpoint()).thenReturn(true); + Mockito.when(mockMetricsConfig.isUseExponentialHistogram()).thenReturn(false); + + metricsRepository = new VeniceOpenTelemetryMetricsRepository(mockMetricsConfig); + } + + @AfterMethod + public void tearDown() { + metricsRepository.close(); + } + + @Test + public void testConstructorInitialize() { + // Check if OpenTelemetry and SdkMeterProvider are initialized correctly + assertNotNull(metricsRepository.getOpenTelemetry()); + assertNotNull(metricsRepository.getSdkMeterProvider()); + assertNotNull(metricsRepository.getMeter()); + } + + @Test + public void testConstructorWithEmitDisabled() { + Mockito.when(mockMetricsConfig.isEmitOpenTelemetryMetrics()).thenReturn(false); + VeniceOpenTelemetryMetricsRepository metricsRepository = + new VeniceOpenTelemetryMetricsRepository(mockMetricsConfig); + + // Verify that metrics-related fields are null when metrics are disabled + assertNull(metricsRepository.getOpenTelemetry()); + assertNull(metricsRepository.getSdkMeterProvider()); + assertNull(metricsRepository.getMeter()); + assertNull(metricsRepository.getHistogram("test", "unit", "desc")); + 
assertNull(metricsRepository.getCounter("test", "unit", "desc")); + } + + @Test + public void testGetOtlpHttpMetricExporterWithValidConfig() { + HashMap otelConfigs = new HashMap<>(); + otelConfigs.put("otel.exporter.otlp.endpoint", "http://localhost:4318"); + Mockito.when(mockMetricsConfig.getOtelConfigs()).thenReturn(otelConfigs); + + MetricExporter exporter = metricsRepository.getOtlpHttpMetricExporter(mockMetricsConfig); + + // Verify that the exporter is not null and is of the expected type + assertNotNull(exporter); + + // Check that the exporter uses the correct endpoint + assertEquals(otelConfigs.get("otel.exporter.otlp.endpoint"), "http://localhost:4318"); + } + + @Test + public void testGetOtlpHttpMetricExporterWithEmptyConfig() { + Mockito.when(mockMetricsConfig.getOtelConfigs()).thenReturn(new HashMap<>()); + + try { + MetricExporter exporter = metricsRepository.getOtlpHttpMetricExporter(mockMetricsConfig); + assertNotNull(exporter, "Exporter should be created even with an empty config."); + } catch (Exception e) { + fail("Exporter creation should not throw an exception with empty config."); + } + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testValidateMetricNameWithNullName() { + VeniceOpenTelemetryMetricsRepository.validateMetricName(null); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testValidateMetricNameWithEmptyName() { + VeniceOpenTelemetryMetricsRepository.validateMetricName(""); + } + + @Test(expectedExceptions = IllegalArgumentException.class) + public void testValidateMetricNameWithInvalidName() { + VeniceOpenTelemetryMetricsRepository.validateMetricName("Invalid Name!"); + } + + @Test + public void testTransformMetricName() { + Mockito.when(mockMetricsConfig.getMetricFormat()).thenReturn(VeniceOpenTelemetryMetricFormat.SNAKE_CASE); + assertEquals(metricsRepository.getFullMetricName("prefix", "metric_name"), "prefix.metric_name"); + + String transformedName = 
VeniceOpenTelemetryMetricsRepository + .transformMetricName("test.test_metric_name", VeniceOpenTelemetryMetricFormat.PASCAL_CASE); + assertEquals(transformedName, "Test.TestMetricName"); + + transformedName = VeniceOpenTelemetryMetricsRepository + .transformMetricName("test.test_metric_name", VeniceOpenTelemetryMetricFormat.CAMEL_CASE); + assertEquals(transformedName, "test.testMetricName"); + } + + @Test + public void testCreateTwoHistograms() { + DoubleHistogram histogram1 = metricsRepository.getHistogram("test_histogram", "unit", "description"); + DoubleHistogram histogram2 = metricsRepository.getHistogram("test_histogram", "unit", "description"); + + assertNotNull(histogram1); + assertSame(histogram1, histogram2, "Should return the same instance for the same histogram name."); + } + + @Test + public void testCreateTwoCounters() { + LongCounter counter1 = metricsRepository.getCounter("test_counter", "unit", "description"); + LongCounter counter2 = metricsRepository.getCounter("test_counter", "unit", "description"); + + assertNotNull(counter1); + assertSame(counter1, counter2, "Should return the same instance for the same counter name."); + } +} diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStoreStats.java b/internal/venice-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStoreStats.java index 07483b6099..85b335f2ce 100644 --- a/internal/venice-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStoreStats.java +++ b/internal/venice-common/src/main/java/com/linkedin/venice/stats/AbstractVeniceAggStoreStats.java @@ -18,7 +18,7 @@ public class AbstractVeniceAggStoreStats extends public AbstractVeniceAggStoreStats( String clusterName, MetricsRepository metricsRepository, - StatsSupplier statsSupplier, + StatsSupplierMetricsRepository statsSupplier, ReadOnlyStoreRepository metadataRepository, boolean isUnregisterMetricForDeletedStoreEnabled) { super(clusterName, metricsRepository, 
statsSupplier); @@ -28,7 +28,7 @@ public AbstractVeniceAggStoreStats( public AbstractVeniceAggStoreStats( MetricsRepository metricsRepository, - StatsSupplier statsSupplier, + StatsSupplierMetricsRepository statsSupplier, ReadOnlyStoreRepository metadataRepository, boolean isUnregisterMetricForDeletedStoreEnabled) { super(metricsRepository, statsSupplier); @@ -38,9 +38,10 @@ public AbstractVeniceAggStoreStats( public AbstractVeniceAggStoreStats( MetricsRepository metricsRepository, + String clusterName, ReadOnlyStoreRepository metadataRepository, boolean isUnregisterMetricForDeletedStoreEnabled) { - super(metricsRepository); + super(metricsRepository, clusterName); this.isUnregisterMetricForDeletedStoreEnabled = isUnregisterMetricForDeletedStoreEnabled; registerStoreDataChangedListenerIfRequired(metadataRepository); } diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategory.java b/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategory.java new file mode 100644 index 0000000000..dce14cc69e --- /dev/null +++ b/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategory.java @@ -0,0 +1,108 @@ +package com.linkedin.venice.stats.dimensions; + +/** + * Copied {@link io.netty.handler.codec.http.HttpStatusClass} and modified it to have 1xx, 2xx, etc. 
as categories + */ +public enum VeniceHttpResponseStatusCodeCategory { + INFORMATIONAL(100, 200, "1xx"), + /** + * The success class (2xx) + */ + SUCCESS(200, 300, "2xx"), + /** + * The redirection class (3xx) + */ + REDIRECTION(300, 400, "3xx"), + /** + * The client error class (4xx) + */ + CLIENT_ERROR(400, 500, "4xx"), + /** + * The server error class (5xx) + */ + SERVER_ERROR(500, 600, "5xx"), + /** + * The unknown class + */ + UNKNOWN(0, 0, "Unknown") { + @Override + public boolean contains(int code) { + return code < 100 || code >= 600; + } + }; + + /** + * Returns the class of the specified HTTP status code. + */ + public static VeniceHttpResponseStatusCodeCategory valueOf(int code) { + if (INFORMATIONAL.contains(code)) { + return INFORMATIONAL; + } + if (SUCCESS.contains(code)) { + return SUCCESS; + } + if (REDIRECTION.contains(code)) { + return REDIRECTION; + } + if (CLIENT_ERROR.contains(code)) { + return CLIENT_ERROR; + } + if (SERVER_ERROR.contains(code)) { + return SERVER_ERROR; + } + return UNKNOWN; + } + + /** + * Returns the class of the specified HTTP status code. + * @param code Just the numeric portion of the http status code. + */ + public static VeniceHttpResponseStatusCodeCategory valueOf(CharSequence code) { + if (code != null && code.length() == 3) { + char c0 = code.charAt(0); + return isDigit(c0) && isDigit(code.charAt(1)) && isDigit(code.charAt(2)) ? valueOf(digit(c0) * 100) : UNKNOWN; + } + return UNKNOWN; + } + + private static int digit(char c) { + return c - '0'; + } + + private static boolean isDigit(char c) { + return c >= '0' && c <= '9'; + } + + private final int min; + private final int max; + private final String category; + + VeniceHttpResponseStatusCodeCategory(int min, int max, String category) { + this.min = min; + this.max = max; + this.category = category; + } + + /** + * Returns {@code true} if and only if the specified HTTP status code falls into this class. 
+ */ + public boolean contains(int code) { + return code >= min && code < max; + } + + /** + * Returns the category of this HTTP status class. + */ + public String getCategory() { + return category; + } + + // used for tests + public int getMin() { + return min; + } + + public int getMax() { + return max; + } +} diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java b/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java new file mode 100644 index 0000000000..52cb21f610 --- /dev/null +++ b/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensions.java @@ -0,0 +1,48 @@ +package com.linkedin.venice.stats.dimensions; + +import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricFormat.CAMEL_CASE; +import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricFormat.PASCAL_CASE; +import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricFormat.SNAKE_CASE; +import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricsRepository.transformMetricName; +import static com.linkedin.venice.stats.VeniceOpenTelemetryMetricsRepository.validateMetricName; + +import com.linkedin.venice.stats.VeniceOpenTelemetryMetricFormat; + + +public enum VeniceMetricsDimensions { + VENICE_STORE_NAME("venice.store.name"), VENICE_CLUSTER_NAME("venice.cluster.name"), + + /** {@link com.linkedin.venice.read.RequestType#requestTypeName} */ + VENICE_REQUEST_METHOD("venice.request.method"), + + /** {@link io.netty.handler.codec.http.HttpResponseStatus} ie. 200, 400, etc */ + HTTP_RESPONSE_STATUS_CODE("http.response.status_code"), + + /** {@link VeniceHttpResponseStatusCodeCategory#category} ie. 
1xx, 2xx, etc */ + HTTP_RESPONSE_STATUS_CODE_CATEGORY("http.response.status_code_category"), + + /** {@link VeniceRequestValidationOutcome#outcome} */ + VENICE_REQUEST_VALIDATION_OUTCOME("venice.request.validation_outcome"), + + /** {@link VeniceResponseStatusCategory} */ + VENICE_RESPONSE_STATUS_CODE_CATEGORY("venice.response.status_code_category"), + + /** {@link VeniceRequestRetryType} */ + VENICE_REQUEST_RETRY_TYPE("venice.request.retry_type"), + + /** {@link VeniceRequestRetryAbortReason} */ + VENICE_REQUEST_RETRY_ABORT_REASON("venice.request.retry_abort_reason"); + + private final String[] dimensionName = new String[VeniceOpenTelemetryMetricFormat.SIZE]; + + VeniceMetricsDimensions(String dimensionName) { + validateMetricName(dimensionName); + this.dimensionName[SNAKE_CASE.getValue()] = dimensionName; + this.dimensionName[CAMEL_CASE.getValue()] = transformMetricName(dimensionName, CAMEL_CASE); + this.dimensionName[PASCAL_CASE.getValue()] = transformMetricName(dimensionName, PASCAL_CASE); + } + + public String getDimensionName(VeniceOpenTelemetryMetricFormat format) { + return dimensionName[format.getValue()]; + } +} diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReason.java b/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReason.java new file mode 100644 index 0000000000..adc143042d --- /dev/null +++ b/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReason.java @@ -0,0 +1,17 @@ +package com.linkedin.venice.stats.dimensions; + +public enum VeniceRequestRetryAbortReason { + RETRY_ABORTED_BY_SLOW_ROUTE("slow_route"), RETRY_ABORTED_BY_DELAY_CONSTRAINT("delay_constraint"), + RETRY_ABORTED_BY_MAX_RETRY_ROUTE_LIMIT("max_retry_router_limit"), + RETRY_ABORTED_BY_NO_AVAILABLE_REPLICA("no_available_replica"); + + private final String abortReason; + + VeniceRequestRetryAbortReason(String abortReason) { + 
this.abortReason = abortReason; + } + + public String getAbortReason() { + return this.abortReason; + } +} diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryType.java b/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryType.java new file mode 100644 index 0000000000..7faa49c5a4 --- /dev/null +++ b/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryType.java @@ -0,0 +1,15 @@ +package com.linkedin.venice.stats.dimensions; + +public enum VeniceRequestRetryType { + ERROR_RETRY("error_retry"), LONG_TAIL_RETRY("long_tail_retry"); + + private final String retryType; + + VeniceRequestRetryType(String retryType) { + this.retryType = retryType; + } + + public String getRetryType() { + return this.retryType; + } +} diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcome.java b/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcome.java new file mode 100644 index 0000000000..8b961e4592 --- /dev/null +++ b/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcome.java @@ -0,0 +1,15 @@ +package com.linkedin.venice.stats.dimensions; + +public enum VeniceRequestValidationOutcome { + VALID("valid"), INVALID_KEY_COUNT_LIMIT_EXCEEDED("invalid_key_count_limit_exceeded"); + + private final String outcome; + + VeniceRequestValidationOutcome(String outcome) { + this.outcome = outcome; + } + + public String getOutcome() { + return this.outcome; + } +} diff --git a/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategory.java b/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategory.java new file mode 100644 index 0000000000..80372c0e2d --- /dev/null +++ 
b/internal/venice-common/src/main/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategory.java @@ -0,0 +1,15 @@ +package com.linkedin.venice.stats.dimensions; + +public enum VeniceResponseStatusCategory { + HEALTHY("healthy"), UNHEALTHY("unhealthy"), TARDY("tardy"), THROTTLED("throttled"), BAD_REQUEST("bad_request"); + + private final String category; + + VeniceResponseStatusCategory(String category) { + this.category = category; + } + + public String getCategory() { + return this.category; + } +} diff --git a/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategoryTest.java b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategoryTest.java new file mode 100644 index 0000000000..2ba9e55dba --- /dev/null +++ b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceHttpResponseStatusCodeCategoryTest.java @@ -0,0 +1,56 @@ +package com.linkedin.venice.stats.dimensions; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotEquals; + +import org.testng.annotations.Test; + + +public class VeniceHttpResponseStatusCodeCategoryTest { + @Test() + public void testValues() { + for (VeniceHttpResponseStatusCodeCategory category: VeniceHttpResponseStatusCodeCategory.values()) { + switch (category) { + case INFORMATIONAL: + assertEquals(category.getCategory(), "1xx"); + assertEquals(category.getMin(), 100); + assertEquals(category.getMax(), 200); + break; + case SUCCESS: + assertEquals(category.getCategory(), "2xx"); + assertEquals(category.getMin(), 200); + assertEquals(category.getMax(), 300); + break; + case REDIRECTION: + assertEquals(category.getCategory(), "3xx"); + assertEquals(category.getMin(), 300); + assertEquals(category.getMax(), 400); + break; + case CLIENT_ERROR: + assertEquals(category.getCategory(), "4xx"); + assertEquals(category.getMin(), 400); + assertEquals(category.getMax(), 
500); + break; + case SERVER_ERROR: + assertEquals(category.getCategory(), "5xx"); + assertEquals(category.getMin(), 500); + assertEquals(category.getMax(), 600); + break; + case UNKNOWN: + assertEquals(category.getCategory(), "Unknown"); + assertEquals(category.getMin(), 0); + assertEquals(category.getMax(), 0); + break; + default: + throw new IllegalArgumentException("Unknown category: " + category); + } + } + } + + @Test + public void testUnknownCategory() { + assertEquals(VeniceHttpResponseStatusCodeCategory.valueOf(99), VeniceHttpResponseStatusCodeCategory.UNKNOWN); + assertNotEquals(VeniceHttpResponseStatusCodeCategory.valueOf(100), VeniceHttpResponseStatusCodeCategory.UNKNOWN); + assertEquals(VeniceHttpResponseStatusCodeCategory.valueOf(600), VeniceHttpResponseStatusCodeCategory.UNKNOWN); + } +} diff --git a/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java new file mode 100644 index 0000000000..1e459f40ed --- /dev/null +++ b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceMetricsDimensionsTest.java @@ -0,0 +1,123 @@ +package com.linkedin.venice.stats.dimensions; + +import static org.testng.Assert.assertEquals; + +import com.linkedin.venice.stats.VeniceOpenTelemetryMetricFormat; +import org.testng.annotations.Test; + + +public class VeniceMetricsDimensionsTest { + @Test + public void testGetDimensionNameInSnakeCase() { + VeniceOpenTelemetryMetricFormat format = VeniceOpenTelemetryMetricFormat.SNAKE_CASE; + for (VeniceMetricsDimensions dimension: VeniceMetricsDimensions.values()) { + switch (dimension) { + case VENICE_STORE_NAME: + assertEquals(dimension.getDimensionName(format), "venice.store.name"); + break; + case VENICE_CLUSTER_NAME: + assertEquals(dimension.getDimensionName(format), "venice.cluster.name"); + break; + case VENICE_REQUEST_METHOD: + 
assertEquals(dimension.getDimensionName(format), "venice.request.method"); + break; + case HTTP_RESPONSE_STATUS_CODE: + assertEquals(dimension.getDimensionName(format), "http.response.status_code"); + break; + case HTTP_RESPONSE_STATUS_CODE_CATEGORY: + assertEquals(dimension.getDimensionName(format), "http.response.status_code_category"); + break; + case VENICE_REQUEST_VALIDATION_OUTCOME: + assertEquals(dimension.getDimensionName(format), "venice.request.validation_outcome"); + break; + case VENICE_RESPONSE_STATUS_CODE_CATEGORY: + assertEquals(dimension.getDimensionName(format), "venice.response.status_code_category"); + break; + case VENICE_REQUEST_RETRY_TYPE: + assertEquals(dimension.getDimensionName(format), "venice.request.retry_type"); + break; + case VENICE_REQUEST_RETRY_ABORT_REASON: + assertEquals(dimension.getDimensionName(format), "venice.request.retry_abort_reason"); + break; + default: + throw new IllegalArgumentException("Unknown dimension: " + dimension); + } + } + } + + @Test + public void testGetDimensionNameInCamelCase() { + VeniceOpenTelemetryMetricFormat format = VeniceOpenTelemetryMetricFormat.CAMEL_CASE; + for (VeniceMetricsDimensions dimension: VeniceMetricsDimensions.values()) { + switch (dimension) { + case VENICE_STORE_NAME: + assertEquals(dimension.getDimensionName(format), "venice.store.name"); + break; + case VENICE_CLUSTER_NAME: + assertEquals(dimension.getDimensionName(format), "venice.cluster.name"); + break; + case VENICE_REQUEST_METHOD: + assertEquals(dimension.getDimensionName(format), "venice.request.method"); + break; + case HTTP_RESPONSE_STATUS_CODE: + assertEquals(dimension.getDimensionName(format), "http.response.statusCode"); + break; + case HTTP_RESPONSE_STATUS_CODE_CATEGORY: + assertEquals(dimension.getDimensionName(format), "http.response.statusCodeCategory"); + break; + case VENICE_REQUEST_VALIDATION_OUTCOME: + assertEquals(dimension.getDimensionName(format), "venice.request.validationOutcome"); + break; + case 
VENICE_RESPONSE_STATUS_CODE_CATEGORY: + assertEquals(dimension.getDimensionName(format), "venice.response.statusCodeCategory"); + break; + case VENICE_REQUEST_RETRY_TYPE: + assertEquals(dimension.getDimensionName(format), "venice.request.retryType"); + break; + case VENICE_REQUEST_RETRY_ABORT_REASON: + assertEquals(dimension.getDimensionName(format), "venice.request.retryAbortReason"); + break; + default: + throw new IllegalArgumentException("Unknown dimension: " + dimension); + } + } + } + + @Test + public void testGetDimensionNameInPascalCase() { + VeniceOpenTelemetryMetricFormat format = VeniceOpenTelemetryMetricFormat.PASCAL_CASE; + for (VeniceMetricsDimensions dimension: VeniceMetricsDimensions.values()) { + switch (dimension) { + case VENICE_STORE_NAME: + assertEquals(dimension.getDimensionName(format), "Venice.Store.Name"); + break; + case VENICE_CLUSTER_NAME: + assertEquals(dimension.getDimensionName(format), "Venice.Cluster.Name"); + break; + case VENICE_REQUEST_METHOD: + assertEquals(dimension.getDimensionName(format), "Venice.Request.Method"); + break; + case HTTP_RESPONSE_STATUS_CODE: + assertEquals(dimension.getDimensionName(format), "Http.Response.StatusCode"); + break; + case HTTP_RESPONSE_STATUS_CODE_CATEGORY: + assertEquals(dimension.getDimensionName(format), "Http.Response.StatusCodeCategory"); + break; + case VENICE_REQUEST_VALIDATION_OUTCOME: + assertEquals(dimension.getDimensionName(format), "Venice.Request.ValidationOutcome"); + break; + case VENICE_RESPONSE_STATUS_CODE_CATEGORY: + assertEquals(dimension.getDimensionName(format), "Venice.Response.StatusCodeCategory"); + break; + case VENICE_REQUEST_RETRY_TYPE: + assertEquals(dimension.getDimensionName(format), "Venice.Request.RetryType"); + break; + case VENICE_REQUEST_RETRY_ABORT_REASON: + assertEquals(dimension.getDimensionName(format), "Venice.Request.RetryAbortReason"); + break; + default: + throw new IllegalArgumentException("Unknown dimension: " + dimension); + } + } + } +} diff --git 
a/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReasonTest.java b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReasonTest.java new file mode 100644 index 0000000000..6476b3e1c2 --- /dev/null +++ b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryAbortReasonTest.java @@ -0,0 +1,30 @@ +package com.linkedin.venice.stats.dimensions; + +import static org.testng.Assert.assertEquals; + +import org.testng.annotations.Test; + + +public class VeniceRequestRetryAbortReasonTest { + @Test + public void testRetryRequestAbortReason() { + for (VeniceRequestRetryAbortReason reason: VeniceRequestRetryAbortReason.values()) { + switch (reason) { + case RETRY_ABORTED_BY_SLOW_ROUTE: + assertEquals(reason.getAbortReason(), "slow_route"); + break; + case RETRY_ABORTED_BY_DELAY_CONSTRAINT: + assertEquals(reason.getAbortReason(), "delay_constraint"); + break; + case RETRY_ABORTED_BY_MAX_RETRY_ROUTE_LIMIT: + assertEquals(reason.getAbortReason(), "max_retry_router_limit"); + break; + case RETRY_ABORTED_BY_NO_AVAILABLE_REPLICA: + assertEquals(reason.getAbortReason(), "no_available_replica"); + break; + default: + throw new IllegalArgumentException("Unknown reason: " + reason); + } + } + } +} diff --git a/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryTypeTest.java b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryTypeTest.java new file mode 100644 index 0000000000..52c8bfb94c --- /dev/null +++ b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestRetryTypeTest.java @@ -0,0 +1,24 @@ +package com.linkedin.venice.stats.dimensions; + +import static org.testng.Assert.assertEquals; + +import org.testng.annotations.Test; + + +public class VeniceRequestRetryTypeTest { + @Test + public void testVeniceRequestRetryType() { + for 
(VeniceRequestRetryType retryType: VeniceRequestRetryType.values()) { + switch (retryType) { + case ERROR_RETRY: + assertEquals(retryType.getRetryType(), "error_retry"); + break; + case LONG_TAIL_RETRY: + assertEquals(retryType.getRetryType(), "long_tail_retry"); + break; + default: + throw new IllegalArgumentException("Unknown retry type: " + retryType); + } + } + } +} diff --git a/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcomeTest.java b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcomeTest.java new file mode 100644 index 0000000000..28f804eab5 --- /dev/null +++ b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceRequestValidationOutcomeTest.java @@ -0,0 +1,24 @@ +package com.linkedin.venice.stats.dimensions; + +import static org.testng.Assert.assertEquals; + +import org.testng.annotations.Test; + + +public class VeniceRequestValidationOutcomeTest { + @Test + public void testVeniceRequestValidationOutcome() { + for (VeniceRequestValidationOutcome outcome: VeniceRequestValidationOutcome.values()) { + switch (outcome) { + case VALID: + assertEquals(outcome.getOutcome(), "valid"); + break; + case INVALID_KEY_COUNT_LIMIT_EXCEEDED: + assertEquals(outcome.getOutcome(), "invalid_key_count_limit_exceeded"); + break; + default: + throw new IllegalArgumentException("Unknown outcome: " + outcome); + } + } + } +} diff --git a/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategoryTest.java b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategoryTest.java new file mode 100644 index 0000000000..22272d3576 --- /dev/null +++ b/internal/venice-common/src/test/java/com/linkedin/venice/stats/dimensions/VeniceResponseStatusCategoryTest.java @@ -0,0 +1,33 @@ +package com.linkedin.venice.stats.dimensions; + +import static 
org.testng.Assert.assertEquals; + +import org.testng.annotations.Test; + + +public class VeniceResponseStatusCategoryTest { + @Test + public void testVeniceResponseStatusCategory() { + for (VeniceResponseStatusCategory responseStatusCategory: VeniceResponseStatusCategory.values()) { + switch (responseStatusCategory) { + case HEALTHY: + assertEquals(responseStatusCategory.getCategory(), "healthy"); + break; + case UNHEALTHY: + assertEquals(responseStatusCategory.getCategory(), "unhealthy"); + break; + case TARDY: + assertEquals(responseStatusCategory.getCategory(), "tardy"); + break; + case THROTTLED: + assertEquals(responseStatusCategory.getCategory(), "throttled"); + break; + case BAD_REQUEST: + assertEquals(responseStatusCategory.getCategory(), "bad_request"); + break; + default: + throw new IllegalArgumentException("Unknown response status category: " + responseStatusCategory); + } + } + } +} diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java index 23260f6058..b5092d71fa 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java +++ b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/integration/utils/VeniceRouterWrapper.java @@ -60,6 +60,7 @@ public class VeniceRouterWrapper extends ProcessWrapper implements MetricsAware public static final String CLUSTER_DISCOVERY_D2_SERVICE_NAME = ClientConfig.DEFAULT_CLUSTER_DISCOVERY_D2_SERVICE_NAME + "_test"; private static final String ROUTER_SERVICE_NAME = "venice-router"; + private static final String ROUTER_SERVICE_METRIC_PREFIX = "router"; private final VeniceProperties properties; private final String zkAddress; private RouterServer service; @@ -152,6 +153,12 @@ static StatefulServiceProvider generateService( .put(MAX_READ_CAPACITY, 
DEFAULT_PER_ROUTER_READ_QUOTA) .put(SYSTEM_SCHEMA_CLUSTER_NAME, clusterName) .put(ROUTER_STORAGE_NODE_CLIENT_TYPE, StorageNodeClientType.APACHE_HTTP_ASYNC_CLIENT.name()) + .put("otel.venice.enabled", Boolean.TRUE.toString()) + .put("otel.venice.export.to.log", Boolean.TRUE.toString()) + .put("otel.venice.export.to.http.grpc.endpoint", Boolean.TRUE.toString()) + .put("otel.exporter.otlp.metrics.protocol", "http/protobuf") + .put("otel.exporter.otlp.metrics.endpoint", "http://localhost:4318/v1/metrics") + .put("otel.exporter.otlp.metrics.temporality.preference", "delta") .put(properties); // setup d2 config first @@ -175,7 +182,10 @@ static StatefulServiceProvider generateService( d2Servers, Optional.empty(), Optional.of(SslUtils.getVeniceLocalSslFactory()), - TehutiUtils.getMetricsRepository(ROUTER_SERVICE_NAME), + TehutiUtils.getVeniceMetricsRepository( + ROUTER_SERVICE_NAME, + ROUTER_SERVICE_METRIC_PREFIX, + routerProperties.getPropsMap()), D2TestUtils.getAndStartD2Client(zkAddress), CLUSTER_DISCOVERY_D2_SERVICE_NAME); return new VeniceRouterWrapper( @@ -237,7 +247,8 @@ protected void newProcess() { d2Servers, Optional.empty(), Optional.of(SslUtils.getVeniceLocalSslFactory()), - TehutiUtils.getMetricsRepository(ROUTER_SERVICE_NAME), + TehutiUtils + .getVeniceMetricsRepository(ROUTER_SERVICE_NAME, ROUTER_SERVICE_METRIC_PREFIX, properties.getPropsMap()), D2TestUtils.getAndStartD2Client(zkAddress), CLUSTER_DISCOVERY_D2_SERVICE_NAME); LOGGER.info("Started VeniceRouterWrapper: {}", this); diff --git a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/router/api/TestVeniceDispatcher.java b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/router/api/TestVeniceDispatcher.java index a3a73dc59f..1349a56226 100644 --- a/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/router/api/TestVeniceDispatcher.java +++ 
b/internal/venice-test-common/src/integrationTest/java/com/linkedin/venice/router/api/TestVeniceDispatcher.java @@ -37,6 +37,7 @@ import com.linkedin.venice.router.stats.RouteHttpRequestStats; import com.linkedin.venice.router.stats.RouterStats; import com.linkedin.venice.schema.avro.ReadAvroProtocolDefinition; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.TestUtils; import io.netty.handler.codec.http.DefaultFullHttpResponse; import io.netty.handler.codec.http.DefaultHttpHeaders; @@ -46,7 +47,6 @@ import io.netty.handler.codec.http.HttpMethod; import io.netty.handler.codec.http.HttpResponseStatus; import io.netty.handler.codec.http.HttpVersion; -import io.tehuti.metrics.MetricsRepository; import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -322,7 +322,7 @@ private VeniceDispatcher getMockDispatcher(boolean forcePendingCheck, boolean fo doReturn(TimeUnit.MINUTES.toMillis(1)).when(routerConfig).getLeakedFutureCleanupThresholdMs(); doReturn(24).when(routerConfig).getIoThreadCountInPoolMode(); ReadOnlyStoreRepository mockStoreRepo = mock(ReadOnlyStoreRepository.class); - MetricsRepository mockMetricsRepo = new MetricsRepository(); + VeniceMetricsRepository mockMetricsRepo = new VeniceMetricsRepository(); RouterStats mockRouterStats = mock(RouterStats.class); RouteHttpRequestStats routeHttpRequestStats = mock(RouteHttpRequestStats.class); when(mockRouterStats.getStatsByType(any())).thenReturn(mock(AggRouterHttpRequestStats.class)); diff --git a/services/venice-router/build.gradle b/services/venice-router/build.gradle index 1eda645461..e1bf3cef4e 100644 --- a/services/venice-router/build.gradle +++ b/services/venice-router/build.gradle @@ -64,6 +64,10 @@ dependencies { implementation libraries.httpAsyncClient implementation project(':internal:alpini:router:alpini-router-api') implementation project(':internal:alpini:router:alpini-router-impl') + implementation libraries.opentelemetryApi + 
implementation libraries.opentelemetrySdk + implementation libraries.opentelemetryExporterLogging + implementation libraries.opentelemetryExporterOtlp testImplementation project(':clients:venice-thin-client') testImplementation libraries.kafkaClientsTest // TODO: Get rid of Kafka dependency in venice-common (used by TopicCreator) diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java b/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java index c70f2bedd7..eea6ce6d35 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/RouterServer.java @@ -82,6 +82,8 @@ import com.linkedin.venice.stats.TehutiUtils; import com.linkedin.venice.stats.ThreadPoolStats; import com.linkedin.venice.stats.VeniceJVMStats; +import com.linkedin.venice.stats.VeniceMetricsConfig; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.stats.ZkClientStatusStats; import com.linkedin.venice.throttle.EventThrottler; import com.linkedin.venice.utils.DaemonThreadFactory; @@ -135,7 +137,7 @@ public class RouterServer extends AbstractVeniceService { private static final String ROUTER_RETRY_MANAGER_THREAD_PREFIX = "Router-retry-manager-thread"; // Immutable state private final List serviceDiscoveryAnnouncers; - private final MetricsRepository metricsRepository; + private final VeniceMetricsRepository metricsRepository; private final Optional sslFactory; private final Optional accessController; @@ -194,6 +196,7 @@ public class RouterServer extends AbstractVeniceService { private final Map optionalChannelHandlers = new LinkedHashMap<>(); private static final String ROUTER_SERVICE_NAME = "venice-router"; + private static final String ROUTER_SERVICE_METRIC_PREFIX = "router"; /** * Thread number used to monitor the listening port; @@ -272,7 +275,8 @@ public RouterServer( serviceDiscoveryAnnouncers, 
accessController, sslFactory, - TehutiUtils.getMetricsRepository(ROUTER_SERVICE_NAME), + TehutiUtils + .getVeniceMetricsRepository(ROUTER_SERVICE_NAME, ROUTER_SERVICE_METRIC_PREFIX, properties.getPropsMap()), null, "venice-discovery"); } @@ -287,7 +291,7 @@ public RouterServer( List serviceDiscoveryAnnouncers, Optional accessController, Optional sslFactory, - MetricsRepository metricsRepository) { + VeniceMetricsRepository metricsRepository) { this( properties, serviceDiscoveryAnnouncers, @@ -306,6 +310,28 @@ public RouterServer( MetricsRepository metricsRepository, D2Client d2Client, String d2ServiceName) { + this( + properties, + serviceDiscoveryAnnouncers, + accessController, + sslFactory, + new VeniceMetricsRepository( + metricsRepository, + new VeniceMetricsConfig.VeniceMetricsConfigBuilder().setServiceName("venice-router") + .extractAndSetOtelConfigs(properties.getPropsMap()) + .build()), + d2Client, + d2ServiceName); + } + + public RouterServer( + VeniceProperties properties, + List serviceDiscoveryAnnouncers, + Optional accessController, + Optional sslFactory, + VeniceMetricsRepository metricsRepository, + D2Client d2Client, + String d2ServiceName) { this(properties, serviceDiscoveryAnnouncers, accessController, sslFactory, metricsRepository, true); HelixReadOnlyZKSharedSystemStoreRepository readOnlyZKSharedSystemStoreRepository = @@ -323,6 +349,7 @@ public RouterServer( this.routerStats = new RouterStats<>( requestType -> new AggRouterHttpRequestStats( metricsRepository, + config.getClusterName(), requestType, config.isKeyValueProfilingEnabled(), metadataRepository, @@ -366,7 +393,7 @@ private RouterServer( List serviceDiscoveryAnnouncers, Optional accessController, Optional sslFactory, - MetricsRepository metricsRepository, + VeniceMetricsRepository metricsRepository, boolean isCreateHelixManager) { config = new VeniceRouterConfig(properties); zkClient = @@ -381,7 +408,7 @@ private RouterServer( this.metaStoreShadowReader = Optional.empty(); 
this.metricsRepository = metricsRepository; - this.aggHostHealthStats = new AggHostHealthStats(metricsRepository); + this.aggHostHealthStats = new AggHostHealthStats(metricsRepository, config.getClusterName()); this.serviceDiscoveryAnnouncers = serviceDiscoveryAnnouncers; this.accessController = accessController; @@ -409,13 +436,23 @@ public RouterServer( List serviceDiscoveryAnnouncers, Optional sslFactory, HelixLiveInstanceMonitor liveInstanceMonitor) { - this(properties, serviceDiscoveryAnnouncers, Optional.empty(), sslFactory, new MetricsRepository(), false); + this( + properties, + serviceDiscoveryAnnouncers, + Optional.empty(), + sslFactory, + new VeniceMetricsRepository( + new VeniceMetricsConfig.VeniceMetricsConfigBuilder().setServiceName("venice-router") + .extractAndSetOtelConfigs(properties.getPropsMap()) + .build()), + false); this.routingDataRepository = routingDataRepository; this.hybridStoreQuotaRepository = hybridStoreQuotaRepository; this.metadataRepository = metadataRepository; this.routerStats = new RouterStats<>( requestType -> new AggRouterHttpRequestStats( metricsRepository, + config.getClusterName(), requestType, config.isKeyValueProfilingEnabled(), metadataRepository, diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/api/RouterExceptionAndTrackingUtils.java b/services/venice-router/src/main/java/com/linkedin/venice/router/api/RouterExceptionAndTrackingUtils.java index 58cf615980..5c79e02630 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/api/RouterExceptionAndTrackingUtils.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/api/RouterExceptionAndTrackingUtils.java @@ -154,7 +154,7 @@ private static void metricTracking( // If we don't know the actual store name, this error will only be aggregated in server level, but not // in store level if (responseStatus.equals(BAD_REQUEST) || responseStatus.equals(REQUEST_ENTITY_TOO_LARGE)) { - 
stats.recordBadRequest(storeName.orElse(null)); + stats.recordBadRequest(storeName.orElse(null), responseStatus); } else if (responseStatus.equals(TOO_MANY_REQUESTS)) { if (storeName.isPresent()) { if (requestType.isPresent()) { @@ -165,7 +165,7 @@ private static void metricTracking( * * TODO: Remove this metric after the above work is done... */ - stats.recordThrottledRequest(storeName.get()); + stats.recordThrottledRequest(storeName.get(), responseStatus); } } else { // not possible to have empty store name in this scenario @@ -198,7 +198,7 @@ private static void metricTracking( return; } - stats.recordUnhealthyRequest(storeName.orElse(null)); + stats.recordUnhealthyRequest(storeName.orElse(null), responseStatus); if (responseStatus.equals(SERVICE_UNAVAILABLE)) { if (storeName.isPresent()) { diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceDispatcher.java b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceDispatcher.java index cf75f003e4..0a3ccefd44 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceDispatcher.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceDispatcher.java @@ -30,6 +30,7 @@ import com.linkedin.venice.router.stats.RouterStats; import com.linkedin.venice.router.streaming.VeniceChunkedResponse; import com.linkedin.venice.router.throttle.PendingRequestThrottler; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.LatencyUtils; import com.linkedin.venice.utils.Pair; import com.linkedin.venice.utils.Utils; @@ -40,7 +41,6 @@ import io.netty.handler.codec.http.HttpHeaderNames; import io.netty.handler.codec.http.HttpResponseStatus; import io.netty.handler.codec.http.HttpVersion; -import io.tehuti.metrics.MetricsRepository; import java.io.IOException; import java.util.Collections; import java.util.List; @@ -95,7 +95,7 @@ public VeniceDispatcher( VeniceRouterConfig config, 
ReadOnlyStoreRepository storeRepository, RouterStats perStoreStatsByType, - MetricsRepository metricsRepository, + VeniceMetricsRepository metricsRepository, StorageNodeClient storageNodeClient, RouteHttpRequestStats routeHttpRequestStats, AggHostHealthStats aggHostHealthStats, diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VenicePathParser.java b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VenicePathParser.java index 5f1e81b68f..199b5a548a 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VenicePathParser.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VenicePathParser.java @@ -34,11 +34,11 @@ import com.linkedin.venice.router.stats.RouterStats; import com.linkedin.venice.router.streaming.VeniceChunkedWriteHandler; import com.linkedin.venice.router.utils.VeniceRouterUtils; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.streaming.StreamingUtils; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; import io.netty.channel.ChannelHandlerContext; import io.netty.handler.codec.http.HttpResponseStatus; -import io.tehuti.metrics.MetricsRepository; import java.util.Collection; import java.util.Collections; import java.util.Map; @@ -114,7 +114,7 @@ public class VenicePathParser private final ReadOnlyStoreRepository storeRepository; private final VeniceRouterConfig routerConfig; private final CompressorFactory compressorFactory; - private final MetricsRepository metricsRepository; + private final VeniceMetricsRepository metricsRepository; private final ScheduledExecutorService retryManagerScheduler; private final Map routerSingleKeyRetryManagers; private final Map routerMultiKeyRetryManagers; @@ -134,7 +134,7 @@ public VenicePathParser( ReadOnlyStoreRepository storeRepository, VeniceRouterConfig routerConfig, CompressorFactory compressorFactory, - MetricsRepository metricsRepository, + 
VeniceMetricsRepository metricsRepository, ScheduledExecutorService retryManagerScheduler) { this.versionFinder = versionFinder; this.partitionFinder = partitionFinder; diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceResponseAggregator.java b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceResponseAggregator.java index a883dc0970..1275ff4c01 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceResponseAggregator.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceResponseAggregator.java @@ -230,7 +230,7 @@ public FullHttpResponse buildResponse( } } - HttpResponseStatus responseStatus = finalResponse.status(); + HttpResponseStatus httpResponseStatus = finalResponse.status(); Map allMetrics = metrics.getMetrics(); /** * All the metrics in {@link com.linkedin.ddsstorage.router.api.MetricNames} are supported in {@link Metrics}. @@ -243,21 +243,20 @@ public FullHttpResponse buildResponse( // TODO: When a batch get throws a quota exception, the ROUTER_SERVER_TIME is missing, so we can't record anything // here... 
double latency = LatencyUtils.convertNSToMS(timeValue.getRawValue(TimeUnit.NANOSECONDS)); - stats.recordLatency(storeName, latency); - if (HEALTHY_STATUSES.contains(responseStatus)) { + if (HEALTHY_STATUSES.contains(httpResponseStatus)) { routerStats.getStatsByType(RequestType.SINGLE_GET) .recordReadQuotaUsage(storeName, venicePath.getPartitionKeys().size()); if (isFastRequest(latency, requestType)) { - stats.recordHealthyRequest(storeName, latency); + stats.recordHealthyRequest(storeName, latency, httpResponseStatus); } else { - stats.recordTardyRequest(storeName, latency); + stats.recordTardyRequest(storeName, latency, httpResponseStatus); } - } else if (responseStatus.equals(TOO_MANY_REQUESTS)) { + } else if (httpResponseStatus.equals(TOO_MANY_REQUESTS)) { LOGGER.debug("request is rejected by storage node because quota is exceeded"); - stats.recordThrottledRequest(storeName, latency); + stats.recordThrottledRequest(storeName, latency, httpResponseStatus); } else { - LOGGER.debug("Unhealthy request detected, latency: {}ms, response status: {}", latency, responseStatus); - stats.recordUnhealthyRequest(storeName, latency); + LOGGER.debug("Unhealthy request detected, latency: {}ms, response status: {}", latency, httpResponseStatus); + stats.recordUnhealthyRequest(storeName, latency, httpResponseStatus); } } timeValue = allMetrics.get(ROUTER_RESPONSE_WAIT_TIME); @@ -275,7 +274,7 @@ public FullHttpResponse buildResponse( double routingTime = LatencyUtils.convertNSToMS(timeValue.getRawValue(TimeUnit.NANOSECONDS)); stats.recordRequestRoutingLatency(storeName, routingTime); } - if (HEALTHY_STATUSES.contains(responseStatus) && !venicePath.isStreamingRequest()) { + if (HEALTHY_STATUSES.contains(httpResponseStatus) && !venicePath.isStreamingRequest()) { // Only record successful response stats.recordResponseSize(storeName, finalResponse.content().readableBytes()); } diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceVersionFinder.java 
b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceVersionFinder.java index e8670193a0..a40e5c0e6e 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceVersionFinder.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/api/VeniceVersionFinder.java @@ -19,9 +19,9 @@ import com.linkedin.venice.router.stats.RouterCurrentVersionStats; import com.linkedin.venice.router.stats.StaleVersionReason; import com.linkedin.venice.router.stats.StaleVersionStats; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.RedundantExceptionFilter; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; -import io.tehuti.metrics.MetricsRepository; import java.util.List; import java.util.Map; import java.util.Optional; @@ -51,7 +51,7 @@ public class VeniceVersionFinder { private final HelixBaseRoutingRepository routingDataRepository; private final CompressorFactory compressorFactory; - private final MetricsRepository metricsRepository; + private final VeniceMetricsRepository metricsRepository; public VeniceVersionFinder( ReadOnlyStoreRepository metadataRepository, @@ -61,7 +61,7 @@ public VeniceVersionFinder( Map clusterToD2Map, String clusterName, CompressorFactory compressorFactory, - MetricsRepository metricsRepository) { + VeniceMetricsRepository metricsRepository) { this.metadataRepository = metadataRepository; this.routingDataRepository = routingDataRepository; this.stats = stats; diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/api/routing/helix/HelixGroupSelector.java b/services/venice-router/src/main/java/com/linkedin/venice/router/api/routing/helix/HelixGroupSelector.java index c36023b87b..452d3f7299 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/api/routing/helix/HelixGroupSelector.java +++ 
b/services/venice-router/src/main/java/com/linkedin/venice/router/api/routing/helix/HelixGroupSelector.java @@ -4,7 +4,7 @@ import com.linkedin.venice.exceptions.VeniceException; import com.linkedin.venice.helix.HelixInstanceConfigRepository; import com.linkedin.venice.router.stats.HelixGroupStats; -import io.tehuti.metrics.MetricsRepository; +import com.linkedin.venice.stats.VeniceMetricsRepository; import java.util.concurrent.TimeUnit; @@ -26,7 +26,7 @@ public class HelixGroupSelector implements HelixGroupSelectionStrategy { private final HelixGroupStats helixGroupStats; public HelixGroupSelector( - MetricsRepository metricsRepository, + VeniceMetricsRepository metricsRepository, HelixInstanceConfigRepository instanceConfigRepository, HelixGroupSelectionStrategyEnum strategyEnum, TimeoutProcessor timeoutProcessor) { diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/httpclient/ApacheHttpAsyncStorageNodeClient.java b/services/venice-router/src/main/java/com/linkedin/venice/router/httpclient/ApacheHttpAsyncStorageNodeClient.java index c6f1395c0f..40479dd457 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/httpclient/ApacheHttpAsyncStorageNodeClient.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/httpclient/ApacheHttpAsyncStorageNodeClient.java @@ -14,12 +14,12 @@ import com.linkedin.venice.service.AbstractVeniceService; import com.linkedin.venice.stats.DnsLookupStats; import com.linkedin.venice.stats.HttpConnectionPoolStats; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.DaemonThreadFactory; import com.linkedin.venice.utils.Utils; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; -import io.tehuti.metrics.MetricsRepository; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; @@ -83,7 +83,7 @@ public class 
ApacheHttpAsyncStorageNodeClient implements StorageNodeClient { public ApacheHttpAsyncStorageNodeClient( VeniceRouterConfig config, Optional sslFactory, - MetricsRepository metricsRepository, + VeniceMetricsRepository metricsRepository, LiveInstanceMonitor monitor) { int totalIOThreadNum = config.getIoThreadCountInPoolMode(); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AdminOperationsStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AdminOperationsStats.java index aeeed721e3..675177df66 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AdminOperationsStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AdminOperationsStats.java @@ -2,7 +2,7 @@ import com.linkedin.venice.router.VeniceRouterConfig; import com.linkedin.venice.stats.AbstractVeniceStats; -import io.tehuti.metrics.MetricsRepository; +import com.linkedin.venice.stats.VeniceMetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.AsyncGauge; import io.tehuti.metrics.stats.Count; @@ -12,7 +12,7 @@ public class AdminOperationsStats extends AbstractVeniceStats { private final Sensor adminRequestSensor; private final Sensor errorAdminRequestSensor; - public AdminOperationsStats(MetricsRepository metricsRepository, String name, VeniceRouterConfig config) { + public AdminOperationsStats(VeniceMetricsRepository metricsRepository, String name, VeniceRouterConfig config) { super(metricsRepository, name); adminRequestSensor = registerSensorIfAbsent("admin_request", new Count()); errorAdminRequestSensor = registerSensorIfAbsent("error_admin_request", new Count()); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggHostHealthStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggHostHealthStats.java index cc72755409..61506493cd 100644 --- 
a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggHostHealthStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggHostHealthStats.java @@ -2,18 +2,15 @@ import com.linkedin.venice.stats.AbstractVeniceAggStats; import com.linkedin.venice.stats.StatsUtils; -import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; -import io.tehuti.metrics.MetricsRepository; -import java.util.Map; +import com.linkedin.venice.stats.VeniceMetricsRepository; public class AggHostHealthStats extends AbstractVeniceAggStats { - private final Map hostHealthStatsMap = new VeniceConcurrentHashMap<>(); - - public AggHostHealthStats(MetricsRepository metricsRepository) { + public AggHostHealthStats(VeniceMetricsRepository metricsRepository, String clusterName) { super( + (repo, hostName, cluster) -> new HostHealthStats(repo, StatsUtils.convertHostnameToMetricName(hostName)), metricsRepository, - (repo, hostName) -> new HostHealthStats(repo, StatsUtils.convertHostnameToMetricName(hostName))); + clusterName); } private HostHealthStats getHostStats(String hostName) { diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStats.java index 517f1485e7..e39b072a19 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/AggRouterHttpRequestStats.java @@ -5,8 +5,9 @@ import com.linkedin.venice.read.RequestType; import com.linkedin.venice.stats.AbstractVeniceAggStats; import com.linkedin.venice.stats.AbstractVeniceAggStoreStats; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; -import io.tehuti.metrics.MetricsRepository; +import io.netty.handler.codec.http.HttpResponseStatus; 
import java.util.Map; import java.util.function.Function; @@ -15,25 +16,33 @@ public class AggRouterHttpRequestStats extends AbstractVeniceAggStoreStats scatterGatherStatsMap = new VeniceConcurrentHashMap<>(); public AggRouterHttpRequestStats( - MetricsRepository metricsRepository, + VeniceMetricsRepository metricsRepository, + String clusterName, RequestType requestType, ReadOnlyStoreRepository metadataRepository, boolean isUnregisterMetricForDeletedStoreEnabled) { - this(metricsRepository, requestType, false, metadataRepository, isUnregisterMetricForDeletedStoreEnabled); + this( + metricsRepository, + clusterName, + requestType, + false, + metadataRepository, + isUnregisterMetricForDeletedStoreEnabled); } public AggRouterHttpRequestStats( - MetricsRepository metricsRepository, + VeniceMetricsRepository metricsRepository, + String cluster, RequestType requestType, boolean isKeyValueProfilingEnabled, ReadOnlyStoreRepository metadataRepository, boolean isUnregisterMetricForDeletedStoreEnabled) { - super(metricsRepository, metadataRepository, isUnregisterMetricForDeletedStoreEnabled); + super(metricsRepository, cluster, metadataRepository, isUnregisterMetricForDeletedStoreEnabled); /** * Use a setter function to bypass the restriction that the supertype constructor could not * touch member fields of current object. 
*/ - setStatsSupplier((metricsRepo, storeName) -> { + setStatsSupplier((metricsRepo, storeName, clusterName) -> { ScatterGatherStats stats; if (storeName.equals(AbstractVeniceAggStats.STORE_NAME_FOR_TOTAL_STAT)) { stats = new AggScatterGatherStats(); @@ -41,7 +50,13 @@ public AggRouterHttpRequestStats( stats = scatterGatherStatsMap.computeIfAbsent(storeName, k -> new ScatterGatherStats()); } - return new RouterHttpRequestStats(metricsRepo, storeName, requestType, stats, isKeyValueProfilingEnabled); + return new RouterHttpRequestStats( + metricsRepo, + storeName, + clusterName, + requestType, + stats, + isKeyValueProfilingEnabled); }); } @@ -50,19 +65,19 @@ public ScatterGatherStats getScatterGatherStatsForStore(String storeName) { } public void recordRequest(String storeName) { - totalStats.recordRequest(); - getStoreStats(storeName).recordRequest(); + totalStats.recordIncomingRequest(); + getStoreStats(storeName).recordIncomingRequest(); } - public void recordHealthyRequest(String storeName, double latency) { - totalStats.recordHealthyRequest(latency); - getStoreStats(storeName).recordHealthyRequest(latency); + public void recordHealthyRequest(String storeName, double latency, HttpResponseStatus responseStatus) { + totalStats.recordHealthyRequest(latency, responseStatus); + getStoreStats(storeName).recordHealthyRequest(latency, responseStatus); } - public void recordUnhealthyRequest(String storeName) { - totalStats.recordUnhealthyRequest(); + public void recordUnhealthyRequest(String storeName, HttpResponseStatus responseStatus) { + totalStats.recordUnhealthyRequest(responseStatus); if (storeName != null) { - getStoreStats(storeName).recordUnhealthyRequest(); + getStoreStats(storeName).recordUnhealthyRequest(responseStatus); } } @@ -71,10 +86,10 @@ public void recordUnavailableReplicaStreamingRequest(String storeName) { getStoreStats(storeName).recordUnavailableReplicaStreamingRequest(); } - public void recordUnhealthyRequest(String storeName, double latency) { - 
totalStats.recordUnhealthyRequest(latency); + public void recordUnhealthyRequest(String storeName, double latency, HttpResponseStatus responseStatus) { + totalStats.recordUnhealthyRequest(latency, responseStatus); if (storeName != null) { - getStoreStats(storeName).recordUnhealthyRequest(latency); + getStoreStats(storeName).recordUnhealthyRequest(latency, responseStatus); } } @@ -89,9 +104,9 @@ public void recordReadQuotaUsage(String storeName, int quotaUsage) { getStoreStats(storeName).recordReadQuotaUsage(quotaUsage); } - public void recordTardyRequest(String storeName, double latency) { - totalStats.recordTardyRequest(latency); - getStoreStats(storeName).recordTardyRequest(latency); + public void recordTardyRequest(String storeName, double latency, HttpResponseStatus responseStatus) { + totalStats.recordTardyRequest(latency, responseStatus); + getStoreStats(storeName).recordTardyRequest(latency, responseStatus); } /** @@ -101,20 +116,20 @@ public void recordTardyRequest(String storeName, double latency) { * * TODO: Remove this overload after fixing the above. 
*/ - public void recordThrottledRequest(String storeName) { - totalStats.recordThrottledRequest(); - getStoreStats(storeName).recordThrottledRequest(); + public void recordThrottledRequest(String storeName, HttpResponseStatus httpResponseStatus) { + totalStats.recordThrottledRequest(httpResponseStatus); + getStoreStats(storeName).recordThrottledRequest(httpResponseStatus); } - public void recordThrottledRequest(String storeName, double latency) { - totalStats.recordThrottledRequest(latency); - getStoreStats(storeName).recordThrottledRequest(latency); + public void recordThrottledRequest(String storeName, double latency, HttpResponseStatus httpResponseStatus) { + totalStats.recordThrottledRequest(latency, httpResponseStatus); + getStoreStats(storeName).recordThrottledRequest(latency, httpResponseStatus); } - public void recordBadRequest(String storeName) { - totalStats.recordBadRequest(); + public void recordBadRequest(String storeName, HttpResponseStatus responseStatus) { + totalStats.recordBadRequest(responseStatus); if (storeName != null) { - getStoreStats(storeName).recordBadRequest(); + getStoreStats(storeName).recordBadRequest(responseStatus); } } @@ -146,7 +161,9 @@ public void recordFanoutRequestCount(String storeName, int count) { public void recordLatency(String storeName, double latency) { totalStats.recordLatency(latency); - getStoreStats(storeName).recordLatency(latency); + if (storeName != null) { + getStoreStats(storeName).recordLatency(latency); + } } public void recordResponseWaitingTime(String storeName, double waitingTime) { diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HealthCheckStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HealthCheckStats.java index 53a269c74b..55d95dff70 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HealthCheckStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HealthCheckStats.java 
@@ -1,7 +1,7 @@ package com.linkedin.venice.router.stats; import com.linkedin.venice.stats.AbstractVeniceStats; -import io.tehuti.metrics.MetricsRepository; +import com.linkedin.venice.stats.VeniceMetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Count; @@ -10,7 +10,7 @@ public class HealthCheckStats extends AbstractVeniceStats { private final Sensor healthCheckRequestSensor; private final Sensor errorHealthCheckRequestSensor; - public HealthCheckStats(MetricsRepository metricsRepository, String name) { + public HealthCheckStats(VeniceMetricsRepository metricsRepository, String name) { super(metricsRepository, name); healthCheckRequestSensor = registerSensor("healthcheck_request", new Count()); errorHealthCheckRequestSensor = registerSensor("error_healthcheck_request", new Count()); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HelixGroupStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HelixGroupStats.java index a248de31cf..70e7e7f8ec 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HelixGroupStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HelixGroupStats.java @@ -2,8 +2,8 @@ import com.linkedin.venice.router.api.routing.helix.HelixGroupSelectionStrategy; import com.linkedin.venice.stats.AbstractVeniceStats; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; -import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.AsyncGauge; import io.tehuti.metrics.stats.Avg; @@ -19,7 +19,7 @@ public class HelixGroupStats extends AbstractVeniceStats { private final Sensor minGroupPendingRequest; private final Sensor avgGroupPendingRequest; - public HelixGroupStats(MetricsRepository metricsRepository, HelixGroupSelectionStrategy strategy) { + public 
HelixGroupStats(VeniceMetricsRepository metricsRepository, HelixGroupSelectionStrategy strategy) { super(metricsRepository, "HelixGroupStats"); this.strategy = strategy; diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HostHealthStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HostHealthStats.java index 7b9d1b203b..8d8eafe4e6 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HostHealthStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/HostHealthStats.java @@ -2,7 +2,7 @@ import com.linkedin.venice.stats.AbstractVeniceAggStats; import com.linkedin.venice.stats.AbstractVeniceStats; -import io.tehuti.metrics.MetricsRepository; +import com.linkedin.venice.stats.VeniceMetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Avg; import io.tehuti.metrics.stats.Count; @@ -28,7 +28,7 @@ public class HostHealthStats extends AbstractVeniceStats { private Optional unhealthyHostCountCausedByPendingQueueSensor = Optional.empty(); private Optional unhealthyHostCountCausedByHeartBeatSensor = Optional.empty(); - public HostHealthStats(MetricsRepository metricsRepository, String name) { + public HostHealthStats(VeniceMetricsRepository metricsRepository, String name) { super(metricsRepository, name); this.unhealthyHostOfflineInstance = registerSensor("unhealthy_host_offline_instance", new Count()); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpRequestStats.java index e203fc4d89..6c999fd1df 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpRequestStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpRequestStats.java @@ -3,8 +3,8 @@ import com.linkedin.venice.router.httpclient.StorageNodeClient; 
import com.linkedin.venice.stats.AbstractVeniceStats; import com.linkedin.venice.stats.StatsUtils; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; -import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Avg; import io.tehuti.metrics.stats.Max; @@ -20,11 +20,11 @@ * to {@link RouteHttpStats} which stores only per type stats. */ public class RouteHttpRequestStats { - private final MetricsRepository metricsRepository; + private final VeniceMetricsRepository metricsRepository; private final StorageNodeClient storageNodeClient; private final Map routeStatsMap = new VeniceConcurrentHashMap<>(); - public RouteHttpRequestStats(MetricsRepository metricsRepository, StorageNodeClient storageNodeClient) { + public RouteHttpRequestStats(VeniceMetricsRepository metricsRepository, StorageNodeClient storageNodeClient) { this.metricsRepository = metricsRepository; this.storageNodeClient = storageNodeClient; } @@ -58,7 +58,7 @@ static class InternalHostStats extends AbstractVeniceStats { private final Sensor unhealthyPendingRateSensor; private AtomicLong pendingRequestCount; - public InternalHostStats(MetricsRepository metricsRepository, String hostName) { + public InternalHostStats(VeniceMetricsRepository metricsRepository, String hostName) { super(metricsRepository, StatsUtils.convertHostnameToMetricName(hostName)); pendingRequestCount = new AtomicLong(); // pendingRequestCountSensor = diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpStats.java index 90bc9f94c4..79de9c6892 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouteHttpStats.java @@ -3,8 +3,8 @@ import 
com.linkedin.venice.read.RequestType; import com.linkedin.venice.stats.AbstractVeniceHttpStats; import com.linkedin.venice.stats.TehutiUtils; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; -import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Count; import io.tehuti.metrics.stats.Max; @@ -13,10 +13,10 @@ public class RouteHttpStats { private final Map routeStatsMap = new VeniceConcurrentHashMap<>(); - private final MetricsRepository metricsRepository; + private final VeniceMetricsRepository metricsRepository; private final RequestType requestType; - public RouteHttpStats(MetricsRepository metricsRepository, RequestType requestType) { + public RouteHttpStats(VeniceMetricsRepository metricsRepository, RequestType requestType) { this.metricsRepository = metricsRepository; this.requestType = requestType; } @@ -31,7 +31,7 @@ static class InternalRouteHttpStats extends AbstractVeniceHttpStats { private final Sensor responseWaitingTimeSensor; private final Sensor requestSensor; - public InternalRouteHttpStats(MetricsRepository metricsRepository, String hostName, RequestType requestType) { + public InternalRouteHttpStats(VeniceMetricsRepository metricsRepository, String hostName, RequestType requestType) { super(metricsRepository, hostName.replace('.', '_'), requestType); requestSensor = registerSensor("request", new Count()); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterCurrentVersionStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterCurrentVersionStats.java index 35dd99e619..57965d6b06 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterCurrentVersionStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterCurrentVersionStats.java @@ -1,7 +1,7 @@ package 
com.linkedin.venice.router.stats; import com.linkedin.venice.stats.AbstractVeniceStats; -import io.tehuti.metrics.MetricsRepository; +import com.linkedin.venice.stats.VeniceMetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Gauge; @@ -9,7 +9,7 @@ public class RouterCurrentVersionStats extends AbstractVeniceStats { private final Sensor currentVersionNumberSensor; - public RouterCurrentVersionStats(MetricsRepository metricsRepository, String name) { + public RouterCurrentVersionStats(VeniceMetricsRepository metricsRepository, String name) { super(metricsRepository, name); this.currentVersionNumberSensor = registerSensor("current_version", new Gauge(-1)); } diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java index d53abc2177..9927ecbbd4 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterHttpRequestStats.java @@ -1,6 +1,15 @@ package com.linkedin.venice.router.stats; import static com.linkedin.venice.stats.AbstractVeniceAggStats.STORE_NAME_FOR_TOTAL_STAT; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.HTTP_RESPONSE_STATUS_CODE_CATEGORY; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_CLUSTER_NAME; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_METHOD; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_RETRY_ABORT_REASON; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_RETRY_TYPE; +import static 
com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_REQUEST_VALIDATION_OUTCOME; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_RESPONSE_STATUS_CODE_CATEGORY; +import static com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions.VENICE_STORE_NAME; import com.linkedin.alpini.router.monitoring.ScatterGatherStats; import com.linkedin.venice.common.VeniceSystemStoreUtils; @@ -8,10 +17,22 @@ import com.linkedin.venice.stats.AbstractVeniceHttpStats; import com.linkedin.venice.stats.LambdaStat; import com.linkedin.venice.stats.TehutiUtils; +import com.linkedin.venice.stats.VeniceMetricsConfig; +import com.linkedin.venice.stats.VeniceMetricsRepository; +import com.linkedin.venice.stats.VeniceOpenTelemetryMetricFormat; +import com.linkedin.venice.stats.dimensions.VeniceHttpResponseStatusCodeCategory; +import com.linkedin.venice.stats.dimensions.VeniceMetricsDimensions; +import com.linkedin.venice.stats.dimensions.VeniceRequestRetryAbortReason; +import com.linkedin.venice.stats.dimensions.VeniceRequestRetryType; +import com.linkedin.venice.stats.dimensions.VeniceRequestValidationOutcome; +import com.linkedin.venice.stats.dimensions.VeniceResponseStatusCategory; +import io.netty.handler.codec.http.HttpResponseStatus; +import io.opentelemetry.api.common.Attributes; +import io.opentelemetry.api.metrics.DoubleHistogram; +import io.opentelemetry.api.metrics.LongCounter; import io.tehuti.Metric; import io.tehuti.metrics.MeasurableStat; import io.tehuti.metrics.MetricConfig; -import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Avg; import io.tehuti.metrics.stats.Count; @@ -27,92 +48,161 @@ public class RouterHttpRequestStats extends AbstractVeniceHttpStats { private static final MetricConfig METRIC_CONFIG = new MetricConfig().timeWindow(10, TimeUnit.SECONDS); - private static final MetricsRepository localMetricRepo = new MetricsRepository(METRIC_CONFIG); + private static 
final VeniceMetricsRepository localMetricRepo = new VeniceMetricsRepository( + new VeniceMetricsConfig.VeniceMetricsConfigBuilder().setTehutiMetricConfig(METRIC_CONFIG).build()); private final static Sensor totalInflightRequestSensor = localMetricRepo.sensor("total_inflight_request"); static { totalInflightRequestSensor.add("total_inflight_request_count", new Rate()); } - private final Sensor requestSensor; + + /** metrics to track incoming requests */ + private final Sensor incomingRequestSensor; + private final LongCounter incomingRequestSensorOtel; + + /** metrics to track response handling */ private final Sensor healthySensor; private final Sensor unhealthySensor; private final Sensor tardySensor; private final Sensor healthyRequestRateSensor; private final Sensor tardyRequestRatioSensor; private final Sensor throttleSensor; - private final Sensor errorRetryCountSensor; + private final Sensor badRequestSensor; + private final LongCounter requestSensorOtel; + /** latency metrics */ private final Sensor latencySensor; private final Sensor healthyRequestLatencySensor; private final Sensor unhealthyRequestLatencySensor; private final Sensor tardyRequestLatencySensor; private final Sensor throttledRequestLatencySensor; + private final DoubleHistogram latencySensorOtel; + + /** retry metrics */ + private final Sensor errorRetryCountSensor; + private final LongCounter retryTriggeredSensorOtel; + private final Sensor allowedRetryRequestSensor; + private final LongCounter allowedRetryRequestSensorOtel; + private final Sensor disallowedRetryRequestSensor; + private final LongCounter disallowedRetryRequestSensorOtel; + private final Sensor retryDelaySensor; + private final DoubleHistogram retryDelaySensorOtel; + + /** retry aborted metrics */ + private final Sensor delayConstraintAbortedRetryRequest; + private final Sensor slowRouteAbortedRetryRequest; + private final Sensor retryRouteLimitAbortedRetryRequest; + private final Sensor noAvailableReplicaAbortedRetryRequest; 
+ private final LongCounter abortedRetrySensorOtel; + + /** key count metrics */ + private final Sensor keyNumSensor; + private final Sensor badRequestKeyCountSensor; + private final DoubleHistogram keyCountSensorOtel; + + /** OTel metrics yet to be added */ private final Sensor requestSizeSensor; private final Sensor compressedResponseSizeSensor; private final Sensor responseSizeSensor; - private final Sensor badRequestSensor; - private final Sensor badRequestKeyCountSensor; private final Sensor requestThrottledByRouterCapacitySensor; private final Sensor decompressionTimeSensor; private final Sensor routerResponseWaitingTimeSensor; private final Sensor fanoutRequestCountSensor; private final Sensor quotaSensor; private final Sensor findUnhealthyHostRequestSensor; - private final Sensor keyNumSensor; // Reflect the real request usage, e.g count each key as an unit of request usage. private final Sensor requestUsageSensor; private final Sensor requestParsingLatencySensor; private final Sensor requestRoutingLatencySensor; private final Sensor unAvailableRequestSensor; - private final Sensor delayConstraintAbortedRetryRequest; - private final Sensor slowRouteAbortedRetryRequest; - private final Sensor retryRouteLimitAbortedRetryRequest; - private final Sensor noAvailableReplicaAbortedRetryRequest; private final Sensor readQuotaUsageSensor; private final Sensor inFlightRequestSensor; private final AtomicInteger currentInFlightRequest; private final Sensor unavailableReplicaStreamingRequestSensor; - private final Sensor allowedRetryRequestSensor; - private final Sensor disallowedRetryRequestSensor; - private final Sensor errorRetryAttemptTriggeredByPendingRequestCheckSensor; - private final Sensor retryDelaySensor; private final Sensor multiGetFallbackSensor; private final Sensor metaStoreShadowReadSensor; private Sensor keySizeSensor; + + /** TODO: Need to clarify the usage and add new OTel metrics or add it as a part of existing ones */ + private final Sensor 
errorRetryAttemptTriggeredByPendingRequestCheckSensor; + private final String systemStoreName; + private final Attributes otelMetricDimensions; + private final boolean emitOpenTelemetryMetrics; + private final VeniceOpenTelemetryMetricFormat openTelemetryMetricFormat; // QPS metrics public RouterHttpRequestStats( - MetricsRepository metricsRepository, + VeniceMetricsRepository metricsRepository, String storeName, + String clusterName, RequestType requestType, ScatterGatherStats scatterGatherStats, boolean isKeyValueProfilingEnabled) { super(metricsRepository, storeName, requestType); + emitOpenTelemetryMetrics = metricsRepository.getVeniceMetricsConfig().isEmitOpenTelemetryMetrics(); + openTelemetryMetricFormat = metricsRepository.getVeniceMetricsConfig().getMetricFormat(); + otelMetricDimensions = Attributes.builder() + .put(getDimensionName(VENICE_STORE_NAME), storeName) + .put(getDimensionName(VENICE_REQUEST_METHOD), requestType.getRequestTypeName()) + .put(getDimensionName(VENICE_CLUSTER_NAME), clusterName) + .build(); + this.systemStoreName = VeniceSystemStoreUtils.extractSystemStoreType(storeName); Rate requestRate = new OccurrenceRate(); Rate healthyRequestRate = new OccurrenceRate(); Rate tardyRequestRate = new OccurrenceRate(); - requestSensor = registerSensor("request", new Count(), requestRate); + + incomingRequestSensor = registerSensor("request", new Count(), requestRate); + incomingRequestSensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() + .getCounter("incoming_call_count", "Number", "Count of all incoming requests"); + healthySensor = registerSensor("healthy_request", new Count(), healthyRequestRate); unhealthySensor = registerSensor("unhealthy_request", new Count()); - unavailableReplicaStreamingRequestSensor = registerSensor("unavailable_replica_streaming_request", new Count()); tardySensor = registerSensor("tardy_request", new Count(), tardyRequestRate); + throttleSensor = registerSensor("throttled_request", new Count()); 
healthyRequestRateSensor = registerSensor(new TehutiUtils.SimpleRatioStat(healthyRequestRate, requestRate, "healthy_request_ratio")); tardyRequestRatioSensor = registerSensor(new TehutiUtils.SimpleRatioStat(tardyRequestRate, requestRate, "tardy_request_ratio")); - throttleSensor = registerSensor("throttled_request", new Count()); - errorRetryCountSensor = registerSensor("error_retry", new Count()); badRequestSensor = registerSensor("bad_request", new Count()); - badRequestKeyCountSensor = registerSensor("bad_request_key_count", new OccurrenceRate(), new Avg(), new Max()); + requestSensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() + .getCounter("call_count", "Number", "Count of all requests with response details"); + + errorRetryCountSensor = registerSensor("error_retry", new Count()); + retryTriggeredSensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() + .getCounter("retry_call_count", "Number", "Count of retries triggered"); + allowedRetryRequestSensor = registerSensor("allowed_retry_request_count", new OccurrenceRate()); + allowedRetryRequestSensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() + .getCounter("allowed_retry_call_count", "Number", "Count of allowed retry requests"); + disallowedRetryRequestSensor = registerSensor("disallowed_retry_request_count", new OccurrenceRate()); + disallowedRetryRequestSensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() + .getCounter("disallowed_retry_call_count", "Number", "Count of disallowed retry requests"); + errorRetryAttemptTriggeredByPendingRequestCheckSensor = + registerSensor("error_retry_attempt_triggered_by_pending_request_check", new OccurrenceRate()); + retryDelaySensor = registerSensor("retry_delay", new Avg(), new Max()); + retryDelaySensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() + .getHistogramWithoutBuckets("retry_delay", TimeUnit.MILLISECONDS.name(), "Retry delay time"); + + delayConstraintAbortedRetryRequest = 
registerSensor("delay_constraint_aborted_retry_request", new Count()); + slowRouteAbortedRetryRequest = registerSensor("slow_route_aborted_retry_request", new Count()); + retryRouteLimitAbortedRetryRequest = registerSensor("retry_route_limit_aborted_retry_request", new Count()); + noAvailableReplicaAbortedRetryRequest = registerSensor("no_available_replica_aborted_retry_request", new Count()); + abortedRetrySensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() + .getCounter("aborted_retry_call_count", "Number", "Count of aborted retry requests"); + + unavailableReplicaStreamingRequestSensor = registerSensor("unavailable_replica_streaming_request", new Count()); requestThrottledByRouterCapacitySensor = registerSensor("request_throttled_by_router_capacity", new Count()); fanoutRequestCountSensor = registerSensor("fanout_request_count", new Avg(), new Max(0)); + latencySensor = registerSensorWithDetailedPercentiles("latency", new Avg(), new Max(0)); healthyRequestLatencySensor = registerSensorWithDetailedPercentiles("healthy_request_latency", new Avg(), new Max(0)); unhealthyRequestLatencySensor = registerSensor("unhealthy_request_latency", new Avg(), new Max(0)); tardyRequestLatencySensor = registerSensor("tardy_request_latency", new Avg(), new Max(0)); throttledRequestLatencySensor = registerSensor("throttled_request_latency", new Avg(), new Max(0)); + latencySensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() + .getHistogram("call_time", TimeUnit.MILLISECONDS.name(), "Latency based on all responses"); + routerResponseWaitingTimeSensor = registerSensor( "response_waiting_time", TehutiUtils.getPercentileStat(getName(), getFullMetricName("response_waiting_time"))); @@ -147,6 +237,10 @@ public RouterHttpRequestStats( "retry_faster_than_original_count")); keyNumSensor = registerSensor("key_num", new Avg(), new Max(0)); + badRequestKeyCountSensor = registerSensor("bad_request_key_count", new OccurrenceRate(), new Avg(), new Max()); + 
keyCountSensorOtel = metricsRepository.getOpenTelemetryMetricsRepository() + .getHistogramWithoutBuckets("call_key_count", "Number", "Count of keys in multi key requests"); + /** * request_usage.Total is incoming KPS while request_usage.OccurrenceRate is QPS */ @@ -158,11 +252,6 @@ public RouterHttpRequestStats( unAvailableRequestSensor = registerSensor("unavailable_request", new Count()); - delayConstraintAbortedRetryRequest = registerSensor("delay_constraint_aborted_retry_request", new Count()); - slowRouteAbortedRetryRequest = registerSensor("slow_route_aborted_retry_request", new Count()); - retryRouteLimitAbortedRetryRequest = registerSensor("retry_route_limit_aborted_retry_request", new Count()); - noAvailableReplicaAbortedRetryRequest = registerSensor("no_available_replica_aborted_retry_request", new Count()); - readQuotaUsageSensor = registerSensor("read_quota_usage_kps", new Total()); inFlightRequestSensor = registerSensor("in_flight_request_count", new Min(), new Max(0), new Avg()); @@ -189,42 +278,48 @@ public RouterHttpRequestStats( } currentInFlightRequest = new AtomicInteger(); - allowedRetryRequestSensor = registerSensor("allowed_retry_request_count", new OccurrenceRate()); - disallowedRetryRequestSensor = registerSensor("disallowed_retry_request_count", new OccurrenceRate()); - errorRetryAttemptTriggeredByPendingRequestCheckSensor = - registerSensor("error_retry_attempt_triggered_by_pending_request_check", new OccurrenceRate()); - retryDelaySensor = registerSensor("retry_delay", new Avg(), new Max()); metaStoreShadowReadSensor = registerSensor("meta_store_shadow_read", new OccurrenceRate()); } + private String getDimensionName(VeniceMetricsDimensions dimension) { + return dimension.getDimensionName(openTelemetryMetricFormat); + } + /** * We record this at the beginning of request handling, so we don't know the latency yet... All specific * types of requests also have their latencies logged at the same time. 
*/ - public void recordRequest() { - requestSensor.record(); + public void recordIncomingRequest() { + incomingRequestSensor.record(); inFlightRequestSensor.record(currentInFlightRequest.incrementAndGet()); totalInflightRequestSensor.record(); + if (emitOpenTelemetryMetrics) { + incomingRequestSensorOtel.add(1, otelMetricDimensions); + } } - public void recordHealthyRequest(Double latency) { + public void recordHealthyRequest(Double latency, HttpResponseStatus responseStatus) { healthySensor.record(); + recordRequestSensorOtel(responseStatus, VeniceResponseStatusCategory.HEALTHY); if (latency != null) { healthyRequestLatencySensor.record(latency); + recordLatencySensorOtel(latency, responseStatus, VeniceResponseStatusCategory.HEALTHY); } } - public void recordUnhealthyRequest() { + public void recordUnhealthyRequest(HttpResponseStatus responseStatus) { unhealthySensor.record(); + recordRequestSensorOtel(responseStatus, VeniceResponseStatusCategory.UNHEALTHY); } - public void recordUnavailableReplicaStreamingRequest() { - unavailableReplicaStreamingRequestSensor.record(); + public void recordUnhealthyRequest(double latency, HttpResponseStatus responseStatus) { + recordUnhealthyRequest(responseStatus); + unhealthyRequestLatencySensor.record(latency); + recordLatencySensorOtel(latency, responseStatus, VeniceResponseStatusCategory.UNHEALTHY); } - public void recordUnhealthyRequest(double latency) { - recordUnhealthyRequest(); - unhealthyRequestLatencySensor.record(latency); + public void recordUnavailableReplicaStreamingRequest() { + unavailableReplicaStreamingRequestSensor.record(); } /** @@ -235,14 +330,17 @@ public void recordReadQuotaUsage(int quotaUsage) { readQuotaUsageSensor.record(quotaUsage); } - public void recordTardyRequest(double latency) { + public void recordTardyRequest(double latency, HttpResponseStatus responseStatus) { tardySensor.record(); + recordRequestSensorOtel(responseStatus, VeniceResponseStatusCategory.TARDY); 
tardyRequestLatencySensor.record(latency); + recordLatencySensorOtel(latency, responseStatus, VeniceResponseStatusCategory.TARDY); } - public void recordThrottledRequest(double latency) { - recordThrottledRequest(); + public void recordThrottledRequest(double latency, HttpResponseStatus responseStatus) { + recordThrottledRequest(responseStatus); throttledRequestLatencySensor.record(latency); + recordLatencySensorOtel(latency, responseStatus, VeniceResponseStatusCategory.THROTTLED); } /** @@ -252,20 +350,46 @@ public void recordThrottledRequest(double latency) { * * TODO: Remove this overload after fixing the above. */ - public void recordThrottledRequest() { + public void recordThrottledRequest(HttpResponseStatus responseStatus) { throttleSensor.record(); + recordRequestSensorOtel(responseStatus, VeniceResponseStatusCategory.THROTTLED); } public void recordErrorRetryCount() { errorRetryCountSensor.record(); + recordRetryTriggeredSensorOtel(VeniceRequestRetryType.ERROR_RETRY); + } + + public void recordRetryTriggeredSensorOtel(VeniceRequestRetryType retryType) { + if (emitOpenTelemetryMetrics) { + Attributes dimensions = Attributes.builder() + .putAll(otelMetricDimensions) + .put(getDimensionName(VENICE_REQUEST_RETRY_TYPE), retryType.getRetryType()) + .build(); + retryTriggeredSensorOtel.add(1, dimensions); + } + } + + public void recordAbortedRetrySensorOtel(VeniceRequestRetryAbortReason abortReason) { + if (emitOpenTelemetryMetrics) { + Attributes dimensions = Attributes.builder() + .putAll(otelMetricDimensions) + .put(getDimensionName(VENICE_REQUEST_RETRY_ABORT_REASON), abortReason.getAbortReason()) + .build(); + abortedRetrySensorOtel.add(1, dimensions); + } } - public void recordBadRequest() { + public void recordBadRequest(HttpResponseStatus responseStatus) { badRequestSensor.record(); + recordRequestSensorOtel(responseStatus, VeniceResponseStatusCategory.BAD_REQUEST); } public void recordBadRequestKeyCount(int keyCount) { 
badRequestKeyCountSensor.record(keyCount); + if (emitOpenTelemetryMetrics) { + recordKeyCountSensorOtel(keyCount, VeniceRequestValidationOutcome.INVALID_KEY_COUNT_LIMIT_EXCEEDED); + } } public void recordRequestThrottledByRouterCapacity() { @@ -282,6 +406,39 @@ public void recordLatency(double latency) { latencySensor.record(latency); } + public void recordLatencySensorOtel( + double latency, + HttpResponseStatus responseStatus, + VeniceResponseStatusCategory veniceResponseStatusCategory) { + if (emitOpenTelemetryMetrics) { + Attributes dimensions = Attributes.builder() + .putAll(otelMetricDimensions) + // only add HTTP_RESPONSE_STATUS_CODE_CATEGORY to reduce the cardinality for histogram + .put( + getDimensionName(HTTP_RESPONSE_STATUS_CODE_CATEGORY), + VeniceHttpResponseStatusCodeCategory.valueOf(responseStatus.code()).getCategory()) + .put(getDimensionName(VENICE_RESPONSE_STATUS_CODE_CATEGORY), veniceResponseStatusCategory.getCategory()) + .build(); + latencySensorOtel.record(latency, dimensions); + } + } + + public void recordRequestSensorOtel( + HttpResponseStatus responseStatus, + VeniceResponseStatusCategory veniceResponseStatusCategory) { + if (emitOpenTelemetryMetrics) { + Attributes dimensions = Attributes.builder() + .putAll(otelMetricDimensions) + .put( + getDimensionName(HTTP_RESPONSE_STATUS_CODE_CATEGORY), + VeniceHttpResponseStatusCodeCategory.valueOf(responseStatus.code()).getCategory()) + .put(getDimensionName(VENICE_RESPONSE_STATUS_CODE_CATEGORY), veniceResponseStatusCategory.getCategory()) + .put(getDimensionName(HTTP_RESPONSE_STATUS_CODE), responseStatus.codeAsText().toString()) + .build(); + requestSensorOtel.add(1, dimensions); + } + } + public void recordResponseWaitingTime(double waitingTime) { routerResponseWaitingTimeSensor.record(waitingTime); } @@ -312,6 +469,20 @@ public void recordFindUnhealthyHostRequest() { public void recordKeyNum(int keyNum) { keyNumSensor.record(keyNum); + if (emitOpenTelemetryMetrics) { + 
recordKeyCountSensorOtel(keyNum, VeniceRequestValidationOutcome.VALID); + } + } + + public void recordKeyCountSensorOtel(int keyNum, VeniceRequestValidationOutcome outcome) { + // Tehuti sensors are recorded by the callers; only the OTel histogram is recorded here. + if (emitOpenTelemetryMetrics) { + Attributes dimensions = Attributes.builder() + .putAll(otelMetricDimensions) + .put(getDimensionName(VENICE_REQUEST_VALIDATION_OUTCOME), outcome.getOutcome()) + .build(); + keyCountSensorOtel.record(keyNum, dimensions); + } } public void recordRequestUsage(int usage) { @@ -336,18 +507,22 @@ public void recordUnavailableRequest() { public void recordDelayConstraintAbortedRetryRequest() { delayConstraintAbortedRetryRequest.record(); + recordAbortedRetrySensorOtel(VeniceRequestRetryAbortReason.RETRY_ABORTED_BY_DELAY_CONSTRAINT); } public void recordSlowRouteAbortedRetryRequest() { slowRouteAbortedRetryRequest.record(); + recordAbortedRetrySensorOtel(VeniceRequestRetryAbortReason.RETRY_ABORTED_BY_SLOW_ROUTE); } public void recordRetryRouteLimitAbortedRetryRequest() { retryRouteLimitAbortedRetryRequest.record(); + recordAbortedRetrySensorOtel(VeniceRequestRetryAbortReason.RETRY_ABORTED_BY_MAX_RETRY_ROUTE_LIMIT); } public void recordNoAvailableReplicaAbortedRetryRequest() { noAvailableReplicaAbortedRetryRequest.record(); + recordAbortedRetrySensorOtel(VeniceRequestRetryAbortReason.RETRY_ABORTED_BY_NO_AVAILABLE_REPLICA); } public void recordKeySizeInByte(long keySize) { @@ -358,7 +533,7 @@ public void recordKeySizeInByte(long keySize) { public void recordResponse() { /** - * We already report into the sensor when the request starts, in {@link #recordRequest()}, so at response time + * We already report into the sensor when the request starts, in {@link #recordIncomingRequest()}, so at response time * there is no need to record into the sensor again. We just want to maintain the bookkeeping.
*/ currentInFlightRequest.decrementAndGet(); @@ -367,10 +542,16 @@ public void recordResponse() { public void recordAllowedRetryRequest() { allowedRetryRequestSensor.record(); + if (emitOpenTelemetryMetrics) { + allowedRetryRequestSensorOtel.add(1, otelMetricDimensions); + } } public void recordDisallowedRetryRequest() { disallowedRetryRequestSensor.record(); + if (emitOpenTelemetryMetrics) { + disallowedRetryRequestSensorOtel.add(1, otelMetricDimensions); + } } public void recordErrorRetryAttemptTriggeredByPendingRequestCheck() { @@ -379,6 +560,9 @@ public void recordErrorRetryAttemptTriggeredByPendingRequestCheck() { public void recordRetryDelay(double delay) { retryDelaySensor.record(delay); + if (emitOpenTelemetryMetrics) { + retryDelaySensorOtel.record(delay, otelMetricDimensions); + } } public void recordMetaStoreShadowRead() { diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterThrottleStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterThrottleStats.java index 028a5c728a..42fbbd4b74 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterThrottleStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/RouterThrottleStats.java @@ -1,7 +1,7 @@ package com.linkedin.venice.router.stats; import com.linkedin.venice.stats.AbstractVeniceStats; -import io.tehuti.metrics.MetricsRepository; +import com.linkedin.venice.stats.VeniceMetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Count; @@ -9,7 +9,7 @@ public class RouterThrottleStats extends AbstractVeniceStats { private final Sensor routerThrottleSensor; - public RouterThrottleStats(MetricsRepository repository, String name) { + public RouterThrottleStats(VeniceMetricsRepository repository, String name) { super(repository, name); routerThrottleSensor = registerSensor("router_throttled_request", new Count()); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/SecurityStats.java
b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/SecurityStats.java index 596a6564b0..14347ccc5d 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/SecurityStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/SecurityStats.java @@ -2,7 +2,7 @@ import com.linkedin.alpini.netty4.ssl.SslInitializer; import com.linkedin.venice.stats.AbstractVeniceStats; -import io.tehuti.metrics.MetricsRepository; +import com.linkedin.venice.stats.VeniceMetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.AsyncGauge; import io.tehuti.metrics.stats.Avg; @@ -19,7 +19,7 @@ public class SecurityStats extends AbstractVeniceStats { private final Sensor sslLiveConnectionCount; private final Sensor nonSslConnectionCount; - public SecurityStats(MetricsRepository repository, String name, IntSupplier secureConnectionCountSupplier) { + public SecurityStats(VeniceMetricsRepository repository, String name, IntSupplier secureConnectionCountSupplier) { super(repository, name); this.secureConnectionCountSupplier = secureConnectionCountSupplier; this.sslErrorCount = registerSensor("ssl_error", new Count()); diff --git a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/StaleVersionStats.java b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/StaleVersionStats.java index 21ce6805c0..693de6fa14 100644 --- a/services/venice-router/src/main/java/com/linkedin/venice/router/stats/StaleVersionStats.java +++ b/services/venice-router/src/main/java/com/linkedin/venice/router/stats/StaleVersionStats.java @@ -1,8 +1,8 @@ package com.linkedin.venice.router.stats; import com.linkedin.venice.stats.AbstractVeniceStats; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.concurrent.VeniceConcurrentHashMap; -import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import io.tehuti.metrics.stats.Max; 
import io.tehuti.metrics.stats.OccurrenceRate; @@ -13,7 +13,7 @@ public class StaleVersionStats extends AbstractVeniceStats { private final VeniceConcurrentHashMap staleVersionReasonStats = new VeniceConcurrentHashMap<>(); - public StaleVersionStats(MetricsRepository metricsRepository, String name) { + public StaleVersionStats(VeniceMetricsRepository metricsRepository, String name) { super(metricsRepository, name); staleVersionStat = registerSensor("stale_version_delta", new Max()); for (StaleVersionReason reason: StaleVersionReason.values()) { diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/AggRouterHttpRequestStatsTest.java b/services/venice-router/src/test/java/com/linkedin/venice/router/AggRouterHttpRequestStatsTest.java index 3490d97483..2af917f1bd 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/AggRouterHttpRequestStatsTest.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/AggRouterHttpRequestStatsTest.java @@ -1,10 +1,12 @@ package com.linkedin.venice.router; +import static io.netty.handler.codec.http.HttpResponseStatus.TOO_MANY_REQUESTS; + import com.linkedin.venice.meta.ReadOnlyStoreRepository; import com.linkedin.venice.read.RequestType; import com.linkedin.venice.router.stats.AggRouterHttpRequestStats; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.tehuti.MockTehutiReporter; -import io.tehuti.metrics.MetricsRepository; import org.mockito.Mockito; import org.testng.Assert; import org.testng.annotations.BeforeSuite; @@ -12,13 +14,13 @@ public class AggRouterHttpRequestStatsTest { - MetricsRepository metricsRepository; + VeniceMetricsRepository metricsRepository; private MockTehutiReporter reporter; private ReadOnlyStoreRepository storeMetadataRepository; @BeforeSuite public void setUp() { - this.metricsRepository = new MetricsRepository(); + this.metricsRepository = new VeniceMetricsRepository(); reporter = new MockTehutiReporter(); 
metricsRepository.addReporter(reporter); storeMetadataRepository = Mockito.mock(ReadOnlyStoreRepository.class); @@ -26,8 +28,12 @@ public void setUp() { @Test public void testAggRouterMetrics() { - AggRouterHttpRequestStats stats = - new AggRouterHttpRequestStats(metricsRepository, RequestType.SINGLE_GET, storeMetadataRepository, true); + AggRouterHttpRequestStats stats = new AggRouterHttpRequestStats( + metricsRepository, + "test-cluster", + RequestType.SINGLE_GET, + storeMetadataRepository, + true); stats.recordRequest("store5"); Assert.assertEquals(reporter.query(".total--request.Count").value(), 1d); @@ -37,8 +43,8 @@ public void testAggRouterMetrics() { Assert.assertNotNull(metricsRepository.getMetric(".store1--request.Count")); Assert.assertEquals(reporter.query(".store1--request.Count").value(), 1d); - stats.recordThrottledRequest("store1", 1.0); - stats.recordThrottledRequest("store2", 1.0); + stats.recordThrottledRequest("store1", 1.0, TOO_MANY_REQUESTS); + stats.recordThrottledRequest("store2", 1.0, TOO_MANY_REQUESTS); stats.recordErrorRetryCount("store1"); Assert.assertEquals(reporter.query(".total--request.Count").value(), 2d); Assert.assertEquals(reporter.query(".store1--request.Count").value(), 1d); @@ -59,8 +65,13 @@ public void testAggRouterMetrics() { @Test public void testProfilingMetrics() { - AggRouterHttpRequestStats stats = - new AggRouterHttpRequestStats(metricsRepository, RequestType.COMPUTE, true, storeMetadataRepository, true); + AggRouterHttpRequestStats stats = new AggRouterHttpRequestStats( + metricsRepository, + "test-cluster", + RequestType.COMPUTE, + true, + storeMetadataRepository, + true); for (int i = 1; i <= 100; i += 1) { stats.recordKeySize("store1", i); diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/RouteHttpRequestStatsTest.java b/services/venice-router/src/test/java/com/linkedin/venice/router/RouteHttpRequestStatsTest.java index a364f4a166..fdfa96bb08 100644 --- 
a/services/venice-router/src/test/java/com/linkedin/venice/router/RouteHttpRequestStatsTest.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/RouteHttpRequestStatsTest.java @@ -7,9 +7,9 @@ import com.linkedin.venice.router.httpclient.StorageNodeClient; import com.linkedin.venice.router.stats.RouteHttpRequestStats; import com.linkedin.venice.router.stats.RouterHttpRequestStats; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.tehuti.MockTehutiReporter; import com.linkedin.venice.utils.metrics.MetricsRepositoryUtils; -import io.tehuti.metrics.MetricsRepository; import org.testng.Assert; import org.testng.annotations.BeforeSuite; import org.testng.annotations.Test; @@ -22,13 +22,18 @@ public class RouteHttpRequestStatsTest { @BeforeSuite public void setUp() { - MetricsRepository metrics = MetricsRepositoryUtils.createSingleThreadedMetricsRepository(); + VeniceMetricsRepository metrics = MetricsRepositoryUtils.createSingleThreadedVeniceMetricsRepository(); reporter = new MockTehutiReporter(); metrics.addReporter(reporter); stats = new RouteHttpRequestStats(metrics, mock(StorageNodeClient.class)); - routerHttpRequestStats = - new RouterHttpRequestStats(metrics, "", RequestType.SINGLE_GET, mock(ScatterGatherStats.class), false); + routerHttpRequestStats = new RouterHttpRequestStats( + metrics, + "test-store", + "test-cluster", + RequestType.SINGLE_GET, + mock(ScatterGatherStats.class), + false); } @Test @@ -46,7 +51,7 @@ public void routerMetricsTest() { Assert.assertEquals(stats.getPendingRequestCount("my_host1"), 1); Assert.assertEquals(stats.getPendingRequestCount("my_host2"), 0); - routerHttpRequestStats.recordRequest(); + routerHttpRequestStats.recordIncomingRequest(); Assert.assertTrue(RouterHttpRequestStats.hasInFlightRequests()); } } diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceDelegateMode.java 
b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceDelegateMode.java index e558337e71..01377fc91b 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceDelegateMode.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceDelegateMode.java @@ -39,10 +39,10 @@ import com.linkedin.venice.router.stats.RouterStats; import com.linkedin.venice.router.throttle.ReadRequestThrottler; import com.linkedin.venice.schema.avro.ReadAvroProtocolDefinition; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.HelixUtils; import com.linkedin.venice.utils.Utils; import io.netty.handler.codec.http.HttpMethod; -import io.tehuti.metrics.MetricsRepository; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -225,7 +225,8 @@ public void setUp() { RouterExceptionAndTrackingUtils.setRouterStats( new RouterStats<>( requestType -> new AggRouterHttpRequestStats( - new MetricsRepository(), + new VeniceMetricsRepository(), + "test-cluster", requestType, mock(ReadOnlyStoreRepository.class), true))); @@ -378,7 +379,8 @@ public void testLeastLoadedOnSlowHosts() throws RouterException { config, new RouterStats<>( requestType -> new AggRouterHttpRequestStats( - new MetricsRepository(), + new VeniceMetricsRepository(), + "test-cluster", requestType, mock(ReadOnlyStoreRepository.class), true)), @@ -702,7 +704,7 @@ public void testScatterForMultiGetWithHelixAssistedRouting() throws RouterExcept doReturn(1).when(helixInstanceConfigRepository).getInstanceGroupId(instance4.getNodeId()); HelixGroupSelector helixGroupSelector = new HelixGroupSelector( - new MetricsRepository(), + new VeniceMetricsRepository(), helixInstanceConfigRepository, HelixGroupSelectionStrategyEnum.ROUND_ROBIN, mock(TimeoutProcessor.class)); diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java 
b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java index b86f169b50..2ab4e77908 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVenicePathParser.java @@ -34,6 +34,7 @@ import com.linkedin.venice.schema.avro.ReadAvroProtocolDefinition; import com.linkedin.venice.serializer.RecordSerializer; import com.linkedin.venice.serializer.SerializerDeserializerFactory; +import com.linkedin.venice.stats.VeniceMetricsRepository; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import io.netty.handler.codec.http.DefaultHttpHeaders; @@ -41,7 +42,6 @@ import io.netty.handler.codec.http.HttpHeaders; import io.netty.handler.codec.http.HttpMethod; import io.netty.handler.codec.http.HttpVersion; -import io.tehuti.metrics.MetricsRepository; import java.nio.ByteBuffer; import java.util.AbstractMap; import java.util.ArrayList; @@ -100,7 +100,8 @@ public void setUp() { RouterExceptionAndTrackingUtils.setRouterStats( new RouterStats<>( requestType -> new AggRouterHttpRequestStats( - new MetricsRepository(), + new VeniceMetricsRepository(), + CLUSTER, requestType, mock(ReadOnlyStoreRepository.class), true))); @@ -128,7 +129,7 @@ public void testParseResourceUri_ComputeRequest() throws RouterException { storeRepository, mock(VeniceRouterConfig.class), mock(CompressorFactory.class), - mock(MetricsRepository.class), + mock(VeniceMetricsRepository.class), mock(ScheduledExecutorService.class)); String storeName = "test-store"; @@ -190,7 +191,7 @@ public void parsesQueries() throws RouterException { mock(ReadOnlyStoreRepository.class), MOCK_ROUTER_CONFIG, compressorFactory, - mock(MetricsRepository.class), + mock(VeniceMetricsRepository.class), mock(ScheduledExecutorService.class)); BasicFullHttpRequest request = new BasicFullHttpRequest(HttpVersion.HTTP_1_1, HttpMethod.GET, uri, 0, 0); VenicePath path 
= parser.parseResourceUri(uri, request); @@ -221,7 +222,7 @@ public void parsesB64Uri() throws RouterException { mock(ReadOnlyStoreRepository.class), MOCK_ROUTER_CONFIG, compressorFactory, - mock(MetricsRepository.class), + mock(VeniceMetricsRepository.class), mock(ScheduledExecutorService.class)).parseResourceUri(myUri, request); ByteBuffer partitionKey = path.getPartitionKey().getKeyBuffer(); Assert.assertEquals( @@ -242,7 +243,7 @@ public void failsToParseOtherActions() throws RouterException { mock(ReadOnlyStoreRepository.class), MOCK_ROUTER_CONFIG, compressorFactory, - mock(MetricsRepository.class), + mock(VeniceMetricsRepository.class), mock(ScheduledExecutorService.class)).parseResourceUri("/badAction/storeName/key"); } @@ -289,7 +290,7 @@ public void parseRequestWithBatchSizeViolation() throws RouterException { storeRepository, MOCK_ROUTER_CONFIG, compressorFactory, - mock(MetricsRepository.class), + mock(VeniceMetricsRepository.class), mock(ScheduledExecutorService.class)); try { pathParser.parseResourceUri(myUri, request); diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceResponseAggregator.java b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceResponseAggregator.java index 3075773715..0d01bea6a2 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceResponseAggregator.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceResponseAggregator.java @@ -219,7 +219,7 @@ public void testBuildResponseForMultiGet() { FullHttpResponse response5 = buildFullHttpResponse(TOO_MANY_REQUESTS, new byte[0], headers); metrics.setMetric(MetricNames.ROUTER_SERVER_TIME, new TimeValue(1, TimeUnit.MILLISECONDS)); responseAggregator.buildResponse(request, metrics, Collections.singletonList(response5)); - verify(mockStatsForMultiGet).recordThrottledRequest(storeName, 1.0); + verify(mockStatsForMultiGet).recordThrottledRequest(storeName, 
1.0, TOO_MANY_REQUESTS); } @Test diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceVersionFinder.java b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceVersionFinder.java index 436ac12ffe..d70afcd8ac 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceVersionFinder.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/TestVeniceVersionFinder.java @@ -28,11 +28,11 @@ import com.linkedin.venice.meta.VersionStatus; import com.linkedin.venice.meta.ZKStore; import com.linkedin.venice.router.stats.StaleVersionStats; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.TestUtils; import com.linkedin.venice.utils.Utils; import io.netty.handler.codec.http.HttpMethod; import io.netty.handler.codec.http.HttpVersion; -import io.tehuti.metrics.MetricsRepository; import io.tehuti.metrics.Sensor; import java.nio.ByteBuffer; import java.util.HashMap; @@ -75,7 +75,7 @@ public void throws404onMissingStore() { clusterToD2Map, CLUSTER, compressorFactory, - mock(MetricsRepository.class)); + mock(VeniceMetricsRepository.class)); try { versionFinder.getVersion("", request); Assert.fail( @@ -115,7 +115,7 @@ public void throws301onMigratedStore() { clusterToD2Map, CLUSTER, compressorFactory, - mock(MetricsRepository.class)); + mock(VeniceMetricsRepository.class)); try { request.headers().add(HttpConstants.VENICE_ALLOW_REDIRECT, "1"); versionFinder.getVersion("store", request); @@ -150,7 +150,7 @@ public void returnNonExistingVersionOnceStoreIsDisabled() { clusterToD2Map, CLUSTER, compressorFactory, - mock(MetricsRepository.class)); + mock(VeniceMetricsRepository.class)); try { versionFinder.getVersion(storeName, request); Assert.fail("Store should be disabled and forbidden to read."); @@ -190,7 +190,7 @@ public void testSwapsVersionWhenAllPartitionsAreOnline() { HelixReadOnlyStoreConfigRepository storeConfigRepo = 
mock(HelixReadOnlyStoreConfigRepository.class); CompressorFactory compressorFactory = mock(CompressorFactory.class); - MetricsRepository mockMetricsRepository = mock(MetricsRepository.class); + VeniceMetricsRepository mockMetricsRepository = mock(VeniceMetricsRepository.class); final Sensor mockSensor = mock(Sensor.class); doReturn(mockSensor).when(mockMetricsRepository).sensor(anyString(), any()); @@ -277,7 +277,7 @@ public void returnsCurrentVersionWhenTheDictionaryExists() { clusterToD2Map, CLUSTER, compressorFactory, - mock(MetricsRepository.class)); + mock(VeniceMetricsRepository.class)); String firstVersionKafkaTopic = Version.composeKafkaTopic(storeName, firstVersion); @@ -326,7 +326,7 @@ public void returnsCurrentVersionWhenItIsTheOnlyOption() { clusterToD2Map, CLUSTER, compressorFactory, - mock(MetricsRepository.class)); + mock(VeniceMetricsRepository.class)); String firstVersionKafkaTopic = Version.composeKafkaTopic(storeName, firstVersion); @@ -361,7 +361,7 @@ public void returnsPreviousVersionWhenDictionaryNotDownloaded() { doReturn(true).when(routingDataRepo).containsKafkaTopic(anyString()); CompressorFactory compressorFactory = mock(CompressorFactory.class); - MetricsRepository mockMetricsRepository = mock(MetricsRepository.class); + VeniceMetricsRepository mockMetricsRepository = mock(VeniceMetricsRepository.class); final Sensor mockSensor = mock(Sensor.class); doReturn(mockSensor).when(mockMetricsRepository).sensor(anyString(), any()); @@ -419,7 +419,7 @@ public void returnsNewVersionWhenDictionaryDownloads() { doReturn(3).when(routingDataRepo).getNumberOfPartitions(anyString()); doReturn(instances).when(routingDataRepo).getReadyToServeInstances(anyString(), anyInt()); doReturn(true).when(routingDataRepo).containsKafkaTopic(anyString()); - MetricsRepository mockMetricsRepository = mock(MetricsRepository.class); + VeniceMetricsRepository mockMetricsRepository = mock(VeniceMetricsRepository.class); final Sensor mockSensor = mock(Sensor.class); 
doReturn(mockSensor).when(mockMetricsRepository).sensor(anyString(), any()); diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVeniceMultiGetPath.java b/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVeniceMultiGetPath.java index 3a0cbc98f0..f4100ff399 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVeniceMultiGetPath.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVeniceMultiGetPath.java @@ -20,11 +20,11 @@ import com.linkedin.venice.schema.avro.ReadAvroProtocolDefinition; import com.linkedin.venice.serializer.RecordSerializer; import com.linkedin.venice.serializer.SerializerDeserializerFactory; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.Utils; import io.netty.buffer.Unpooled; import io.netty.handler.codec.http.HttpMethod; import io.netty.handler.codec.http.HttpVersion; -import io.tehuti.metrics.MetricsRepository; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; @@ -37,14 +37,15 @@ public class TestVeniceMultiGetPath { private final RetryManager disabledRetryManager = - new RetryManager(new MetricsRepository(), "disabled-test-retry-manager", 0, 0, null); + new RetryManager(new VeniceMetricsRepository(), "disabled-test-retry-manager", 0, 0, null); @BeforeClass public void setUp() { RouterExceptionAndTrackingUtils.setRouterStats( new RouterStats<>( requestType -> new AggRouterHttpRequestStats( - new MetricsRepository(), + new VeniceMetricsRepository(), + "test-cluster", requestType, mock(ReadOnlyStoreRepository.class), true))); diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVenicePath.java b/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVenicePath.java index da07b2f926..eef224113d 100644 --- 
a/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVenicePath.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/api/path/TestVenicePath.java @@ -10,12 +10,12 @@ import com.linkedin.venice.read.RequestType; import com.linkedin.venice.router.api.RouterKey; import com.linkedin.venice.schema.avro.ReadAvroProtocolDefinition; +import com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.utils.TestMockTime; import com.linkedin.venice.utils.TestUtils; import com.linkedin.venice.utils.Time; import io.netty.handler.codec.http.HttpMethod; import io.netty.handler.codec.http.HttpResponseStatus; -import io.tehuti.metrics.MetricsRepository; import java.time.Clock; import java.util.Collection; import java.util.concurrent.ScheduledExecutorService; @@ -83,13 +83,13 @@ public String getLocation() { } private RetryManager disabledRetryManager; - private MetricsRepository metricsRepository; + private VeniceMetricsRepository metricsRepository; private final ScheduledExecutorService retryManagerScheduler = Executors.newScheduledThreadPool(1); @BeforeMethod public void setUp() { - metricsRepository = new MetricsRepository(); + metricsRepository = new VeniceMetricsRepository(); // retry manager is disabled by default disabledRetryManager = new RetryManager(metricsRepository, "disabled-test-retry-manager", 0, 0, retryManagerScheduler); diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AdminOperationsStatsTest.java b/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AdminOperationsStatsTest.java index 57e82128ff..35364da6b1 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AdminOperationsStatsTest.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/stats/AdminOperationsStatsTest.java @@ -3,9 +3,9 @@ import static org.mockito.Mockito.*; import com.linkedin.venice.router.VeniceRouterConfig; +import 
com.linkedin.venice.stats.VeniceMetricsRepository; import com.linkedin.venice.tehuti.MockTehutiReporter; import com.linkedin.venice.utils.metrics.MetricsRepositoryUtils; -import io.tehuti.metrics.MetricsRepository; import org.testng.Assert; import org.testng.annotations.Test; @@ -13,7 +13,7 @@ public class AdminOperationsStatsTest { @Test public void testAdminOperationsStats() { - MetricsRepository metrics = MetricsRepositoryUtils.createSingleThreadedMetricsRepository(); + VeniceMetricsRepository metrics = MetricsRepositoryUtils.createSingleThreadedVeniceMetricsRepository(); MockTehutiReporter reporter = new MockTehutiReporter(); metrics.addReporter(reporter); VeniceRouterConfig mockConfig = mock(VeniceRouterConfig.class); diff --git a/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerHttpRequestStats.java b/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerHttpRequestStats.java index 8305d622d6..1fe0116fc9 100644 --- a/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerHttpRequestStats.java +++ b/services/venice-server/src/main/java/com/linkedin/venice/stats/AggServerHttpRequestStats.java @@ -25,7 +25,7 @@ public AggServerHttpRequestStats( unregisterMetricForDeletedStoreEnabled); } - static class ServerHttpRequestStatsSupplier implements StatsSupplier { + static class ServerHttpRequestStatsSupplier implements StatsSupplierMetricsRepository { private final RequestType requestType; private final boolean isKeyValueProfilingEnabled;