From 2496cfdf5160a086f040251b5261964632579472 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lucas=20Francisco=20L=C3=B3pez?= Date: Thu, 11 Apr 2024 22:32:52 +0200 Subject: [PATCH] feat: Conditionally emit metrics based on enablement (#19903) Co-authored-by: Aleksandr Bezobchuk --- CHANGELOG.md | 4 +++ server/start.go | 4 --- telemetry/metrics.go | 10 +++++++ telemetry/wrapper.go | 37 +++++++++++++++++++++++++ telemetry/wrapper_test.go | 51 +++++++++++++++++++++++++++++++++++ x/circuit/module.go | 3 +-- x/crisis/abci.go | 3 +-- x/crisis/module.go | 4 +-- x/distribution/keeper/abci.go | 4 +-- x/evidence/keeper/abci.go | 3 +-- x/gov/keeper/abci.go | 2 +- x/mint/keeper/abci.go | 3 +-- x/slashing/abci.go | 3 +-- x/staking/keeper/abci.go | 6 ++--- x/upgrade/keeper/abci.go | 3 +-- 15 files changed, 114 insertions(+), 26 deletions(-) create mode 100644 telemetry/wrapper_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index d7385cb08db..f130bc2a385 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -61,6 +61,10 @@ Every module contains its own CHANGELOG.md. Please refer to the module you are i ### Improvements +* (telemetry) [#19903](https://github.com/cosmos/cosmos-sdk/pull/19903) Conditionally emit metrics based on enablement. + * **Introduction of `Now` Function**: Added a new function called `Now` to the telemetry package. It returns the current system time if telemetry is enabled, or a zero time if telemetry is not enabled. + * **Atomic Global Variable**: Implemented an atomic global variable to manage the state of telemetry's enablement. This ensures thread safety for the telemetry state. + * **Conditional Telemetry Emission**: All telemetry functions have been updated to emit metrics only when telemetry is enabled. They perform a check with `isTelemetryEnabled()` and return early if telemetry is disabled, minimizing unnecessary operations and overhead. * (types) [#19869](https://github.com/cosmos/cosmos-sdk/pull/19869) Removed `Any` type from `codec/types` and replaced it with an alias for `cosmos/gogoproto/types/any`. * (server) [#19854](https://github.com/cosmos/cosmos-sdk/pull/19854) Add customizability to start command. * Add `StartCmdOptions` in `server.AddCommands` instead of `servertypes.ModuleInitFlags`. To set custom flags set them in the `StartCmdOptions` struct on the `AddFlags` field. diff --git a/server/start.go b/server/start.go index cfed5a393a1..4c2d3902205 100644 --- a/server/start.go +++ b/server/start.go @@ -533,10 +533,6 @@ func startAPIServer( } func startTelemetry(cfg serverconfig.Config) (*telemetry.Metrics, error) { - if !cfg.Telemetry.Enabled { - return nil, nil - } - return telemetry.New(cfg.Telemetry) } diff --git a/telemetry/metrics.go b/telemetry/metrics.go index 502fdb61120..07d1020eb89 100644 --- a/telemetry/metrics.go +++ b/telemetry/metrics.go @@ -14,6 +14,15 @@ import ( "github.com/prometheus/common/expfmt" ) +// globalTelemetryEnabled is a private variable that stores the telemetry enabled state. +// It is set on initialization and does not change for the lifetime of the program. +var globalTelemetryEnabled bool + +// IsTelemetryEnabled provides controlled access to check if telemetry is enabled. +func IsTelemetryEnabled() bool { + return globalTelemetryEnabled +} + // globalLabels defines the set of global labels that will be applied to all // metrics emitted using the telemetry package function wrappers. var globalLabels = []metrics.Label{} @@ -95,6 +104,7 @@ type GatherResponse struct { // New creates a new instance of Metrics func New(cfg Config) (_ *Metrics, rerr error) { + globalTelemetryEnabled = cfg.Enabled if !cfg.Enabled { return nil, nil } diff --git a/telemetry/wrapper.go b/telemetry/wrapper.go index 4cd96b78f71..da11f1fa045 100644 --- a/telemetry/wrapper.go +++ b/telemetry/wrapper.go @@ -24,6 +24,10 @@ func NewLabel(name, value string) metrics.Label { // metric for a module with a given set of keys. If any global labels are defined, // they will be added to the module label. func ModuleMeasureSince(module string, start time.Time, keys ...string) { + if !IsTelemetryEnabled() { + return + } + metrics.MeasureSinceWithLabels( keys, start.UTC(), @@ -35,6 +39,10 @@ func ModuleMeasureSince(module string, start time.Time, keys ...string) { // module with a given set of keys. If any global labels are defined, they will // be added to the module label. func ModuleSetGauge(module string, val float32, keys ...string) { + if !IsTelemetryEnabled() { + return + } + metrics.SetGaugeWithLabels( keys, val, @@ -45,29 +53,58 @@ func ModuleSetGauge(module string, val float32, keys ...string) { // IncrCounter provides a wrapper functionality for emitting a counter metric with // global labels (if any). func IncrCounter(val float32, keys ...string) { + if !IsTelemetryEnabled() { + return + } + metrics.IncrCounterWithLabels(keys, val, globalLabels) } // IncrCounterWithLabels provides a wrapper functionality for emitting a counter // metric with global labels (if any) along with the provided labels. func IncrCounterWithLabels(keys []string, val float32, labels []metrics.Label) { + if !IsTelemetryEnabled() { + return + } + metrics.IncrCounterWithLabels(keys, val, append(labels, globalLabels...)) } // SetGauge provides a wrapper functionality for emitting a gauge metric with // global labels (if any). func SetGauge(val float32, keys ...string) { + if !IsTelemetryEnabled() { + return + } + metrics.SetGaugeWithLabels(keys, val, globalLabels) } // SetGaugeWithLabels provides a wrapper functionality for emitting a gauge // metric with global labels (if any) along with the provided labels. func SetGaugeWithLabels(keys []string, val float32, labels []metrics.Label) { + if !IsTelemetryEnabled() { + return + } + metrics.SetGaugeWithLabels(keys, val, append(labels, globalLabels...)) } // MeasureSince provides a wrapper functionality for emitting a a time measure // metric with global labels (if any). func MeasureSince(start time.Time, keys ...string) { + if !IsTelemetryEnabled() { + return + } + metrics.MeasureSinceWithLabels(keys, start.UTC(), globalLabels) } + +// Now return the current time if telemetry is enabled or a zero time if it's not +func Now() time.Time { + if !IsTelemetryEnabled() { + return time.Time{} + } + + return time.Now() +} diff --git a/telemetry/wrapper_test.go b/telemetry/wrapper_test.go new file mode 100644 index 00000000000..5388839874b --- /dev/null +++ b/telemetry/wrapper_test.go @@ -0,0 +1,51 @@ +package telemetry + +import ( + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +var mu sync.Mutex + +func initTelemetry(v bool) { + globalTelemetryEnabled = v +} + +// Reset the global state to a known disabled state before each test. +func setupTest(t *testing.T) { + t.Helper() + mu.Lock() // Ensure no other test can modify global state at the same time. + defer mu.Unlock() + initTelemetry(false) +} + +// TestNow tests the Now function when telemetry is enabled and disabled. +func TestNow(t *testing.T) { + setupTest(t) // Locks the mutex to avoid race condition. + + initTelemetry(true) + telemetryTime := Now() + assert.NotEqual(t, time.Time{}, telemetryTime, "Now() should not return zero time when telemetry is enabled") + + setupTest(t) // Reset the global state and lock the mutex again. + + initTelemetry(false) + telemetryTime = Now() + assert.Equal(t, time.Time{}, telemetryTime, "Now() should return zero time when telemetry is disabled") +} + +// TestIsTelemetryEnabled tests the IsTelemetryEnabled function. +func TestIsTelemetryEnabled(t *testing.T) { + setupTest(t) // Locks the mutex to avoid race condition. + + initTelemetry(true) + assert.True(t, IsTelemetryEnabled(), "IsTelemetryEnabled() should return true when globalTelemetryEnabled is set to true") + + setupTest(t) // Reset the global state and lock the mutex again. + + initTelemetry(false) + assert.False(t, IsTelemetryEnabled(), "IsTelemetryEnabled() should return false when globalTelemetryEnabled is set to false") +} diff --git a/x/circuit/module.go b/x/circuit/module.go index 5f14b29e4c8..8f7e4b4e389 100644 --- a/x/circuit/module.go +++ b/x/circuit/module.go @@ -4,7 +4,6 @@ import ( "context" "encoding/json" "fmt" - "time" gwruntime "github.com/grpc-ecosystem/grpc-gateway/runtime" "google.golang.org/grpc" @@ -93,7 +92,7 @@ func (am AppModule) ValidateGenesis(bz json.RawMessage) error { // InitGenesis performs genesis initialization for the circuit module. func (am AppModule) InitGenesis(ctx context.Context, data json.RawMessage) error { - start := time.Now() + start := telemetry.Now() var genesisState types.GenesisState if err := am.cdc.UnmarshalJSON(data, &genesisState); err != nil { return err diff --git a/x/crisis/abci.go b/x/crisis/abci.go index 8ce275ecd79..2eacc641c4a 100644 --- a/x/crisis/abci.go +++ b/x/crisis/abci.go @@ -2,7 +2,6 @@ package crisis import ( "context" - "time" "github.com/cosmos/cosmos-sdk/telemetry" sdk "github.com/cosmos/cosmos-sdk/types" @@ -12,7 +11,7 @@ import ( // check all registered invariants func EndBlocker(ctx context.Context, k keeper.Keeper) { - defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), telemetry.MetricKeyEndBlocker) + defer telemetry.ModuleMeasureSince(types.ModuleName, telemetry.Now(), telemetry.MetricKeyEndBlocker) sdkCtx := sdk.UnwrapSDKContext(ctx) if k.InvCheckPeriod() == 0 || sdkCtx.BlockHeight()%int64(k.InvCheckPeriod()) != 0 { diff --git a/x/crisis/module.go b/x/crisis/module.go index 807a276aca9..7da203a6a0f 100644 --- a/x/crisis/module.go +++ b/x/crisis/module.go @@ -4,7 +4,6 @@ import ( "context" "encoding/json" "fmt" - "time" "github.com/spf13/cobra" "google.golang.org/grpc" @@ -118,12 +117,11 @@ func (am AppModule) ValidateGenesis(bz json.RawMessage) error { // InitGenesis performs genesis initialization for the crisis module. func (am AppModule) InitGenesis(ctx context.Context, data json.RawMessage) error { - start := time.Now() var genesisState types.GenesisState if err := am.cdc.UnmarshalJSON(data, &genesisState); err != nil { return err } - telemetry.MeasureSince(start, "InitGenesis", "crisis", "unmarshal") + telemetry.MeasureSince(telemetry.Now(), "InitGenesis", "crisis", "unmarshal") am.keeper.InitGenesis(ctx, &genesisState) if !am.skipGenesisInvariants { diff --git a/x/distribution/keeper/abci.go b/x/distribution/keeper/abci.go index 544b62e1e69..60d1aea984a 100644 --- a/x/distribution/keeper/abci.go +++ b/x/distribution/keeper/abci.go @@ -1,8 +1,6 @@ package keeper import ( - "time" - "cosmossdk.io/x/distribution/types" "github.com/cosmos/cosmos-sdk/telemetry" @@ -13,7 +11,7 @@ import ( // and distribute rewards for the previous block. // TODO: use context.Context after including the comet service func (k Keeper) BeginBlocker(ctx sdk.Context) error { - defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), telemetry.MetricKeyBeginBlocker) + defer telemetry.ModuleMeasureSince(types.ModuleName, telemetry.Now(), telemetry.MetricKeyBeginBlocker) // determine the total power signing the block var previousTotalPower int64 diff --git a/x/evidence/keeper/abci.go b/x/evidence/keeper/abci.go index 0ad33d81a73..5e5321e40f3 100644 --- a/x/evidence/keeper/abci.go +++ b/x/evidence/keeper/abci.go @@ -3,7 +3,6 @@ package keeper import ( "context" "fmt" - "time" "cosmossdk.io/core/comet" "cosmossdk.io/x/evidence/types" @@ -15,7 +14,7 @@ import ( // BeginBlocker iterates through and handles any newly discovered evidence of // misbehavior submitted by CometBFT. Currently, only equivocation is handled. func (k Keeper) BeginBlocker(ctx context.Context) error { - defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), telemetry.MetricKeyBeginBlocker) + defer telemetry.ModuleMeasureSince(types.ModuleName, telemetry.Now(), telemetry.MetricKeyBeginBlocker) bi := sdk.UnwrapSDKContext(ctx).CometInfo() diff --git a/x/gov/keeper/abci.go b/x/gov/keeper/abci.go index 4e4052102b3..4969d4abfbd 100644 --- a/x/gov/keeper/abci.go +++ b/x/gov/keeper/abci.go @@ -21,7 +21,7 @@ import ( // EndBlocker is called every block. func (k Keeper) EndBlocker(ctx context.Context) error { - defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), telemetry.MetricKeyEndBlocker) + defer telemetry.ModuleMeasureSince(types.ModuleName, telemetry.Now(), telemetry.MetricKeyEndBlocker) logger := k.Logger() // delete dead proposals from store and returns theirs deposits. diff --git a/x/mint/keeper/abci.go b/x/mint/keeper/abci.go index 8c3c1b3a4b0..04677ad2f31 100644 --- a/x/mint/keeper/abci.go +++ b/x/mint/keeper/abci.go @@ -2,7 +2,6 @@ package keeper import ( "context" - "time" "cosmossdk.io/core/event" "cosmossdk.io/x/mint/types" @@ -13,7 +12,7 @@ import ( // BeginBlocker mints new tokens for the previous block. func (k Keeper) BeginBlocker(ctx context.Context, ic types.InflationCalculationFn) error { - defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), telemetry.MetricKeyBeginBlocker) + defer telemetry.ModuleMeasureSince(types.ModuleName, telemetry.Now(), telemetry.MetricKeyBeginBlocker) // fetch stored minter & params minter, err := k.Minter.Get(ctx) diff --git a/x/slashing/abci.go b/x/slashing/abci.go index 20613b40cfd..72645213d82 100644 --- a/x/slashing/abci.go +++ b/x/slashing/abci.go @@ -2,7 +2,6 @@ package slashing import ( "context" - "time" "cosmossdk.io/x/slashing/keeper" "cosmossdk.io/x/slashing/types" @@ -14,7 +13,7 @@ import ( // BeginBlocker check for infraction evidence or downtime of validators // on every begin block func BeginBlocker(ctx context.Context, k keeper.Keeper) error { - defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), telemetry.MetricKeyBeginBlocker) + defer telemetry.ModuleMeasureSince(types.ModuleName, telemetry.Now(), telemetry.MetricKeyBeginBlocker) // Iterate over all the validators which *should* have signed this block // store whether or not they have actually signed it and slash/unbond any diff --git a/x/staking/keeper/abci.go b/x/staking/keeper/abci.go index 1e70b2c6bb1..59db9be890c 100644 --- a/x/staking/keeper/abci.go +++ b/x/staking/keeper/abci.go @@ -2,7 +2,6 @@ package keeper import ( "context" - "time" "cosmossdk.io/core/appmodule" "cosmossdk.io/x/staking/types" @@ -13,12 +12,13 @@ import ( // BeginBlocker will persist the current header and validator set as a historical entry // and prune the oldest entry based on the HistoricalEntries parameter func (k *Keeper) BeginBlocker(ctx context.Context) error { - defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), telemetry.MetricKeyBeginBlocker) + defer telemetry.ModuleMeasureSince(types.ModuleName, telemetry.Now(), telemetry.MetricKeyBeginBlocker) return k.TrackHistoricalInfo(ctx) } // EndBlocker called at every block, update validator set func (k *Keeper) EndBlocker(ctx context.Context) ([]appmodule.ValidatorUpdate, error) { - defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), telemetry.MetricKeyEndBlocker) + start := telemetry.Now() + defer telemetry.ModuleMeasureSince(types.ModuleName, start, telemetry.MetricKeyEndBlocker) return k.BlockValidatorUpdates(ctx) } diff --git a/x/upgrade/keeper/abci.go b/x/upgrade/keeper/abci.go index 6ef39c8062b..bd2844dcbf2 100644 --- a/x/upgrade/keeper/abci.go +++ b/x/upgrade/keeper/abci.go @@ -4,7 +4,6 @@ import ( "context" "errors" "fmt" - "time" storetypes "cosmossdk.io/store/types" "cosmossdk.io/x/upgrade/types" @@ -22,7 +21,7 @@ import ( // a migration to be executed if needed upon this switch (migration defined in the new binary) // skipUpgradeHeightArray is a set of block heights for which the upgrade must be skipped func (k Keeper) PreBlocker(ctx context.Context) error { - defer telemetry.ModuleMeasureSince(types.ModuleName, time.Now(), telemetry.MetricKeyBeginBlocker) + defer telemetry.ModuleMeasureSince(types.ModuleName, telemetry.Now(), telemetry.MetricKeyBeginBlocker) blockHeight := k.environment.HeaderService.GetHeaderInfo(ctx).Height plan, err := k.GetUpgradePlan(ctx)