From fde1844131f40f96186fa624b1bed3146847c91c Mon Sep 17 00:00:00 2001 From: Jake Engelberg Date: Tue, 15 Oct 2024 13:41:19 -0400 Subject: [PATCH] Revert "Revert "wip: error code monitoring/metric recording"" This reverts commit 44c31be7f0491b571c02ef5ba90e2efd4c447836. --- charts/dapr/crds/configuration.yaml | 4 ++ pkg/api/http/http.go | 1 + pkg/apis/configuration/v1alpha1/types.go | 2 + .../v1alpha1/zz_generated.deepcopy.go | 5 ++ pkg/config/configuration.go | 14 ++++- pkg/diagnostics/errorcode_monitoring.go | 51 +++++++++++++++++++ pkg/diagnostics/metrics.go | 14 +++++ pkg/diagnostics/service_monitoring.go | 1 + 8 files changed, 90 insertions(+), 2 deletions(-) create mode 100644 pkg/diagnostics/errorcode_monitoring.go diff --git a/charts/dapr/crds/configuration.yaml b/charts/dapr/crds/configuration.yaml index b6790b35088..274cc8a89a3 100644 --- a/charts/dapr/crds/configuration.yaml +++ b/charts/dapr/crds/configuration.yaml @@ -266,6 +266,8 @@ spec: description: If true (default is false) HTTP verbs (e.g., GET, POST) are excluded from the metrics. 
type: boolean type: object + recordErrorCodes: + type: boolean rules: items: description: MetricsRule defines configuration options for a @@ -329,6 +331,8 @@ spec: items: type: integer type: array + recordErrorCodes: + type: boolean rules: items: description: MetricsRule defines configuration options for a diff --git a/pkg/api/http/http.go b/pkg/api/http/http.go index ca09cf29231..be6c78cb350 100644 --- a/pkg/api/http/http.go +++ b/pkg/api/http/http.go @@ -508,6 +508,7 @@ func (a *api) onBulkGetState(reqCtx *fasthttp.RequestCtx) { status := apierrors.StateStore(storeName).InvalidKeyName(k, err.Error()) universalFastHTTPErrorResponder(reqCtx, status) log.Debug(status) + diag.DefaultErrorCodeMonitoring.RecordErrorCode(errorcodes.IllegalKey) return } r := state.GetRequest{ diff --git a/pkg/apis/configuration/v1alpha1/types.go b/pkg/apis/configuration/v1alpha1/types.go index 56a2460d60b..4c9bf973808 100644 --- a/pkg/apis/configuration/v1alpha1/types.go +++ b/pkg/apis/configuration/v1alpha1/types.go @@ -212,6 +212,8 @@ type ZipkinSpec struct { type MetricSpec struct { Enabled *bool `json:"enabled"` // +optional + RecordErrorCodes *bool `json:"recordErrorCodes,omitempty"` + // +optional HTTP *MetricHTTP `json:"http,omitempty"` // +optional Rules []MetricsRule `json:"rules,omitempty"` diff --git a/pkg/apis/configuration/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/configuration/v1alpha1/zz_generated.deepcopy.go index 2c8b5b21ee3..e65a9cb62cd 100644 --- a/pkg/apis/configuration/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/configuration/v1alpha1/zz_generated.deepcopy.go @@ -501,6 +501,11 @@ func (in *MetricSpec) DeepCopyInto(out *MetricSpec) { *out = new(bool) **out = **in } + if in.RecordErrorCodes != nil { + in, out := &in.RecordErrorCodes, &out.RecordErrorCodes + *out = new(bool) + **out = **in + } if in.HTTP != nil { in, out := &in.HTTP, &out.HTTP *out = new(MetricHTTP) diff --git a/pkg/config/configuration.go b/pkg/config/configuration.go index 
82c5ce3cea2..a4b09525133 100644
--- a/pkg/config/configuration.go
+++ b/pkg/config/configuration.go
@@ -254,8 +254,9 @@ func (o OtelSpec) GetIsSecure() bool {
 // MetricSpec configuration for metrics.
 type MetricSpec struct {
 	// Defaults to true
-	Enabled *bool       `json:"enabled,omitempty" yaml:"enabled,omitempty"`
-	HTTP    *MetricHTTP `json:"http,omitempty" yaml:"http,omitempty"`
+	Enabled          *bool       `json:"enabled,omitempty" yaml:"enabled,omitempty"`
+	RecordErrorCodes *bool       `json:"recordErrorCodes,omitempty" yaml:"recordErrorCodes,omitempty"`
+	HTTP             *MetricHTTP `json:"http,omitempty" yaml:"http,omitempty"`
 	// Latency distribution buckets. If not set, the default buckets are used.
 	LatencyDistributionBuckets *[]int        `json:"latencyDistributionBuckets,omitempty" yaml:"latencyDistributionBuckets,omitempty"`
 	Rules                      []MetricsRule `json:"rules,omitempty" yaml:"rules,omitempty"`
@@ -317,6 +318,15 @@ func (m MetricSpec) GetHTTPPathMatching() []string {
 	return m.HTTP.PathMatching
 }
 
+// GetRecordErrorCodes reports whether `recordErrorCodes` is enabled for metrics; an unset value means false.
+func (m MetricSpec) GetRecordErrorCodes() bool {
+	if m.RecordErrorCodes == nil {
+		// The default is false
+		return false
+	}
+	return *m.RecordErrorCodes
+}
+
 // MetricHTTP defines configuration for metrics for the HTTP server
 type MetricHTTP struct {
 	// If false, metrics for the HTTP server are collected with increased cardinality.
diff --git a/pkg/diagnostics/errorcode_monitoring.go b/pkg/diagnostics/errorcode_monitoring.go
new file mode 100644
index 00000000000..2fb4bf1c135
--- /dev/null
+++ b/pkg/diagnostics/errorcode_monitoring.go
@@ -0,0 +1,67 @@
+package diagnostics
+
+import (
+	"context"
+
+	"go.opencensus.io/stats"
+	"go.opencensus.io/stats/view"
+	"go.opencensus.io/tag"
+
+	diagUtils "github.com/dapr/dapr/pkg/diagnostics/utils"
+)
+
+// errorCodeMetrics holds the measure used to count occurrences of error codes.
+type errorCodeMetrics struct {
+	errorCodeCount *stats.Int64Measure
+
+	appID   string
+	ctx     context.Context
+	enabled bool
+}
+
+// newErrorCodeMetrics returns a disabled recorder: RecordErrorCode is a no-op
+// until Init succeeds.
+func newErrorCodeMetrics() *errorCodeMetrics {
+	return &errorCodeMetrics{ //nolint:exhaustruct
+		errorCodeCount: stats.Int64(
+			"error_code/count",
+			"Number of times an error with a specific errorcode was encountered.",
+			stats.UnitDimensionless),
+
+		ctx:     context.Background(),
+		enabled: false,
+	}
+}
+
+// Init registers the error code count view, keyed by app ID and error code,
+// and enables recording for the given app ID.
+func (m *errorCodeMetrics) Init(id string) error {
+	err := view.Register(
+		diagUtils.NewMeasureView(m.errorCodeCount, []tag.Key{appIDKey, errorCodeKey}, view.Count()),
+	)
+	if err != nil {
+		return err
+	}
+
+	// Enable recording only once the view is registered, so a failed
+	// registration does not leave the recorder switched on.
+	m.appID = id
+	m.enabled = true
+	return nil
+}
+
+// RecordErrorCode adds one to the count for code. It does nothing until Init
+// has succeeded.
+func (m *errorCodeMetrics) RecordErrorCode(code string) {
+	if !m.enabled {
+		return
+	}
+
+	// Recording is best effort: the error returned by RecordWithTags is
+	// deliberately dropped.
+	_ = stats.RecordWithTags(
+		m.ctx,
+		diagUtils.WithTags(m.errorCodeCount.Name(), appIDKey, m.appID, errorCodeKey, code),
+		m.errorCodeCount.M(1),
+	)
+}
diff --git a/pkg/diagnostics/metrics.go b/pkg/diagnostics/metrics.go
index 13a0a07e37c..7798341a5db 100644
--- a/pkg/diagnostics/metrics.go
+++ b/pkg/diagnostics/metrics.go
@@ -42,6 +42,8 @@ var (
 	DefaultResiliencyMonitoring = newResiliencyMetrics()
 	// DefaultWorkflowMonitoring holds workflow specific metrics.
 	DefaultWorkflowMonitoring = newWorkflowMetrics()
+	// DefaultErrorCodeMonitoring holds error code specific metrics.
+	DefaultErrorCodeMonitoring = newErrorCodeMetrics()
 )
 
 // <<10 -> KBs; <<20 -> MBs; <<30 -> GBs
@@ -79,6 +81,13 @@ func InitMetrics(appID, namespace string, metricSpec config.MetricSpec) error {
 		return err
 	}
 
+	// Error code metrics are opt-in: the view is only registered when recordErrorCodes is set.
+	if metricSpec.GetRecordErrorCodes() {
+		if err := DefaultErrorCodeMonitoring.Init(appID); err != nil {
+			return err
+		}
+	}
+
 	// Set reporting period of views
 	view.SetReportingPeriod(DefaultReportingPeriod)
 	return utils.CreateRulesMap(metricSpec.Rules)
diff --git a/pkg/diagnostics/service_monitoring.go b/pkg/diagnostics/service_monitoring.go
index 585f74ae6dd..d43501eebd9 100644
--- a/pkg/diagnostics/service_monitoring.go
+++ b/pkg/diagnostics/service_monitoring.go
@@ -23,6 +23,7 @@ var (
 	namespaceKey        = tag.MustNewKey("namespace")
 	resiliencyNameKey   = tag.MustNewKey("name")
 	policyKey           = tag.MustNewKey("policy")
+	errorCodeKey        = tag.MustNewKey("errorcode")
 	componentNameKey    = tag.MustNewKey("componentName")
 	destinationAppIDKey = tag.MustNewKey("dst_app_id")
 	sourceAppIDKey      = tag.MustNewKey("src_app_id")