diff --git a/core/services/workflows/engine.go b/core/services/workflows/engine.go index 943802d1962..d153e53bc07 100644 --- a/core/services/workflows/engine.go +++ b/core/services/workflows/engine.go @@ -28,7 +28,7 @@ import ( ) const ( - fifteenMinutesMs = 15 * 60 * 1000 + fifteenMinutesSec = 15 * 60 reservedFieldNameStepTimeout = "cre_step_timeout" maxStepTimeoutOverrideSec = 10 * 60 // 10 minutes ) @@ -446,7 +446,7 @@ func (e *Engine) registerTrigger(ctx context.Context, t *triggerCapability, trig } eventsCh, err := t.trigger.RegisterTrigger(ctx, triggerRegRequest) if err != nil { - e.metrics.incrementRegisterTriggerFailureCounter(ctx) + e.metrics.with(platform.KeyTriggerID, triggerID).incrementRegisterTriggerFailureCounter(ctx) // It's confusing that t.ID is different from triggerID, but // t.ID is the capability ID, and triggerID is the trigger ID. // @@ -704,7 +704,7 @@ func (e *Engine) finishExecution(ctx context.Context, cma custmsg.MessageEmitter e.metrics.updateWorkflowTimeoutDurationHistogram(ctx, executionDuration) } - if executionDuration > fifteenMinutesMs { + if executionDuration > fifteenMinutesSec { logCustMsg(ctx, cma, fmt.Sprintf("execution duration exceeded 15 minutes: %d (seconds)", executionDuration), l) l.Warnf("execution duration exceeded 15 minutes: %d (seconds)", executionDuration) } diff --git a/core/services/workflows/monitoring.go b/core/services/workflows/monitoring.go index 8457dadeb60..b73ee6e5eda 100644 --- a/core/services/workflows/monitoring.go +++ b/core/services/workflows/monitoring.go @@ -143,19 +143,19 @@ func MetricViews() []sdkmetric.View { sdkmetric.NewView( sdkmetric.Instrument{Name: "platform_engine_workflow_earlyexit_time_seconds"}, sdkmetric.Stream{Aggregation: sdkmetric.AggregationExplicitBucketHistogram{ - Boundaries: []float64{0, 1, 10, 100}, + Boundaries: []float64{0, 1, 10, 30, 120}, }}, ), sdkmetric.NewView( sdkmetric.Instrument{Name: "platform_engine_workflow_completed_time_seconds"}, sdkmetric.Stream{Aggregation: sdkmetric.AggregationExplicitBucketHistogram{ - Boundaries: []float64{0, 100, 1000, 10_000, 50_000, 100_0000, 500_000}, + Boundaries: []float64{0, 10, 30, 60, 120, 300, 600, 900, 1200}, }}, ), sdkmetric.NewView( sdkmetric.Instrument{Name: "platform_engine_workflow_error_time_seconds"}, sdkmetric.Stream{Aggregation: sdkmetric.AggregationExplicitBucketHistogram{ - Boundaries: []float64{0, 20, 60, 120, 240}, + Boundaries: []float64{0, 30, 60, 120, 240, 600}, }}, ), sdkmetric.NewView(