Skip to content

Commit

Permalink
Keystone Metric Improvements (#15706)
Browse files Browse the repository at this point in the history
* execution duration is in seconds

* fixing histogram buckets for workflows

* adding triggerID to incrementRegisterTriggerFailureCounter
  • Loading branch information
patrickhuie19 authored Dec 16, 2024
1 parent b76f9b3 commit 5a5d048
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
6 changes: 3 additions & 3 deletions core/services/workflows/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import (
)

const (
- fifteenMinutesMs = 15 * 60 * 1000
+ fifteenMinutesSec = 15 * 60
reservedFieldNameStepTimeout = "cre_step_timeout"
maxStepTimeoutOverrideSec = 10 * 60 // 10 minutes
)
Expand Down Expand Up @@ -446,7 +446,7 @@ func (e *Engine) registerTrigger(ctx context.Context, t *triggerCapability, trig
}
eventsCh, err := t.trigger.RegisterTrigger(ctx, triggerRegRequest)
if err != nil {
- e.metrics.incrementRegisterTriggerFailureCounter(ctx)
+ e.metrics.with(platform.KeyTriggerID, triggerID).incrementRegisterTriggerFailureCounter(ctx)
// It's confusing that t.ID is different from triggerID, but
// t.ID is the capability ID, and triggerID is the trigger ID.
//
Expand Down Expand Up @@ -704,7 +704,7 @@ func (e *Engine) finishExecution(ctx context.Context, cma custmsg.MessageEmitter
e.metrics.updateWorkflowTimeoutDurationHistogram(ctx, executionDuration)
}

- if executionDuration > fifteenMinutesMs {
+ if executionDuration > fifteenMinutesSec {
logCustMsg(ctx, cma, fmt.Sprintf("execution duration exceeded 15 minutes: %d (seconds)", executionDuration), l)
l.Warnf("execution duration exceeded 15 minutes: %d (seconds)", executionDuration)
}
Expand Down
6 changes: 3 additions & 3 deletions core/services/workflows/monitoring.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,19 +143,19 @@ func MetricViews() []sdkmetric.View {
sdkmetric.NewView(
sdkmetric.Instrument{Name: "platform_engine_workflow_earlyexit_time_seconds"},
sdkmetric.Stream{Aggregation: sdkmetric.AggregationExplicitBucketHistogram{
- Boundaries: []float64{0, 1, 10, 100},
+ Boundaries: []float64{0, 1, 10, 30, 120},
}},
),
sdkmetric.NewView(
sdkmetric.Instrument{Name: "platform_engine_workflow_completed_time_seconds"},
sdkmetric.Stream{Aggregation: sdkmetric.AggregationExplicitBucketHistogram{
- Boundaries: []float64{0, 100, 1000, 10_000, 50_000, 100_0000, 500_000},
+ Boundaries: []float64{0, 10, 30, 60, 120, 300, 600, 900, 1200},
}},
),
sdkmetric.NewView(
sdkmetric.Instrument{Name: "platform_engine_workflow_error_time_seconds"},
sdkmetric.Stream{Aggregation: sdkmetric.AggregationExplicitBucketHistogram{
- Boundaries: []float64{0, 20, 60, 120, 240},
+ Boundaries: []float64{0, 30, 60, 120, 240, 600},
}},
),
sdkmetric.NewView(
Expand Down

0 comments on commit 5a5d048

Please sign in to comment.