diff --git a/core/services/fluxmonitorv2/flux_monitor.go b/core/services/fluxmonitorv2/flux_monitor.go index 93d3752eb59..fe8a22d4177 100644 --- a/core/services/fluxmonitorv2/flux_monitor.go +++ b/core/services/fluxmonitorv2/flux_monitor.go @@ -1040,7 +1040,7 @@ func (fm *FluxMonitor) isValidSubmission(l logger.Logger, answer decimal.Decimal pipeline.PromPipelineTaskExecutionTime.WithLabelValues(fmt.Sprintf("%d", jobId), jobName, "", job.FluxMonitor.String()).Set(float64(elapsed)) pipeline.PromPipelineRunErrors.WithLabelValues(fmt.Sprintf("%d", jobId), jobName).Inc() pipeline.PromPipelineRunTotalTimeToCompletion.WithLabelValues(fmt.Sprintf("%d", jobId), jobName).Set(float64(elapsed)) - pipeline.PromPipelineTasksTotalFinished.WithLabelValues(fmt.Sprintf("%d", jobId), jobName, "", job.FluxMonitor.String(), "error").Inc() + pipeline.PromPipelineTasksTotalFinished.WithLabelValues(fmt.Sprintf("%d", jobId), jobName, "", job.FluxMonitor.String(), "", "error").Inc() return false } diff --git a/core/services/pipeline/runner.go b/core/services/pipeline/runner.go index 3366a177ba8..20319682ef6 100644 --- a/core/services/pipeline/runner.go +++ b/core/services/pipeline/runner.go @@ -98,7 +98,7 @@ var ( Name: "pipeline_tasks_total_finished", Help: "The total number of pipeline tasks which have finished", }, - []string{"job_id", "job_name", "task_id", "task_type", "status"}, + []string{"job_id", "job_name", "task_id", "task_type", "bridge_name", "status"}, ) ) @@ -488,7 +488,13 @@ func logTaskRunToPrometheus(trr TaskRunResult, spec Spec) { } else { status = "completed" } - PromPipelineTasksTotalFinished.WithLabelValues(fmt.Sprintf("%d", spec.JobID), spec.JobName, trr.Task.DotID(), string(trr.Task.Type()), status).Inc() + + bridgeName := "" + if bridgeTask, ok := trr.Task.(*BridgeTask); ok { + bridgeName = bridgeTask.Name + } + + PromPipelineTasksTotalFinished.WithLabelValues(fmt.Sprintf("%d", spec.JobID), spec.JobName, trr.Task.DotID(), string(trr.Task.Type()), bridgeName, 
status).Inc() } // ExecuteAndInsertFinishedRun executes a run in memory then inserts the finished run/task run records, returning the final result diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 17406c11fff..2399e1da2a0 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -21,6 +21,7 @@ URL = '...' ServerPubKey = '...' ``` These will eventually replace `TelemetryIngress.URL` and `TelemetryIngress.ServerPubKey`. Setting `TelemetryIngress.URL` and `TelemetryIngress.ServerPubKey` alongside `[[TelemetryIngress.Endpoints]]` will prevent the node from booting. Only one way of configuring telemetry endpoints is supported. +- Added a `bridge_name` label to the `pipeline_tasks_total_finished` Prometheus metric. This should make it easier to see directly which bridge was failing from the CL node's perspective. ### Upcoming Required Configuration Change