Skip to content

Commit

Permalink
feat: add exit_code label to execution_done_count (#1576)
Browse files Browse the repository at this point in the history
The `exit_code` label adds useful information to the `execution_done_count` metric.
The exit codes are usually `0`, `1`, or `127`, so this remains a low-cardinality metric.

This label will allow us to use PromQL to perform standard operations, such as calculating
the number of failed vs. total jobs.
  • Loading branch information
atmosx authored Sep 12, 2024
1 parent 051ea62 commit a162b3f
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions builtin/bins/dkron-executor-shell/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package main

import (
"log"
"strconv"
"time"

"github.com/prometheus/client_golang/prometheus"
Expand Down Expand Up @@ -40,7 +41,7 @@ var (
Name: "execution_done_count",
Help: "Job Execution Counter",
},
[]string{"job_name"})
[]string{"job_name", "exit_code"})

jobExitCode = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: namespace,
Expand All @@ -60,10 +61,11 @@ func CollectProcessMetrics(jobname string, pid int, quit chan int) {
// log.Println("Exit code received and quit channel closed.")
return
}
exitCodeStr := strconv.Itoa(exitCode)
cpuUsage.WithLabelValues(jobname).Set(0)
memUsage.WithLabelValues(jobname).Set(0)
jobExecutionTime.WithLabelValues(jobname).Set(0)
jobDoneCount.WithLabelValues(jobname).Inc()
jobDoneCount.WithLabelValues(jobname, exitCodeStr).Inc()
jobExitCode.WithLabelValues(jobname).Set(float64(exitCode))
default:
cpu, mem, err := GetTotalCPUMemUsage(pid)
Expand Down

0 comments on commit a162b3f

Please sign in to comment.