Skip to content

Commit

Permalink
Merge pull request #128 from uselagoon/metrics
Browse files Browse the repository at this point in the history
feat: add initial lagoon metrics
  • Loading branch information
shreddedbacon authored May 13, 2022
2 parents 471a70c + fe29d79 commit d650497
Show file tree
Hide file tree
Showing 18 changed files with 258 additions and 1 deletion.
1 change: 1 addition & 0 deletions apis/lagoon/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/go-logr/logr"
"github.com/prometheus/client_golang/prometheus"
lagoonv1beta1 "github.com/uselagoon/remote-controller/apis/lagoon/v1beta1"
"github.com/uselagoon/remote-controller/internal/helpers"
)
Expand Down Expand Up @@ -860,6 +861,16 @@ func (r *LagoonBuildReconciler) processBuild(ctx context.Context, opLog logr.Log
// @TODO: should update the build to failed
return nil
}
buildRunningStatus.With(prometheus.Labels{
"build_namespace": lagoonBuild.ObjectMeta.Namespace,
"build_name": lagoonBuild.ObjectMeta.Name,
}).Set(1)
buildStatus.With(prometheus.Labels{
"build_namespace": lagoonBuild.ObjectMeta.Namespace,
"build_name": lagoonBuild.ObjectMeta.Name,
"build_step": "running",
}).Set(1)
buildsStartedCounter.Inc()
// then break out of the build
}
opLog.Info(fmt.Sprintf("Build pod already running for: %s", lagoonBuild.ObjectMeta.Name))
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
92 changes: 92 additions & 0 deletions controllers/v1beta1/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package v1beta1

import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)

// Prometheus collectors used by the v1beta1 controllers. All of them are
// created through promauto.
var (
	// Aggregate build metrics: two gauges for the current state and one
	// counter per lifecycle event.
	buildsRunningGauge = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "lagoon_builds_running_current",
		Help: "The total number of Lagoon builds running",
	})
	buildsPendingGauge = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "lagoon_builds_pending_current",
		Help: "The total number of Lagoon builds pending",
	})
	buildsStartedCounter = promauto.NewCounter(prometheus.CounterOpts{
		Name: "lagoon_builds_started_total",
		Help: "The total number of Lagoon builds started",
	})
	buildsCompletedCounter = promauto.NewCounter(prometheus.CounterOpts{
		Name: "lagoon_builds_completed_total",
		Help: "The total number of Lagoon builds completed",
	})
	buildsFailedCounter = promauto.NewCounter(prometheus.CounterOpts{
		Name: "lagoon_builds_failed_total",
		Help: "The total number of Lagoon builds failed",
	})
	buildsCancelledCounter = promauto.NewCounter(prometheus.CounterOpts{
		Name: "lagoon_builds_cancelled_total",
		Help: "The total number of Lagoon builds cancelled",
	})

	// Aggregate task metrics, mirroring the build ones above.
	tasksRunningGauge = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "lagoon_tasks_running_current",
		Help: "The total number of Lagoon tasks running",
	})
	tasksStartedCounter = promauto.NewCounter(prometheus.CounterOpts{
		Name: "lagoon_tasks_started_total",
		Help: "The total number of Lagoon tasks started",
	})
	tasksCompletedCounter = promauto.NewCounter(prometheus.CounterOpts{
		Name: "lagoon_tasks_completed_total",
		Help: "The total number of Lagoon tasks completed",
	})
	tasksFailedCounter = promauto.NewCounter(prometheus.CounterOpts{
		Name: "lagoon_tasks_failed_total",
		Help: "The total number of Lagoon tasks failed",
	})
	tasksCancelledCounter = promauto.NewCounter(prometheus.CounterOpts{
		Name: "lagoon_tasks_cancelled_total",
		Help: "The total number of Lagoon tasks cancelled",
	})

	// buildStatus tracks build step transitions, one series per
	// build name / namespace / step. When a build moves to a new step the
	// series for the new step is created and the one for the previous step
	// is removed, which makes it possible to gauge how long individual
	// steps of a build take.
	buildStatus = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "lagoon_build_status",
			Help: "The status of running Lagoon builds",
		},
		[]string{"build_name", "build_namespace", "build_step"},
	)

	// buildRunningStatus and taskRunningStatus hold one series per running
	// build or task. The series is removed once the build or task is
	// complete, so its lifetime can be used to gauge how long the build or
	// task ran for.
	buildRunningStatus = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "lagoon_build_running_status",
			Help: "The duration of running Lagoon builds",
		},
		[]string{"build_name", "build_namespace"},
	)
	taskRunningStatus = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "lagoon_task_running_status",
			Help: "The duration of running Lagoon tasks",
		},
		[]string{"task_name", "task_namespace"},
	)
)
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"time"

"github.com/go-logr/logr"
"github.com/prometheus/client_golang/prometheus"
lagoonv1beta1 "github.com/uselagoon/remote-controller/apis/lagoon/v1beta1"
"github.com/uselagoon/remote-controller/internal/helpers"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -228,20 +229,31 @@ func (r *LagoonMonitorReconciler) updateDeploymentAndEnvironmentTask(ctx context
switch jobPod.Status.Phase {
case corev1.PodFailed:
condition = "failed"
buildsFailedCounter.Inc()
case corev1.PodRunning:
condition = "running"
case corev1.PodSucceeded:
condition = "complete"
buildsCompletedCounter.Inc()
}
if value, ok := lagoonBuild.Labels["lagoon.sh/buildStatus"]; ok {
if value == string(lagoonv1beta1.BuildStatusCancelled) {
condition = "cancelled"
buildsCancelledCounter.Inc()
}
}
buildStep := "running"
if value, ok := jobPod.Labels["lagoon.sh/buildStep"]; ok {
buildStep = value
}
if condition == "failed" || condition == "complete" || condition == "cancelled" {
time.AfterFunc(31*time.Second, func() {
buildRunningStatus.Delete(prometheus.Labels{
"build_namespace": lagoonBuild.ObjectMeta.Namespace,
"build_name": lagoonBuild.ObjectMeta.Name,
})
})
}
msg := lagoonv1beta1.LagoonMessage{
Type: "build",
Namespace: namespace,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,21 @@ func (r *LagoonMonitorReconciler) Reconcile(ctx context.Context, req ctrl.Reques

// if this is a lagoon task, then run the handle task monitoring process
if jobPod.ObjectMeta.Labels["lagoon.sh/jobType"] == "task" {
err := r.calculateTaskMetrics(ctx)
if err != nil {
opLog.Error(err, fmt.Sprintf("Unable to generate metrics."))
}
if jobPod.ObjectMeta.DeletionTimestamp.IsZero() {
// pod is not being deleted
return ctrl.Result{}, r.handleTaskMonitor(ctx, opLog, req, jobPod)
}
}
// if this is a lagoon build, then run the handle build monitoring process
if jobPod.ObjectMeta.Labels["lagoon.sh/jobType"] == "build" {
err := r.calculateBuildMetrics(ctx)
if err != nil {
opLog.Error(err, fmt.Sprintf("Unable to generate metrics."))
}
if jobPod.ObjectMeta.DeletionTimestamp.IsZero() {
// pod is not being deleted
return ctrl.Result{}, r.handleBuildMonitor(ctx, opLog, req, jobPod)
Expand All @@ -86,7 +94,7 @@ func (r *LagoonMonitorReconciler) Reconcile(ctx context.Context, req ctrl.Reques
// first try and clean up the pod and capture the logs and update
// the lagoonbuild that owns it with the status
var lagoonBuild lagoonv1beta1.LagoonBuild
err := r.Get(ctx, types.NamespacedName{
err = r.Get(ctx, types.NamespacedName{
Namespace: jobPod.ObjectMeta.Namespace,
Name: jobPod.ObjectMeta.Labels["lagoon.sh/buildName"],
}, &lagoonBuild)
Expand Down
51 changes: 51 additions & 0 deletions controllers/v1beta1/podmonitor_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package v1beta1

import (
"context"
"fmt"

corev1 "k8s.io/api/core/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
)

// calculateBuildMetrics refreshes buildsRunningGauge with the number of build
// pods that are currently in the Running phase.
//
// Pods are selected by the labels lagoon.sh/jobType=build and
// lagoon.sh/controller=<r.ControllerNamespace>. If the list call fails the
// gauge is left untouched and the underlying error is returned wrapped with
// %w, so callers can still inspect it with errors.Is / errors.As.
func (r *LagoonMonitorReconciler) calculateBuildMetrics(ctx context.Context) error {
	listOption := (&client.ListOptions{}).ApplyOptions([]client.ListOption{
		client.MatchingLabels(map[string]string{
			"lagoon.sh/jobType":    "build",
			"lagoon.sh/controller": r.ControllerNamespace,
		}),
	})
	buildPods := &corev1.PodList{}
	if err := r.List(ctx, buildPods, listOption); err != nil {
		return fmt.Errorf("Unable to list builds in the cluster, there may be none or something went wrong: %w", err)
	}
	runningBuilds := 0
	// Index into the slice instead of ranging by value so that each
	// corev1.Pod is not copied just to read its phase.
	for i := range buildPods.Items {
		if buildPods.Items[i].Status.Phase == corev1.PodRunning {
			runningBuilds++
		}
	}
	buildsRunningGauge.Set(float64(runningBuilds))
	return nil
}

// calculateTaskMetrics refreshes tasksRunningGauge with the number of task
// pods that are currently in the Running phase.
//
// Pods are selected by the labels lagoon.sh/jobType=task and
// lagoon.sh/controller=<r.ControllerNamespace>. If the list call fails the
// gauge is left untouched and the underlying error is returned wrapped with
// %w, so callers can still inspect it with errors.Is / errors.As.
func (r *LagoonMonitorReconciler) calculateTaskMetrics(ctx context.Context) error {
	listOption := (&client.ListOptions{}).ApplyOptions([]client.ListOption{
		client.MatchingLabels(map[string]string{
			"lagoon.sh/jobType":    "task",
			"lagoon.sh/controller": r.ControllerNamespace,
		}),
	})
	taskPods := &corev1.PodList{}
	if err := r.List(ctx, taskPods, listOption); err != nil {
		return fmt.Errorf("Unable to list tasks in the cluster, there may be none or something went wrong: %w", err)
	}
	runningTasks := 0
	// Index into the slice instead of ranging by value so that each
	// corev1.Pod is not copied just to read its phase.
	for i := range taskPods.Items {
		if taskPods.Items[i].Status.Phase == corev1.PodRunning {
			runningTasks++
		}
	}
	tasksRunningGauge.Set(float64(runningTasks))
	return nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"time"

"github.com/go-logr/logr"
"github.com/prometheus/client_golang/prometheus"
lagoonv1beta1 "github.com/uselagoon/remote-controller/apis/lagoon/v1beta1"
"github.com/uselagoon/remote-controller/internal/helpers"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -210,16 +211,27 @@ func (r *LagoonMonitorReconciler) updateLagoonTask(opLog logr.Logger,
switch jobPod.Status.Phase {
case corev1.PodFailed:
condition = "failed"
tasksFailedCounter.Inc()
case corev1.PodRunning:
condition = "running"
case corev1.PodSucceeded:
condition = "complete"
tasksCompletedCounter.Inc()
}
if value, ok := lagoonTask.Labels["lagoon.sh/taskStatus"]; ok {
if value == string(lagoonv1beta1.TaskStatusCancelled) {
condition = "cancelled"
tasksCancelledCounter.Inc()
}
}
if condition == "failed" || condition == "complete" || condition == "cancelled" {
time.AfterFunc(31*time.Second, func() {
taskRunningStatus.Delete(prometheus.Labels{
"task_namespace": lagoonTask.ObjectMeta.Namespace,
"task_name": lagoonTask.ObjectMeta.Name,
})
})
}
msg := lagoonv1beta1.LagoonMessage{
Type: "task",
Namespace: namespace,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ package v1beta1

import (
"regexp"
"time"

"github.com/prometheus/client_golang/prometheus"
"sigs.k8s.io/controller-runtime/pkg/event"
"sigs.k8s.io/controller-runtime/pkg/predicate"
)
Expand Down Expand Up @@ -68,6 +70,28 @@ func (p PodPredicates) Update(e event.UpdateEvent) bool {
if value == crdVersion {
if _, okOld := e.ObjectOld.GetLabels()["lagoon.sh/buildName"]; okOld {
if value, ok := e.ObjectNew.GetLabels()["lagoon.sh/buildName"]; ok {
oldBuildStep := "running"
newBuildStep := "running"
if value, ok := e.ObjectNew.GetLabels()["lagoon.sh/buildStep"]; ok {
newBuildStep = value
}
if value, ok := e.ObjectOld.GetLabels()["lagoon.sh/buildStep"]; ok {
oldBuildStep = value
}
if newBuildStep != oldBuildStep {
buildStatus.With(prometheus.Labels{
"build_namespace": e.ObjectOld.GetNamespace(),
"build_name": e.ObjectOld.GetName(),
"build_step": newBuildStep,
}).Set(1)
}
time.AfterFunc(31*time.Second, func() {
buildStatus.Delete(prometheus.Labels{
"build_namespace": e.ObjectOld.GetNamespace(),
"build_name": e.ObjectOld.GetName(),
"build_step": oldBuildStep,
})
})
match, _ := regexp.MatchString("^lagoon-build", value)
return match
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"regexp"

"github.com/go-logr/logr"
"github.com/prometheus/client_golang/prometheus"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -306,6 +307,11 @@ func (r *LagoonTaskReconciler) createStandardTask(ctx context.Context, lagoonTas
//@TODO: send msg back and update task to failed?
return nil
}
taskRunningStatus.With(prometheus.Labels{
"task_namespace": lagoonTask.ObjectMeta.Namespace,
"task_name": lagoonTask.ObjectMeta.Name,
}).Set(1)
tasksStartedCounter.Inc()
} else {
opLog.Info(fmt.Sprintf("Task pod already running for: %s", lagoonTask.ObjectMeta.Name))
}
Expand Down Expand Up @@ -467,6 +473,11 @@ func (r *LagoonTaskReconciler) createAdvancedTask(ctx context.Context, lagoonTas
if err := c.Create(ctx, newPod); err != nil {
return err
}
taskRunningStatus.With(prometheus.Labels{
"task_namespace": lagoonTask.ObjectMeta.Namespace,
"task_name": lagoonTask.ObjectMeta.Name,
}).Set(1)
tasksStartedCounter.Inc()
return nil
}

Expand Down
30 changes: 30 additions & 0 deletions internal/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
package metrics

import (
"fmt"
"net/http"
"time"

"github.com/go-logr/logr"
"github.com/prometheus/client_golang/prometheus/promhttp"
)

// NewServer returns a *http.Server that serves the prometheus metrics handler
// at /metrics on addr. The server is launched with ListenAndServe in a new
// goroutine.
//
// If ListenAndServe returns anything other than http.ErrServerClosed, the
// returned error is reported through log.
// Caller should defer Shutdown() for cleanup.
func NewServer(log logr.Logger, addr string) *http.Server {
	mux := http.NewServeMux()
	mux.Handle("/metrics", promhttp.Handler())
	s := &http.Server{
		Addr:         addr,
		Handler:      mux,
		ReadTimeout:  16 * time.Second,
		WriteTimeout: 16 * time.Second,
	}
	go func() {
		if err := s.ListenAndServe(); err != http.ErrServerClosed {
			// logr expects the actual error first and the human readable
			// message second; passing them the other way round hides the
			// real cause in the message field.
			log.Error(err, fmt.Sprintf("metrics server on %q did not shut down cleanly", addr))
		}
	}()
	return s
}
Loading

0 comments on commit d650497

Please sign in to comment.