From bfaa4bcbfaf9ad6981577833047154ed9b5d4753 Mon Sep 17 00:00:00 2001
From: James Cox <james.cox@harness.io>
Date: Tue, 16 May 2023 11:31:23 +0100
Subject: [PATCH] FFM-7702 Clean up cache metrics registration

**What**

- Cleans up the prometheus registration for the cache
- Puts a check in so we don't register CacheNotFound errors as errors in
  prometheus metrics. I think the error label in the prometheus should
be reserved for internal/unexpected errors, we already track the status
code in the http handlers so can see when requests have been made for a
resource that doesn't exist

**Why**

I was a bit sloppy when initailly registering some of the metrics for
the cache and spotted they weren't coming throuhg during my load
testing.

**Testing**

Tested these changes out locally
---
 cache/metrics.go      | 17 +++++++++++++----
 cache/metrics_test.go |  4 ++--
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/cache/metrics.go b/cache/metrics.go
index a8199b7f..e019c393 100644
--- a/cache/metrics.go
+++ b/cache/metrics.go
@@ -3,9 +3,11 @@ package cache
 import (
 	"context"
 	"encoding"
+	"errors"
 	"fmt"
 	"time"
 
+	"github.com/harness/ff-proxy/domain"
 	"github.com/prometheus/client_golang/prometheus"
 )
 
@@ -39,21 +41,21 @@ func NewMetricsCache(label string, reg prometheus.Registerer, next Cache) Metric
 		next: next,
 
 		writeDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
-			Name:    "ff_proxy_%s_cache_write_duration",
+			Name:    fmt.Sprintf("ff_proxy_%s_cache_write_duration", label),
 			Help:    "Tracks how long write operations to the cache take",
 			Buckets: []float64{0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5},
 		},
 			[]string{},
 		),
 		readDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
-			Name:    "ff_proxy_%s_cache_read_duration",
+			Name:    fmt.Sprintf("ff_proxy_%s_cache_read_duration", label),
 			Help:    "Tracks how long write operations to the cache take",
 			Buckets: []float64{0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5},
 		},
 			[]string{},
 		),
 		deleteDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
-			Name:    "ff_proxy_%s_cache_delete_duration",
+			Name:    fmt.Sprintf("ff_proxy_%s_cache_delete_duration", label),
 			Help:    "Tracks how long delete operations to the cache take",
 			Buckets: []float64{0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5},
 		},
@@ -73,7 +75,7 @@ func NewMetricsCache(label string, reg prometheus.Registerer, next Cache) Metric
 			[]string{"key", "operation", "error"},
 		),
 		deleteCount: prometheus.NewCounterVec(prometheus.CounterOpts{
-			Name: fmt.Sprintf("ff_proxy_%s_cache_remove_duration", label),
+			Name: fmt.Sprintf("ff_proxy_%s_cache_remove_count", label),
 			Help: "Tracks how many deletes we make to the cache",
 		},
 			[]string{"key", "operation"},
@@ -81,6 +83,9 @@ func NewMetricsCache(label string, reg prometheus.Registerer, next Cache) Metric
 	}
 
 	reg.MustRegister(
+		c.writeDuration,
+		c.readDuration,
+		c.deleteDuration,
 		c.writeCount,
 		c.readCount,
 		c.deleteCount,
@@ -187,6 +192,10 @@ func trackCounter(metric counter, labels ...string) {
 
 func getErrorLabel(err error) string {
 	if err != nil {
+		// Don't want to track NotFound as an error in prometheus metrics
+		if errors.Is(err, domain.ErrCacheNotFound) {
+			return "false"
+		}
 		return "true"
 	}
 	return "false"
diff --git a/cache/metrics_test.go b/cache/metrics_test.go
index 1ea19b57..9adc8ca6 100644
--- a/cache/metrics_test.go
+++ b/cache/metrics_test.go
@@ -252,7 +252,7 @@ func TestCacheMetrics_GetAll(t *testing.T) {
 		readCount    *mockCounter
 		expected     result
 	}{
-		"Given I call GetAll and the decorated cache errors": {
+		"Given I call GetAll and the decorated cache errors with a domain.ErrCacheNotFound error": {
 			args: args{
 				key:       "foo",
 				cacheData: map[string]map[string][]byte{},
@@ -264,7 +264,7 @@ func TestCacheMetrics_GetAll(t *testing.T) {
 
 			expected: result{
 				observations: 1,
-				labels:       []string{"foo", "GetAll", "true"},
+				labels:       []string{"foo", "GetAll", "false"},
 			},
 		},
 		"Given I call GetAll and the decorated cache doesn't error": {