Skip to content

Commit

Permalink
FFM-7702 Clean up cache metrics registration
Browse files Browse the repository at this point in the history
**What**

- Cleans up the prometheus registration for the cache
- Puts a check in so we don't register CacheNotFound errors as errors in
  prometheus metrics. I think the error label in the prometheus should
be reserved for internal/unexpected errors, we already track the status
code in the http handlers so can see when requests have been made for a
resource that doesn't exist

**Why**

I was a bit sloppy when initailly registering some of the metrics for
the cache and spotted they weren't coming throuhg during my load
testing.

**Testing**

Tested these changes out locally
  • Loading branch information
jcox250 committed May 16, 2023
1 parent 4e28292 commit bfaa4bc
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 6 deletions.
17 changes: 13 additions & 4 deletions cache/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@ package cache
import (
"context"
"encoding"
"errors"
"fmt"
"time"

"github.com/harness/ff-proxy/domain"
"github.com/prometheus/client_golang/prometheus"
)

Expand Down Expand Up @@ -39,21 +41,21 @@ func NewMetricsCache(label string, reg prometheus.Registerer, next Cache) Metric
next: next,

writeDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
Name: "ff_proxy_%s_cache_write_duration",
Name: fmt.Sprintf("ff_proxy_%s_cache_write_duration", label),
Help: "Tracks how long write operations to the cache take",
Buckets: []float64{0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5},
},
[]string{},
),
readDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
Name: "ff_proxy_%s_cache_read_duration",
Name: fmt.Sprintf("ff_proxy_%s_cache_read_duration", label),
Help: "Tracks how long write operations to the cache take",
Buckets: []float64{0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5},
},
[]string{},
),
deleteDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
Name: "ff_proxy_%s_cache_delete_duration",
Name: fmt.Sprintf("ff_proxy_%s_cache_delete_duration", label),
Help: "Tracks how long delete operations to the cache take",
Buckets: []float64{0.01, 0.025, 0.05, 0.075, 0.1, 0.25, 0.5},
},
Expand All @@ -73,14 +75,17 @@ func NewMetricsCache(label string, reg prometheus.Registerer, next Cache) Metric
[]string{"key", "operation", "error"},
),
deleteCount: prometheus.NewCounterVec(prometheus.CounterOpts{
Name: fmt.Sprintf("ff_proxy_%s_cache_remove_duration", label),
Name: fmt.Sprintf("ff_proxy_%s_cache_remove_count", label),
Help: "Tracks how many deletes we make to the cache",
},
[]string{"key", "operation"},
),
}

reg.MustRegister(
c.writeDuration,
c.readDuration,
c.deleteDuration,
c.writeCount,
c.readCount,
c.deleteCount,
Expand Down Expand Up @@ -187,6 +192,10 @@ func trackCounter(metric counter, labels ...string) {

func getErrorLabel(err error) string {
if err != nil {
// Don't want to track NotFound as an error in prometheus metrics
if errors.Is(err, domain.ErrCacheNotFound) {
return "false"
}
return "true"
}
return "false"
Expand Down
4 changes: 2 additions & 2 deletions cache/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ func TestCacheMetrics_GetAll(t *testing.T) {
readCount *mockCounter
expected result
}{
"Given I call GetAll and the decorated cache errors": {
"Given I call GetAll and the decorated cache errors with a domain.ErrCacheNotFound error": {
args: args{
key: "foo",
cacheData: map[string]map[string][]byte{},
Expand All @@ -264,7 +264,7 @@ func TestCacheMetrics_GetAll(t *testing.T) {

expected: result{
observations: 1,
labels: []string{"foo", "GetAll", "true"},
labels: []string{"foo", "GetAll", "false"},
},
},
"Given I call GetAll and the decorated cache doesn't error": {
Expand Down

0 comments on commit bfaa4bc

Please sign in to comment.