Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add status metric to config #3656

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions pkg/controller/config/config_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/open-policy-agent/gatekeeper/v3/pkg/controller/config/process"
"github.com/open-policy-agent/gatekeeper/v3/pkg/controller/configstatus"
"github.com/open-policy-agent/gatekeeper/v3/pkg/keys"
"github.com/open-policy-agent/gatekeeper/v3/pkg/metrics"
"github.com/open-policy-agent/gatekeeper/v3/pkg/readiness"
"github.com/open-policy-agent/gatekeeper/v3/pkg/util"
"github.com/open-policy-agent/gatekeeper/v3/pkg/watch"
Expand Down Expand Up @@ -93,13 +94,19 @@ func newReconciler(mgr manager.Manager, cm *cm.CacheManager, cs *watch.Controlle
return nil, fmt.Errorf("cacheManager must be non-nil")
}

r, err := newStatsReporter()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What are the observability goals for this metric? As in: what kinds of conclusions should consumers be able to draw by observing this metric? Is the data we are expressing sufficient to support those goals?

What are the gaps in reporting present in this current code?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If GK admin and consumer of metrics are two different users, then I think having number of kinds getting synced in config resource might be helpful when access to config resource is resticted?

if err != nil {
return nil, err
}

return &ReconcileConfig{
reader: mgr.GetCache(),
writer: mgr.GetClient(),
statusClient: mgr.GetClient(),
scheme: mgr.GetScheme(),
cs: cs,
cacheManager: cm,
metrics: r,
tracker: tracker,
getPod: getPod,
}, nil
Expand Down Expand Up @@ -139,6 +146,7 @@ type ReconcileConfig struct {
scheme *runtime.Scheme
cacheManager *cm.CacheManager
cs *watch.ControllerSwitch
metrics *reporter

tracker *readiness.Tracker

Expand All @@ -164,6 +172,15 @@ func (r *ReconcileConfig) Reconcile(ctx context.Context, request reconcile.Reque
}
}

reportMetrics := false
defer func() {
if reportMetrics {
if err := r.metrics.reportConfig(ctx, metrics.ActiveStatus, 0); err != nil {
log.Info("Error when reporting config status metric", err)
}
}
}()

// Fetch the Config instance
if request.NamespacedName != keys.Config {
log.Info("Ignoring unsupported config name", "namespace", request.NamespacedName.Namespace, "name", request.NamespacedName.Name)
Expand Down Expand Up @@ -199,6 +216,8 @@ func (r *ReconcileConfig) Reconcile(ctx context.Context, request reconcile.Reque

newExcluder.Add(instance.Spec.Match)
statsEnabled = instance.Spec.Readiness.StatsEnabled
// Report metrics of config only if its not deleted.
reportMetrics = true
}

// Enable verbose readiness stats if requested.
Expand Down
54 changes: 54 additions & 0 deletions pkg/controller/config/stats_reporter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package config

import (
"context"
"sync"

"github.com/open-policy-agent/gatekeeper/v3/pkg/metrics"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
)

const (
cfgMetricName = "config"
statusKey = "status"
)

func (r *reporter) observeConfig(_ context.Context, observer metric.Int64Observer) error {
r.mux.RLock()
defer r.mux.RUnlock()
for t, v := range r.configReport {
observer.Observe(v, metric.WithAttributes(attribute.String(statusKey, string(t))))
}
return nil
}

func (r *reporter) reportConfig(_ context.Context, t metrics.Status, v int64) error {
r.mux.Lock()
defer r.mux.Unlock()
if r.configReport == nil {
r.configReport = make(map[metrics.Status]int64)
}
r.configReport[t] = v
return nil
}

// newStatsReporter creates a reporter for audit metrics.
func newStatsReporter() (*reporter, error) {
r := &reporter{}
var err error
meter := otel.GetMeterProvider().Meter("gatekeeper")
_, err = meter.Int64ObservableGauge(
cfgMetricName,
metric.WithDescription("Config Status"), metric.WithInt64Callback(r.observeConfig))
if err != nil {
return nil, err
}
return r, nil
}

type reporter struct {
mux sync.RWMutex
configReport map[metrics.Status]int64
}
66 changes: 66 additions & 0 deletions pkg/controller/config/stats_reporter_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package config

import (
"context"
"testing"

"github.com/open-policy-agent/gatekeeper/v3/pkg/metrics"
testmetric "github.com/open-policy-agent/gatekeeper/v3/test/metrics"
"github.com/stretchr/testify/assert"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/metric/metricdata"
"go.opentelemetry.io/otel/sdk/metric/metricdata/metricdatatest"
)

func initializeTestInstruments(t *testing.T) (rdr *sdkmetric.PeriodicReader, r *reporter) {
var err error
rdr = sdkmetric.NewPeriodicReader(new(testmetric.FnExporter))
mp := sdkmetric.NewMeterProvider(sdkmetric.WithReader(rdr))
r, err = newStatsReporter()
assert.NoError(t, err)
meter := mp.Meter("test")

// Ensure the pipeline has a callback setup
_, err = meter.Int64ObservableGauge(cfgMetricName, metric.WithInt64Callback(r.observeConfig))
assert.NoError(t, err)
return rdr, r
}

func TestReportConfig(t *testing.T) {
tests := []struct {
name string
ctx context.Context
expectedErr error
want metricdata.Metrics
}{
{
name: "reporting config status",
ctx: context.Background(),
expectedErr: nil,
want: metricdata.Metrics{
Name: cfgMetricName,
Data: metricdata.Gauge[int64]{
DataPoints: []metricdata.DataPoint[int64]{
{Attributes: attribute.NewSet(attribute.String(statusKey, string(metrics.ActiveStatus))), Value: 0},
{Attributes: attribute.NewSet(attribute.String(statusKey, string(metrics.ErrorStatus))), Value: 0},
},
},
},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
rdr, r := initializeTestInstruments(t)
for _, status := range metrics.AllStatuses {
assert.NoError(t, r.reportConfig(tt.ctx, status, 0))
}

rm := &metricdata.ResourceMetrics{}
assert.Equal(t, tt.expectedErr, rdr.Collect(tt.ctx, rm))
metricdatatest.AssertEqual(t, tt.want, rm.ScopeMetrics[0].Metrics[0], metricdatatest.IgnoreTimestamp())
})
}
}
Loading