diff --git a/cmd/tetragon/flags.go b/cmd/tetragon/flags.go index 986de7aada9..3ddec22888c 100644 --- a/cmd/tetragon/flags.go +++ b/cmd/tetragon/flags.go @@ -31,14 +31,15 @@ const ( keyEnableCiliumAPI = "enable-cilium-api" keyEnableProcessAncestors = "enable-process-ancestors" - keyMetricsServer = "metrics-server" - keyServerAddress = "server-address" - keyGopsAddr = "gops-address" - keyEnableProcessCred = "enable-process-cred" - keyEnableProcessNs = "enable-process-ns" - keyConfigFile = "config-file" - keyTracingPolicy = "tracing-policy" - keyTracingPolicyDir = "tracing-policy-dir" + keyMetricsServer = "metrics-server" + keyMetricsLabelFilter = "metrics-label-filter" + keyServerAddress = "server-address" + keyGopsAddr = "gops-address" + keyEnableProcessCred = "enable-process-cred" + keyEnableProcessNs = "enable-process-ns" + keyConfigFile = "config-file" + keyTracingPolicy = "tracing-policy" + keyTracingPolicyDir = "tracing-policy-dir" keyCpuProfile = "cpuprofile" keyMemProfile = "memprofile" @@ -114,6 +115,7 @@ func readAndSetFlags() { option.Config.DataCacheSize = viper.GetInt(keyDataCacheSize) option.Config.MetricsServer = viper.GetString(keyMetricsServer) + option.Config.MetricsLabelFilter = viper.GetStringMap(keyMetricsLabelFilter) option.Config.ServerAddress = viper.GetString(keyServerAddress) option.Config.ExportFilename = viper.GetString(keyExportFilename) diff --git a/install/kubernetes/templates/tetragon_configmap.yaml b/install/kubernetes/templates/tetragon_configmap.yaml index 242e9c9d959..c911f887298 100644 --- a/install/kubernetes/templates/tetragon_configmap.yaml +++ b/install/kubernetes/templates/tetragon_configmap.yaml @@ -37,6 +37,10 @@ data: {{- else }} metrics-server: "" {{- end }} +{{- if .Values.tetragon.prometheus.enabled }} + metrics-label-filter: {{.Values.tetragon.prometheus.metricsLabelFilter }} +{{- else }} +{{- end }} {{- if .Values.tetragon.grpc.enabled }} server-address: {{ .Values.tetragon.grpc.address }} {{- else }} diff --git 
// KnownMetricLabelFilters lists the metric label names that may be switched
// on or off through the metrics label filter. The order of the entries is
// significant: code that filters labels matches the trailing labels of a
// metric against this list position by position.
var KnownMetricLabelFilters = []string{
	"namespace",
	"workload",
	"pod",
	"binary",
}
EventsProcessed.WithLabelValues(metrics.FilterMetricLabels(eventType, namespace, workload, pod, binary)...).Inc() if pInfo != nil && pInfo.Name != "" { policyStats. - WithLabelValues(pInfo.Name, pInfo.Hook, namespace, workload, pod, binary). + WithLabelValues(metrics.FilterMetricLabels(pInfo.Name, pInfo.Hook, namespace, workload, pod, binary)...). Inc() } } diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 7c232de0b41..1b65db8fa09 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -9,6 +9,8 @@ import ( "time" "github.com/cilium/tetragon/pkg/logger" + "github.com/cilium/tetragon/pkg/metrics/consts" + "github.com/cilium/tetragon/pkg/option" "github.com/cilium/tetragon/pkg/podhooks" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" @@ -142,3 +144,19 @@ func EnableMetrics(address string) { http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{Registry: reg})) http.ListenAndServe(address, nil) } + +// The filtered metric labels must be passed last and in the exact order of consts.KnownMetricLabelFilters. +func FilterMetricLabels(labels ...string) []string { + offset := len(labels) - len(consts.KnownMetricLabelFilters) + if offset < 0 { + // Uh-oh. 
+ return labels + } + result := labels[:offset] + for i, label := range consts.KnownMetricLabelFilters { + if _, ok := option.Config.MetricsLabelFilter[label]; ok { + result = append(result, labels[i+offset]) + } + } + return result +} diff --git a/pkg/metrics/metrics_test.go b/pkg/metrics/metrics_test.go index 82f613471ed..e97403d9c84 100644 --- a/pkg/metrics/metrics_test.go +++ b/pkg/metrics/metrics_test.go @@ -18,12 +18,35 @@ import ( "github.com/cilium/tetragon/pkg/metrics" "github.com/cilium/tetragon/pkg/metrics/config" "github.com/cilium/tetragon/pkg/metrics/eventmetrics" + "github.com/cilium/tetragon/pkg/option" ) var sampleMsgGenericTracepointUnix = tracing.MsgGenericTracepointUnix{ PolicyName: "fake-policy", } +func TestFilterMetricLabels(t *testing.T) { + option.Config.MetricsLabelFilter = map[string]interface{}{ + "namespace": nil, + "workload": nil, + "pod": nil, + "binary": nil, + } + assert.Equal(t, []string{"type", "namespace", "workspace", "pod", "binary"}, metrics.FilterMetricLabels("type", "namespace", "workspace", "pod", "binary")) + assert.Equal(t, []string{"syscall", "namespace", "workspace", "pod", "binary"}, metrics.FilterMetricLabels("syscall", "namespace", "workspace", "pod", "binary")) + assert.Equal(t, []string{"namespace", "workspace", "pod", "binary"}, metrics.FilterMetricLabels("namespace", "workspace", "pod", "binary")) + + option.Config.MetricsLabelFilter = map[string]interface{}{ + "namespace": nil, + "workload": nil, + } + assert.Equal(t, []string{"type", "namespace", "workspace"}, metrics.FilterMetricLabels("type", "namespace", "workspace", "pod", "binary")) + assert.Equal(t, []string{"syscall", "namespace", "workspace"}, metrics.FilterMetricLabels("syscall", "namespace", "workspace", "pod", "binary")) + assert.Equal(t, []string{"namespace", "workspace"}, metrics.FilterMetricLabels("namespace", "workspace", "pod", "binary")) + + assert.Equal(t, []string{"type", "syscall"}, metrics.FilterMetricLabels("type", "syscall")) +} + func 
TestPodDelete(t *testing.T) { reg := metrics.GetRegistry() config.InitAllMetrics(reg) diff --git a/pkg/metrics/syscallmetrics/syscallmetrics.go b/pkg/metrics/syscallmetrics/syscallmetrics.go index fff33972854..1d181cd77d8 100644 --- a/pkg/metrics/syscallmetrics/syscallmetrics.go +++ b/pkg/metrics/syscallmetrics/syscallmetrics.go @@ -17,7 +17,7 @@ var ( Name: "syscalls_total", Help: "System calls observed.", ConstLabels: nil, - }, []string{"syscall", "namespace", "workload", "pod", "binary"}) + }, metrics.FilterMetricLabels("syscall", "namespace", "workload", "pod", "binary")) ) func InitMetrics(registry *prometheus.Registry) { @@ -46,7 +46,7 @@ func Handle(event interface{}) { } if syscall != "" { - syscallStats.WithLabelValues(syscall, namespace, workload, pod, binary).Inc() + syscallStats.WithLabelValues(metrics.FilterMetricLabels(syscall, namespace, workload, pod, binary)...).Inc() } } diff --git a/pkg/option/config.go b/pkg/option/config.go index 20789896753..a69a9ddf98b 100644 --- a/pkg/option/config.go +++ b/pkg/option/config.go @@ -46,10 +46,11 @@ type config struct { ProcessCacheSize int DataCacheSize int - MetricsServer string - ServerAddress string - TracingPolicy string - TracingPolicyDir string + MetricsServer string + MetricsLabelFilter map[string]interface{} + ServerAddress string + TracingPolicy string + TracingPolicyDir string ExportFilename string ExportFileMaxSizeMB int @@ -93,6 +94,14 @@ var ( // LogOpts contains logger parameters LogOpts: make(map[string]string), + + // Default to logging metrics with the greatest granularity. + MetricsLabelFilter: map[string]interface{}{ + "namespace": nil, + "workload": nil, + "pod": nil, + "binary": nil, + }, } )