From 9a695c336fbe2b8919f47788414deb2bfbca26f7 Mon Sep 17 00:00:00 2001 From: Oleh Neichev Date: Wed, 3 Apr 2024 12:31:28 +0300 Subject: [PATCH] policyfiltermetrics: add error and warning labels At the moment, there is no way to tell from Prometheus if an operation of updating the policy was successful or not. There can be two main scenarios apart from success: - an operation is successful but with a warning - an operation is unsuccessful - error. This patch aims to address both cases by adding two labels to the "policyfilter_metrics_total" metric: error and warning. In this change, the "warning" label will be added only for cases when the container name is not found in the OCI hook. The error label will be added to cases when adding a new container to the pod, adding a pod, updating a pod, or deleting a pod fails. Besides, this patch adds a missing return statement for the case when adding a container to pod from OCI hook fails and we inform the user that we are aborting the hook. Fixes: #1879 Signed-off-by: Oleh Neichev --- .../policyfiltermetrics.go | 71 +++++++++++++++++-- pkg/policyfilter/rthooks/rthooks.go | 19 ++++- pkg/policyfilter/state.go | 26 +++++-- 3 files changed, 103 insertions(+), 13 deletions(-) diff --git a/pkg/metrics/policyfiltermetrics/policyfiltermetrics.go b/pkg/metrics/policyfiltermetrics/policyfiltermetrics.go index 0f3b8693734..a541fa803e7 100644 --- a/pkg/metrics/policyfiltermetrics/policyfiltermetrics.go +++ b/pkg/metrics/policyfiltermetrics/policyfiltermetrics.go @@ -44,23 +44,80 @@ func (s Operation) String() string { return operationLabelValues[s] } +type Warning int + +const ( + SuccessWarning Warning = iota + ContainerNameNotFoundWarning +) + +var warningLabelValues = map[Warning]string{ + SuccessWarning: "", + ContainerNameNotFoundWarning: "container-name-not-found", +} + +func (s Warning) String() string { + return warningLabelValues[s] +} + +type Error int + +const ( + SuccessError Error = iota + AddPodContainerError + AddPodError + UpdatePodError + DeletePodError +) + +var errorLabelValues = map[Error]string{ + SuccessError: "", + AddPodContainerError: "add-pod-container-failed", + AddPodError: "add-pod-error", + UpdatePodError: "update-pod-error", + DeletePodError: "delete-pod-error", +} + +func (s Error) String() string { + return errorLabelValues[s] +} + var ( PolicyFilterOpMetrics = prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: consts.MetricsNamespace, Name: "policyfilter_metrics_total", Help: "Policy filter metrics. For internal use only.", ConstLabels: nil, - }, []string{"subsys", "op"}) + }, []string{"subsys", "op", "warning", "error"}) ) func InitMetrics(registry *prometheus.Registry) { registry.MustRegister(PolicyFilterOpMetrics) // Initialize metrics with labels - PolicyFilterOpMetrics.WithLabelValues(RTHooksSubsys.String(), AddContainerOperation.String()).Add(0) - PolicyFilterOpMetrics.WithLabelValues(PodHandlersSubsys.String(), AddPodOperation.String()).Add(0) - PolicyFilterOpMetrics.WithLabelValues(PodHandlersSubsys.String(), UpdatePodOperation.String()).Add(0) - PolicyFilterOpMetrics.WithLabelValues(PodHandlersSubsys.String(), DeletePodOperation.String()).Add(0) + // RTHooksSubsys + PolicyFilterOpMetrics.WithLabelValues(RTHooksSubsys.String(), AddContainerOperation.String(), + SuccessWarning.String(), SuccessError.String()).Add(0) + PolicyFilterOpMetrics.WithLabelValues(RTHooksSubsys.String(), AddContainerOperation.String(), + ContainerNameNotFoundWarning.String(), SuccessError.String()).Add(0) + PolicyFilterOpMetrics.WithLabelValues(RTHooksSubsys.String(), AddContainerOperation.String(), + SuccessWarning.String(), AddPodContainerError.String()).Add(0) + + // PodHandlersSubsys + PolicyFilterOpMetrics.WithLabelValues(PodHandlersSubsys.String(), AddPodOperation.String(), + SuccessWarning.String(), SuccessError.String()).Add(0) + PolicyFilterOpMetrics.WithLabelValues(PodHandlersSubsys.String(), AddPodOperation.String(), + SuccessWarning.String(), AddPodError.String()).Add(0) + + PolicyFilterOpMetrics.WithLabelValues(PodHandlersSubsys.String(), UpdatePodOperation.String(), + SuccessWarning.String(), SuccessError.String()).Add(0) + PolicyFilterOpMetrics.WithLabelValues(PodHandlersSubsys.String(), UpdatePodOperation.String(), + SuccessWarning.String(), UpdatePodError.String()).Add(0) + + PolicyFilterOpMetrics.WithLabelValues(PodHandlersSubsys.String(), DeletePodOperation.String(), + SuccessWarning.String(), SuccessError.String()).Add(0) + PolicyFilterOpMetrics.WithLabelValues(PodHandlersSubsys.String(), DeletePodOperation.String(), + SuccessWarning.String(), DeletePodError.String()).Add(0) // NOTES: // * error, error_type, type - standardize on a label @@ -68,8 +125,8 @@ func InitMetrics(registry *prometheus.Registry) { // * Rename policyfilter_metrics_total to get rid of _metrics? } -func OpInc(subsys Subsys, op Operation) { +func OpInc(subsys Subsys, op Operation, warning Warning, errorLabel Error) { PolicyFilterOpMetrics.WithLabelValues( - subsys.String(), op.String(), + subsys.String(), op.String(), warning.String(), errorLabel.String(), ).Inc() } diff --git a/pkg/policyfilter/rthooks/rthooks.go b/pkg/policyfilter/rthooks/rthooks.go index 65c6d1eac5b..e94147cfed9 100644 --- a/pkg/policyfilter/rthooks/rthooks.go +++ b/pkg/policyfilter/rthooks/rthooks.go @@ -109,8 +109,25 @@ func createContainerHook(_ context.Context, arg *rthooks.CreateContainerArg) err cgid := policyfilter.CgroupID(cgID) if err := pfState.AddPodContainer(policyfilter.PodID(podID), namespace, pod.Labels, containerID, cgid, containerName); err != nil { log.WithError(err).Warn("failed to update policy filter, aborting hook.") + policyfiltermetrics.OpInc(policyfiltermetrics.RTHooksSubsys, policyfiltermetrics.AddContainerOperation, + policyfiltermetrics.ContainerNameNotFoundWarning, policyfiltermetrics.AddPodContainerError) + return err + } + + // whether the operation of adding the container to policy can be considered successful + // i.e. no warnings were produced. Used only for adding metrics without errors + operationSuccessMetric := true + + if containerName == "" { + policyfiltermetrics.OpInc(policyfiltermetrics.RTHooksSubsys, policyfiltermetrics.AddContainerOperation, + policyfiltermetrics.ContainerNameNotFoundWarning, policyfiltermetrics.SuccessError) + operationSuccessMetric = false + } + + if operationSuccessMetric { + policyfiltermetrics.OpInc(policyfiltermetrics.RTHooksSubsys, policyfiltermetrics.AddContainerOperation, + policyfiltermetrics.SuccessWarning, policyfiltermetrics.SuccessError) } - policyfiltermetrics.OpInc(policyfiltermetrics.RTHooksSubsys, policyfiltermetrics.AddContainerOperation) return nil } diff --git a/pkg/policyfilter/state.go b/pkg/policyfilter/state.go index fc59c83478a..2ef4450c11e 100644 --- a/pkg/policyfilter/state.go +++ b/pkg/policyfilter/state.go @@ -315,8 +315,14 @@ func (m *state) getPodEventHandlers() cache.ResourceEventHandlerFuncs { logger.GetLogger().Warn("policyfilter, add-pod handler: unexpected object type: %T", pod) return } - m.updatePodHandler(pod) - policyfiltermetrics.OpInc(policyfiltermetrics.PodHandlersSubsys, policyfiltermetrics.AddPodOperation) + err := m.updatePodHandler(pod) + if err != nil { + policyfiltermetrics.OpInc(policyfiltermetrics.PodHandlersSubsys, policyfiltermetrics.AddPodOperation, + policyfiltermetrics.SuccessWarning, policyfiltermetrics.AddPodError) + } else { + policyfiltermetrics.OpInc(policyfiltermetrics.PodHandlersSubsys, policyfiltermetrics.AddPodOperation, + policyfiltermetrics.SuccessWarning, policyfiltermetrics.SuccessError) + } }, UpdateFunc: func(_, newObj interface{}) { pod, ok := newObj.(*v1.Pod) @@ -324,8 +330,14 @@ func (m *state) getPodEventHandlers() cache.ResourceEventHandlerFuncs { logger.GetLogger().Warn("policyfilter, update-pod handler: unexpected object type(s): new:%T", pod) return } - m.updatePodHandler(pod) - policyfiltermetrics.OpInc(policyfiltermetrics.PodHandlersSubsys, policyfiltermetrics.UpdatePodOperation) + err := m.updatePodHandler(pod) + if err != nil { + policyfiltermetrics.OpInc(policyfiltermetrics.PodHandlersSubsys, policyfiltermetrics.UpdatePodOperation, + policyfiltermetrics.SuccessWarning, policyfiltermetrics.UpdatePodError) + } else { + policyfiltermetrics.OpInc(policyfiltermetrics.PodHandlersSubsys, policyfiltermetrics.UpdatePodOperation, + policyfiltermetrics.SuccessWarning, policyfiltermetrics.SuccessError) + } }, DeleteFunc: func(obj interface{}) { // Remove all containers for this pod @@ -347,8 +359,12 @@ func (m *state) getPodEventHandlers() cache.ResourceEventHandlerFuncs { "pod-id": podID, "namespace": namespace, }).Warn("policyfilter, delete-pod handler: DelPod failed") + policyfiltermetrics.OpInc(policyfiltermetrics.PodHandlersSubsys, policyfiltermetrics.DeletePodOperation, + policyfiltermetrics.SuccessWarning, policyfiltermetrics.DeletePodError) + } else { + policyfiltermetrics.OpInc(policyfiltermetrics.PodHandlersSubsys, policyfiltermetrics.DeletePodOperation, + policyfiltermetrics.SuccessWarning, policyfiltermetrics.SuccessError) } - policyfiltermetrics.OpInc(policyfiltermetrics.PodHandlersSubsys, policyfiltermetrics.DeletePodOperation) }, } }