From 5abe605a6042a86d811ecaf54a253febbbe5be24 Mon Sep 17 00:00:00 2001 From: xuezhao Date: Tue, 2 Apr 2024 15:14:37 +0800 Subject: [PATCH] Add health checker on agent certificates. (#187) Signed-off-by: GitHub --- cmd/addon-agent/main.go | 60 +++++++++++++------ .../templates/addon-agent-deployment.yaml | 11 ++++ 2 files changed, 54 insertions(+), 17 deletions(-) diff --git a/cmd/addon-agent/main.go b/cmd/addon-agent/main.go index 86a740b4..8931a665 100644 --- a/cmd/addon-agent/main.go +++ b/cmd/addon-agent/main.go @@ -14,9 +14,11 @@ import ( "k8s.io/klog/v2" "k8s.io/klog/v2/textlogger" "open-cluster-management.io/addon-framework/pkg/lease" + addonutils "open-cluster-management.io/addon-framework/pkg/utils" "open-cluster-management.io/cluster-proxy/pkg/common" "open-cluster-management.io/cluster-proxy/pkg/util" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/healthz" ) var ( @@ -84,29 +86,53 @@ func main() { } } - ln, err := net.Listen("tcp", net.JoinHostPort("0.0.0.0", "8888")) + // If the certificates are changed, we need to restart the agent to load the new certificates. 
+ cc, err := addonutils.NewConfigChecker("certificates check", "/etc/tls/tls.crt", "/etc/tls/tls.key") if err != nil { - klog.Fatalf("failed listening: %v", err) + klog.Fatalf("failed create certificates checker: %v", err) } - go func() { - klog.Infof("Starting local health check server") - err := http.Serve(ln, http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) { + + go serveHealthProbes(ctx.Done(), ":8888", map[string]healthz.Checker{ + "certificates": cc.Check, + "port forward proxy readiness": func(_ *http.Request) error { if !readiness.Load().(bool) { - rw.WriteHeader(http.StatusInternalServerError) - if _, err = rw.Write([]byte("not yet ready")); err != nil { - klog.Errorf("failed to write 'not yet ready': %v", err) - } - klog.Infof("not yet ready") - return - } - if _, err = rw.Write([]byte("ok")); err != nil { - klog.Errorf("failed to write 'ok': %v", err) + return fmt.Errorf("not ready") } - })) - klog.Errorf("health check server aborted: %v", err) - }() + return nil + }, + }) klog.Infof("Starting lease updater") leaseUpdater.Start(ctx) <-ctx.Done() } + +// serveHealthProbes serves the given checkers at /healthz on address until stop is closed, then shuts the server down +func serveHealthProbes(stop <-chan struct{}, address string, healthCheckers map[string]healthz.Checker) { + mux := http.NewServeMux() + mux.Handle("/healthz", http.StripPrefix("/healthz", &healthz.Handler{Checks: healthCheckers})) + + server := http.Server{ + Handler: mux, + } + + ln, err := net.Listen("tcp", address) + if err != nil { + klog.Errorf("error listening on %s: %v", address, err) + return + } + + klog.Infof("heath probes server is running...") + // Run server + go func() { + if err := server.Serve(ln); err != nil && err != http.ErrServerClosed { + klog.Fatal(err) + } + }() + + // Shutdown the server when stop is closed + <-stop + if err := server.Shutdown(context.Background()); err != nil { + klog.Fatal(err) + } +} diff --git 
a/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/addon-agent-deployment.yaml b/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/addon-agent-deployment.yaml index 35a74f3c..5d56660c 100644 --- a/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/addon-agent-deployment.yaml +++ b/pkg/proxyagent/agent/manifests/charts/addon-agent/templates/addon-agent-deployment.yaml @@ -101,10 +101,21 @@ spec: privileged: false runAsNonRoot: true readOnlyRootFilesystem: true + livenessProbe: + httpGet: + path: /healthz + scheme: HTTP + port: 8888 + initialDelaySeconds: 10 + failureThreshold: 3 + periodSeconds: 10 volumeMounts: - name: hub-kubeconfig mountPath: /etc/kubeconfig/ readOnly: true + - name: hub + mountPath: /etc/tls + readOnly: true resources: requests: memory: "50Mi"