From 2aa5b7c049adec3b42ba79cc75829c83be5a9c4c Mon Sep 17 00:00:00 2001 From: flyik Date: Thu, 26 Sep 2024 14:33:35 +0300 Subject: [PATCH] Service annotations for healthchecks (#66) * add service annotations for nlb healthcheck configuration * force defaults and check if resource needs update --- README.md | 4 + go.mod | 2 +- pkg/cloudprovider/yandex/load_balancer.go | 102 +++++++++++++++++++--- pkg/yapi/loadbalancer.go | 7 ++ 4 files changed, 101 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 6fd2f44..873626f 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,10 @@ Due to API limitations, only one subnet from each zone must be present in each N * `yandex.cpi.flant.com/listener-address-ipv4` – select pre-defined IPv4 address. Works both on internal and external NetworkLoadBalancers. * `yandex.cpi.flant.com/loadbalancer-external` – override `YANDEX_CLOUD_DEFAULT_LB_LISTENER_SUBNET_ID` per-service. * `yandex.cpi.flant.com/target-group-name-prefix` - set target group for LB to target group with name `yandex.cpi.flant.com/target-group-name-prefix` annotation value + yandex cluster name + `YANDEX_CLOUD_DEFAULT_LB_TARGET_GROUP_NETWORK_ID`. +* `yandex.cpi.flant.com/healthcheck-interval-seconds` - healthcheck interval in seconds (default 2). +* `yandex.cpi.flant.com/healthcheck-timeout-seconds` - healthcheck timeout in seconds (default 1). +* `yandex.cpi.flant.com/healthcheck-unhealthy-threshold` - healthcheck unhealthy threshold (default 2). +* `yandex.cpi.flant.com/healthcheck-healthy-threshold` - healthcheck healthy threshold (default 2). 
##### Node annotations diff --git a/go.mod b/go.mod index 01a5639..f12a4b1 100644 --- a/go.mod +++ b/go.mod @@ -12,6 +12,7 @@ require ( golang.org/x/sync v0.1.0 google.golang.org/genproto v0.0.0-20220502173005-c8bf987b8c21 google.golang.org/grpc v1.51.0 + google.golang.org/protobuf v1.28.1 k8s.io/api v0.27.3 k8s.io/apimachinery v0.27.3 k8s.io/client-go v0.27.3 @@ -95,7 +96,6 @@ require ( golang.org/x/text v0.8.0 // indirect golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 // indirect google.golang.org/appengine v1.6.7 // indirect - google.golang.org/protobuf v1.28.1 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect diff --git a/pkg/cloudprovider/yandex/load_balancer.go b/pkg/cloudprovider/yandex/load_balancer.go index 7b09147..440fb35 100644 --- a/pkg/cloudprovider/yandex/load_balancer.go +++ b/pkg/cloudprovider/yandex/load_balancer.go @@ -8,6 +8,7 @@ import ( "strings" "github.com/yandex-cloud/go-genproto/yandex/cloud/loadbalancer/v1" + "google.golang.org/protobuf/types/known/durationpb" v1 "k8s.io/api/core/v1" svchelpers "k8s.io/cloud-provider/service/helpers" ) @@ -20,6 +21,12 @@ const ( listenerSubnetIdAnnotation = "yandex.cpi.flant.com/listener-subnet-id" listenerAddressIPv4 = "yandex.cpi.flant.com/listener-address-ipv4" + // healthcheck options + healthcheckIntervalSeconds = "yandex.cpi.flant.com/healthcheck-interval-seconds" + healthcheckTimeoutSeconds = "yandex.cpi.flant.com/healthcheck-timeout-seconds" + healthcheckUnhealthyThreshold = "yandex.cpi.flant.com/healthcheck-unhealthy-threshold" + healthcheckHealthyThreshold = "yandex.cpi.flant.com/healthcheck-healthy-threshold" + nodesHealthCheckPath = "/healthz" // NOTE: Please keep the following port in sync with ProxyHealthzPort in pkg/cluster/ports/ports.go // ports.ProxyHealthzPort was not used here to avoid dependencies to k8s.io/kubernetes @@ -114,7 +121,11 @@ func (yc *Cloud) ensureLB(ctx context.Context, service 
*v1.Service, nodes []*v1. } lbName := defaultLoadBalancerName(service) - lbParams := yc.getLoadBalancerParameters(service) + lbParams, err := yc.getLoadBalancerParameters(service) + + if err != nil { + return nil, fmt.Errorf("error while extracting parameters: %w", err) + } var listenerSpecs []*loadbalancer.ListenerSpec for index, svcPort := range service.Spec.Ports { @@ -165,21 +176,46 @@ func (yc *Cloud) ensureLB(ctx context.Context, service *v1.Service, nodes []*v1. hcPath, hcPort = svchelpers.GetServiceHealthCheckPathPort(service) } - log.Printf("Health checking on path %q and port %v", hcPath, hcPort) - healthChecks := []*loadbalancer.HealthCheck{ - { - Name: "kube-health-check", - UnhealthyThreshold: 2, - HealthyThreshold: 2, - Options: &loadbalancer.HealthCheck_HttpOptions_{ - HttpOptions: &loadbalancer.HealthCheck_HttpOptions{ - Port: int64(hcPort), - Path: hcPath, - }, + healthCheck := &loadbalancer.HealthCheck{ + Name: "kube-health-check", + Interval: &durationpb.Duration{Seconds: 2}, + Timeout: &durationpb.Duration{Seconds: 1}, + UnhealthyThreshold: 2, + HealthyThreshold: 2, + Options: &loadbalancer.HealthCheck_HttpOptions_{ + HttpOptions: &loadbalancer.HealthCheck_HttpOptions{ + Port: int64(hcPort), + Path: hcPath, }, }, } + if lbParams.healthcheckIntervalSeconds > 0 { + healthCheck.Interval = &durationpb.Duration{Seconds: int64(lbParams.healthcheckIntervalSeconds)} + } + + if lbParams.healthcheckTimeoutSeconds > 0 { + healthCheck.Timeout = &durationpb.Duration{Seconds: int64(lbParams.healthcheckTimeoutSeconds)} + } + + if lbParams.healthcheckUnhealthyThreshold > 0 { + healthCheck.UnhealthyThreshold = int64(lbParams.healthcheckUnhealthyThreshold) + } + + if lbParams.healthcheckHealthyThreshold > 0 { + healthCheck.HealthyThreshold = int64(lbParams.healthcheckHealthyThreshold) + } + + log.Printf("Health checking on path %q and port %v; interval %v, timeout %v, UnhealthyThreshold %d, HealthyThreshold %d", + healthCheck.GetHttpOptions().Path, + 
healthCheck.GetHttpOptions().Port, + healthCheck.GetInterval(), + healthCheck.GetTimeout(), + healthCheck.GetUnhealthyThreshold(), + healthCheck.GetHealthyThreshold(), + ) + healthChecks := []*loadbalancer.HealthCheck{healthCheck} + tgName := lbParams.targetGroupNamePrefix + yc.config.ClusterName + lbParams.targetGroupNetworkID tg, err := yc.yandexService.LbSvc.GetTgByName(ctx, tgName) @@ -209,9 +245,14 @@ type loadBalancerParameters struct { listenerSubnetID string listenerAddressIPv4 string internal bool + + healthcheckIntervalSeconds int + healthcheckTimeoutSeconds int + healthcheckUnhealthyThreshold int + healthcheckHealthyThreshold int } -func (yc *Cloud) getLoadBalancerParameters(svc *v1.Service) (lbParams loadBalancerParameters) { +func (yc *Cloud) getLoadBalancerParameters(svc *v1.Service) (lbParams loadBalancerParameters, err error) { if value, ok := svc.ObjectMeta.Annotations[listenerSubnetIdAnnotation]; ok { lbParams.internal = true lbParams.listenerSubnetID = value @@ -235,5 +276,40 @@ func (yc *Cloud) getLoadBalancerParameters(svc *v1.Service) (lbParams loadBalanc lbParams.targetGroupNamePrefix = value } + if value, ok := svc.ObjectMeta.Annotations[healthcheckIntervalSeconds]; ok { + lbParams.healthcheckIntervalSeconds, err = tryAnnotationValueToInt(healthcheckIntervalSeconds, value) + if err != nil { + return + } + } + + if value, ok := svc.ObjectMeta.Annotations[healthcheckTimeoutSeconds]; ok { + lbParams.healthcheckTimeoutSeconds, err = tryAnnotationValueToInt(healthcheckTimeoutSeconds, value) + if err != nil { + return + } + } + + if value, ok := svc.ObjectMeta.Annotations[healthcheckHealthyThreshold]; ok { + lbParams.healthcheckHealthyThreshold, err = tryAnnotationValueToInt(healthcheckHealthyThreshold, value) + if err != nil { + return + } + } + + if value, ok := svc.ObjectMeta.Annotations[healthcheckUnhealthyThreshold]; ok { + lbParams.healthcheckUnhealthyThreshold, err = tryAnnotationValueToInt(healthcheckUnhealthyThreshold, value) + if err != 
nil { + return + } + } return } + +func tryAnnotationValueToInt(name, value string) (int, error) { + v, err := strconv.Atoi(value) + if err != nil { + return v, fmt.Errorf("can't convert value of annotation %q to int. value: %q, error %w", name, value, err) + } + return v, nil +} diff --git a/pkg/yapi/loadbalancer.go b/pkg/yapi/loadbalancer.go index b97d1d8..6e7d156 100644 --- a/pkg/yapi/loadbalancer.go +++ b/pkg/yapi/loadbalancer.go @@ -506,5 +506,12 @@ func nlbAttachedTargetGroupsAreEqual(actual *loadbalancer.AttachedTargetGroup, e if actualHealthCheckHttpOptions.Path != expectedHealthCheckHttpOptions.Path { return false } + + if actualHealthCheck.Interval.GetSeconds() != expectedHealthCheck.Interval.GetSeconds() { + return false + } + if actualHealthCheck.Timeout.GetSeconds() != expectedHealthCheck.Timeout.GetSeconds() { + return false + } return true }