Skip to content

Commit

Permalink
Merge pull request #3585 from twz123/coredns-pdb
Browse files Browse the repository at this point in the history
Add a PodDisruptionBudget for CoreDNS
  • Loading branch information
twz123 authored Oct 26, 2023
2 parents 9c1a0c7 + c802ffd commit c0b94d8
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 19 deletions.
19 changes: 10 additions & 9 deletions inttest/basic/basic_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ package basic
import (
"bytes"
"context"
"errors"
"fmt"
"strings"
"testing"
Expand All @@ -33,7 +32,6 @@ import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"

"github.com/BurntSushi/toml"
Expand Down Expand Up @@ -233,27 +231,30 @@ func (s *BasicSuite) verifyContainerdDefaultConfig(ctx context.Context) {

func (s *BasicSuite) probeCoreDNSAntiAffinity(ctx context.Context, kc *kubernetes.Clientset) error {
// Wait until both CoreDNS Pods got assigned to a node
pods := map[string]types.UID{}
pods := map[string]string{}

return watch.Pods(kc.CoreV1().Pods("kube-system")).
WithLabels(labels.Set{"k8s-app": "kube-dns"}).
WithErrorCallback(common.RetryWatchErrors(s.T().Logf)).
Until(ctx, func(pod *corev1.Pod) (bool, error) {
// Keep waiting if there's no node assigned yet.
// Keep waiting until there's anti-affinity and node assignment
if a := pod.Spec.Affinity; a == nil || a.PodAntiAffinity == nil {
s.T().Logf("Pod %s doesn't have any pod anti-affinity", pod.Name)
return false, nil
}
nodeName := pod.Spec.NodeName
if nodeName == "" {
s.T().Logf("Pod %s not scheduled yet: %+v", pod.ObjectMeta.Name, pod.Status)
return false, nil
}

uid := pod.GetUID()
if prevUID, ok := pods[nodeName]; ok && uid != prevUID {
return false, errors.New("multiple CoreDNS pods scheduled on the same node")
if prevName, ok := pods[nodeName]; ok && pod.Name != prevName {
return false, fmt.Errorf("multiple CoreDNS pods scheduled on node %s: %s and %s", nodeName, prevName, pod.Name)
}

s.T().Logf("Pod %s scheduled on %s", pod.ObjectMeta.Name, pod.Spec.NodeName)
s.T().Logf("Pod %s scheduled on %s", pod.Name, pod.Spec.NodeName)

pods[nodeName] = pod.GetUID()
pods[nodeName] = pod.Name
return len(pods) > 1, nil
})
}
Expand Down
71 changes: 61 additions & 10 deletions pkg/component/controller/coredns.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"github.com/sirupsen/logrus"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/utils/pointer"

"github.com/k0sproject/k0s/internal/pkg/dir"
"github.com/k0sproject/k0s/internal/pkg/templatewriter"
Expand Down Expand Up @@ -120,11 +121,13 @@ metadata:
k8s-app: kube-dns
kubernetes.io/name: "CoreDNS"
spec:
replicas: {{ .Replicas}}
replicas: {{ .Replicas }}
strategy:
type: RollingUpdate
{{- if .MaxUnavailableReplicas }}
rollingUpdate:
maxUnavailable: 1
maxUnavailable: {{ .MaxUnavailableReplicas }}
{{- end }}
selector:
matchLabels:
k8s-app: kube-dns
Expand All @@ -145,6 +148,7 @@ spec:
effect: "NoSchedule"
nodeSelector:
kubernetes.io/os: linux
{{- if not .DisablePodAntiAffinity }}
# Require running coredns replicas on different nodes
affinity:
podAntiAffinity:
Expand All @@ -155,6 +159,7 @@ spec:
- key: k8s-app
operator: In
values: ['kube-dns']
{{- end }}
containers:
- name: coredns
image: {{ .Image }}
Expand Down Expand Up @@ -203,7 +208,7 @@ spec:
path: /ready
port: 8181
scheme: HTTP
initialDelaySeconds: 0
initialDelaySeconds: 30 # give loop plugin time to detect loops
periodSeconds: 2
timeoutSeconds: 1
successThreshold: 1
Expand All @@ -216,6 +221,22 @@ spec:
items:
- key: Corefile
path: Corefile
{{- if not .DisablePodDisruptionBudget }}
---
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
name: coredns
namespace: kube-system
labels:
k8s-app: kube-dns
kubernetes.io/name: "CoreDNS"
spec:
minAvailable: 50%
selector:
matchLabels:
k8s-app: kube-dns
{{- end }}
---
apiVersion: v1
kind: Service
Expand Down Expand Up @@ -264,11 +285,14 @@ type CoreDNS struct {
}

type coreDNSConfig struct {
Replicas int
ClusterDNSIP string
ClusterDomain string
Image string
PullPolicy string
Replicas int
ClusterDNSIP string
ClusterDomain string
Image string
PullPolicy string
MaxUnavailableReplicas *uint
DisablePodAntiAffinity bool
DisablePodDisruptionBudget bool
}

// NewCoreDNS creates new instance of CoreDNS component
Expand Down Expand Up @@ -339,16 +363,43 @@ func (c *CoreDNS) getConfig(ctx context.Context, clusterConfig *v1beta1.ClusterC
}

nodeCount := len(nodes.Items)
replicas := replicaCount(nodeCount)

config := coreDNSConfig{
Replicas: replicas,
Replicas: replicaCount(nodeCount),
ClusterDomain: nodeConfig.Spec.Network.ClusterDomain,
ClusterDNSIP: dns,
Image: clusterConfig.Spec.Images.CoreDNS.URI(),
PullPolicy: clusterConfig.Spec.Images.DefaultPullPolicy,
}

if config.Replicas <= 1 {

// No pod anti-affinity for single replica deployments, so that CoreDNS
// can be rolled on a single node without downtime, as the single
// replica can remain operational until the new replica can take over.
config.DisablePodAntiAffinity = true

// No PodDisruptionBudget for single replica deployments, so that
// CoreDNS may be drained from the node running the single replica. This
// might leave CoreDNS eligible for eviction due to node pressure, but
// such clusters aren't HA in the first place and it seems more
// desirable to not block a drain.
config.DisablePodDisruptionBudget = true

} else if config.Replicas == 2 || config.Replicas == 3 {

// Set maxUnavailable=1 only for deployments with two or three replicas.
// Use the Kubernetes defaults (maxUnavailable=25%, rounded down to get
// absolute values) for all other cases. For single replica deployments,
// maxUnavailable=1 would mean that the deployment's available condition
// would be true even with zero ready replicas. For deployments with 4
// to 7 replicas, it would be the same as the default, and for
// deployments with 8 or more replicas, this would artificially
// constrain the rolling update speed.
config.MaxUnavailableReplicas = pointer.Uint(1)

}

return config, nil
}

Expand Down

0 comments on commit c0b94d8

Please sign in to comment.