This repository has been archived by the owner on Mar 5, 2024. It is now read-only.

Adding support for GKE preemptible nodes #30

Open
wants to merge 1 commit into base: master
31 changes: 20 additions & 11 deletions README.md
@@ -11,18 +11,20 @@ Nidhogg was built using [Kubebuilder](https://github.com/kubernetes-sigs/kubebuilder)
## Usage

Nidhogg requires a yaml/json config file to tell it what Daemonsets to watch and what nodes to act on.
`nodeSelector` is a map of keys/values corresponding to node labels. `daemonsets` is an array of Daemonsets to watch, each containing two mandatory fields, `name` and `namespace`, and one optional field, `effect` (which defaults to `NoSchedule`). Nodes are tainted with a taint that follows the format `nidhogg.uswitch.com/{namespace}.{name}:{effect}`.

Example:

YAML:

```yaml
nodeSelector: node-role.kubernetes.io/node
daemonsets:
- name: kiam
  namespace: kube-system
  effect: NoSchedule # default value, can be omitted
```

JSON:

```json
@@ -40,27 +42,30 @@
]
}
```

This example will select any nodes in AWS ASGs named "standard" or "special" that have the label
`node-role.kubernetes.io/node` present, and no nodes with the label `node-role.kubernetes.io/master`. If the matching nodes
do not have a running and ready pod from the `kiam` daemonset in the `kube-system` namespace, it will add a taint of
`nidhogg.uswitch.com/kube-system.kiam:NoSchedule` until there is a ready kiam pod on the node.

If you want pods to be able to run on the nidhogg tainted nodes you can add a toleration:

```yaml
spec:
  tolerations:
  - key: nidhogg.uswitch.com/kube-system.kiam
    operator: "Exists"
    effect: NoSchedule
```

## Deploying

Docker images can be found at https://quay.io/uswitch/nidhogg

Example [Kustomize](https://github.com/kubernetes-sigs/kustomize) manifests can be found [here](/config) to quickly deploy this to a cluster.

## Flags

```
  -config-file string
        Path to config file (default "config.json")
@@ -77,3 +82,7 @@
  -metrics-addr string
        The address the metric endpoint binds to. (default ":8080")
```
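
For illustration only, here is a hypothetical Deployment snippet showing one way the image, the `-config-file` flag, and a config file could be wired together. It is not taken from the `/config` manifests; the resource names, image tag, mount path, and the ServiceAccount/RBAC details (omitted here) are assumptions.

```yaml
# Hypothetical sketch only -- see the Kustomize manifests under /config for the real deployment.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nidhogg
spec:
  replicas: 1
  selector:
    matchLabels:
      app: nidhogg
  template:
    metadata:
      labels:
        app: nidhogg
    spec:
      containers:
      - name: nidhogg
        image: quay.io/uswitch/nidhogg:latest # assumed tag; pin a real version
        args:
        - -config-file=/etc/nidhogg/config.yaml # path is an assumption
        volumeMounts:
        - name: config
          mountPath: /etc/nidhogg
      volumes:
      - name: config
        configMap:
          name: nidhogg-config # assumed ConfigMap holding the yaml/json config
```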

## GKE Preemptible Nodes Support

If you are using Nidhogg on GKE with preemptible nodes, make sure to set `effect: NoExecute` for the DaemonSets (see the sketch below). This is required because when a node gets preempted and comes back online, pods will already be scheduled on it, so an `effect: NoSchedule` taint will not prevent pods without a toleration from running.
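
As a minimal sketch, reusing the `kiam` example from the Usage section above (only the `effect` value differs), the config might look like this:

```yaml
nodeSelector: node-role.kubernetes.io/node
daemonsets:
- name: kiam
  namespace: kube-system
  effect: NoExecute # evicts pods without a matching toleration, even if they were scheduled before the taint was applied
```

Pods that should keep running on the tainted nodes then need a toleration matching the `NoExecute` effect:

```yaml
spec:
  tolerations:
  - key: nidhogg.uswitch.com/kube-system.kiam
    operator: "Exists"
    effect: NoExecute
```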
37 changes: 29 additions & 8 deletions pkg/nidhogg/handler.go
@@ -79,10 +79,11 @@ func (hc *HandlerConfig) BuildSelectors() error {
    return nil
}

// Daemonset contains the name and namespace of a Daemonset. Also includes the effect to set for the taint.
type Daemonset struct {
    Name      string             `json:"name" yaml:"name"`
    Namespace string             `json:"namespace" yaml:"namespace"`
    Effect    corev1.TaintEffect `json:"effect,omitempty" yaml:"effect,omitempty"`
}

type taintChanges struct {
@@ -159,7 +160,7 @@ func (h *Handler) HandleNode(instance *corev1.Node) (reconcile.Result, error) {
}

func (h *Handler) calculateTaints(instance *corev1.Node) (*corev1.Node, taintChanges, error) {
    log := logf.Log.WithName("nidhogg")
    nodeCopy := instance.DeepCopy()

    var changes taintChanges
@@ -181,20 +182,22 @@ func (h *Handler) calculateTaints(instance *corev1.Node) (*corev1.Node, taintChanges, error) {
            return nil, taintChanges{}, fmt.Errorf("error fetching pods: %v", err)
        }

        if pod != nil && podReady(pod) && nodeReady(nodeCopy) {
            // if the taint is in the taintsToRemove map, it'll be removed
            continue
        }
        log.Info(fmt.Sprintf("Pod does not exist or is not ready, or node is not ready"))
        // pod doesn't exist or is not ready
        _, ok := taintsToRemove[taint]
        if ok {
            // we want to keep this already existing taint on it
            log.Info(fmt.Sprintf("Keeping taint %s", taint))
            delete(taintsToRemove, taint)
            continue
        }
        // taint is not already present, adding it
        changes.taintsAdded = append(changes.taintsAdded, taint)
        nodeCopy.Spec.Taints = addTaint(nodeCopy.Spec.Taints, taint, daemonset.Effect)
    }
    for taint := range taintsToRemove {
        nodeCopy.Spec.Taints = removeTaint(nodeCopy.Spec.Taints, taint)
@@ -225,16 +228,34 @@ func (h *Handler) getDaemonsetPod(nodeName string, ds Daemonset) (*corev1.Pod, error) {
}

func podReady(pod *corev1.Pod) bool {
    log := logf.Log.WithName("nidhogg")
    for _, status := range pod.Status.ContainerStatuses {
        if status.Ready == false {
            log.Info(fmt.Sprintf("Pod %s is not ready", pod.Name))
            return false
        }
    }
    log.Info(fmt.Sprintf("Pod %s is ready", pod.Name))
    return true
}

func nodeReady(node *corev1.Node) bool {
    log := logf.Log.WithName("nidhogg")
    for _, condition := range node.Status.Conditions {
        if condition.Type == corev1.NodeReady && condition.Status == corev1.ConditionFalse {
            log.Info(fmt.Sprintf("Node %s is not ready", node.Name))
            return false
        }
    }
    log.Info(fmt.Sprintf("Node %s is ready", node.Name))
    return true
}

func addTaint(taints []corev1.Taint, taintName string, effect corev1.TaintEffect) []corev1.Taint {
    if effect == "" {
        effect = corev1.TaintEffectNoSchedule
    }
    return append(taints, corev1.Taint{Key: taintName, Effect: effect})
}

func removeTaint(taints []corev1.Taint, taintName string) []corev1.Taint {