Skip to content

Commit

Permalink
feat: kwok cloud provider (kubernetes-sigs#904)
Browse files Browse the repository at this point in the history
  • Loading branch information
njtran authored Jan 19, 2024
1 parent ac141ed commit 4e85912
Show file tree
Hide file tree
Showing 26 changed files with 1,479 additions and 10 deletions.
43 changes: 38 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,46 @@
# This is the format of an AWS ECR Public Repo as an example.
export KWOK_REPO ?= ${ACCOUNT_ID}.dkr.ecr.${DEFAULT_REGION}.amazonaws.com
export SYSTEM_NAMESPACE=kube-system

HELM_OPTS ?= --set logLevel=debug \
--set controller.resources.requests.cpu=1 \
--set controller.resources.requests.memory=1Gi \
--set controller.resources.limits.cpu=1 \
--set controller.resources.limits.memory=1Gi

help: ## Display help
@awk 'BEGIN {FS = ":.*##"; printf "Usage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)

presubmit: verify test licenses vulncheck ## Run all steps required for code to be checked in

install-kwok: ## Install kwok provider
UNINSTALL_KWOK=false ./hack/install-kwok.sh
uninstall-kwok: ## Install kwok provider
UNINSTALL_KWOK=true ./hack/install-kwok.sh

build: ## Build the Karpenter KWOK controller images using ko build
$(eval CONTROLLER_IMG=$(shell $(WITH_GOFLAGS) KO_DOCKER_REPO="$(KWOK_REPO)" ko build -B sigs.k8s.io/karpenter/kwok))
$(eval IMG_REPOSITORY=$(shell echo $(CONTROLLER_IMG) | cut -d "@" -f 1 | cut -d ":" -f 1))
$(eval IMG_TAG=$(shell echo $(CONTROLLER_IMG) | cut -d "@" -f 1 | cut -d ":" -f 2 -s))
$(eval IMG_DIGEST=$(shell echo $(CONTROLLER_IMG) | cut -d "@" -f 2))


# Run make install-kwok to install the kwok controller in your cluster first
# Webhooks are currently not supported in the kwok provider.
apply: verify build ## Deploy the kwok controller from the current state of your git repository into your ~/.kube/config cluster
hack/validation/kwok-requirements.sh
kubectl apply -f pkg/apis/crds
helm upgrade --install karpenter kwok/charts --namespace kube-system --skip-crds \
$(HELM_OPTS) \
--set controller.image.repository=$(IMG_REPOSITORY) \
--set controller.image.tag=$(IMG_TAG) \
--set controller.image.digest=$(IMG_DIGEST) \
--set-string controller.env[0].name=ENABLE_PROFILING \
--set-string controller.env[0].value=true

delete: ## Delete the controller from your ~/.kube/config cluster
helm uninstall karpenter --namespace ${KARPENTER_NAMESPACE}

test: ## Run tests
go test ./... \
-race \
Expand All @@ -22,11 +60,6 @@ deflake: ## Run randomized, racing tests until the test fails to catch flakes
-v \
./pkg/...

install-kwok: ## Install kwok into cluster
UNINSTALL_KWOK=false ./hack/install-kwok.sh
uninstall-kwok: ## Install kwok provider
UNINSTALL_KWOK=true ./hack/install-kwok.sh

vulncheck: ## Verify code vulnerabilities
@govulncheck ./pkg/...

Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ require (
github.com/Pallinder/go-randomdata v1.2.0
github.com/avast/retry-go v3.0.0+incompatible
github.com/deckarep/golang-set v1.8.0
github.com/docker/docker v24.0.7+incompatible
github.com/go-logr/logr v1.4.1
github.com/go-logr/zapr v1.3.0
github.com/imdario/mergo v0.3.16
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/deckarep/golang-set v1.8.0 h1:sk9/l/KqpunDwP7pSjUg0keiOOLEnOBHzykLrsPppp4=
github.com/deckarep/golang-set v1.8.0/go.mod h1:5nI87KwE7wgsBU1F4GKAw2Qod7p5kyS383rP6+o6qqo=
github.com/docker/docker v24.0.7+incompatible h1:Wo6l37AuwP3JaMnZa226lzVXGA3F9Ig1seQen0cKYlM=
github.com/docker/docker v24.0.7+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
Expand Down
3 changes: 0 additions & 3 deletions hack/install-kwok.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ set -euo pipefail

# get the latest version
KWOK_REPO=kubernetes-sigs/kwok
# KWOK_LATEST_RELEASE=$(curl "https://api.github.com/repos/${KWOK_REPO}/releases/latest" | jq -r '.tag_name')
KWOK_LATEST_RELEASE=v0.3.0
# make a base directory for multi-base kustomization
HOME_DIR=$(mktemp -d)
Expand Down Expand Up @@ -86,8 +85,6 @@ EOF

done



cat <<EOF > "${HOME_DIR}/kustomization.yaml"
resources:
- ./partition-a
Expand Down
13 changes: 13 additions & 0 deletions hack/validation/kwok-requirements.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Requirements Validation

# Adding validation for nodeclaim

## checking for restricted labels while filtering out well-known labels
yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.requirements.items.properties.key.x-kubernetes-validations += [
{"message": "label domain \"karpenter.kwok.sh\" is restricted", "rule": "self in [\"karpenter.kwok.sh/instance-cpu\", \"karpenter.kwok.sh/instance-memory\", \"karpenter.kwok.sh/instance-family\", \"karpenter.kwok.sh/instance-size\"] || !self.find(\"^([^/]+)\").endsWith(\"karpenter.sh\")"}]' -i pkg/apis/crds/karpenter.sh_nodeclaims.yaml

# Adding validation for nodepool

## checking for restricted labels while filtering out well-known labels
yq eval '.spec.versions[0].schema.openAPIV3Schema.properties.spec.properties.template.properties.spec.properties.requirements.items.properties.key.x-kubernetes-validations += [
{"message": "label domain \"karpenter.kwok.sh\" is restricted", "rule": "self in [\"karpenter.kwok.sh/instance-cpu\", \"karpenter.kwok.sh/instance-memory\", \"karpenter.kwok.sh/instance-family\", \"karpenter.kwok.sh/instance-size\"] || !self.find(\"^([^/]+)\").endsWith(\"karpenter.kwok.sh\")"}]' -i pkg/apis/crds/karpenter.sh_nodepools.yaml
58 changes: 58 additions & 0 deletions kwok/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Kwok Provider

Before using the kwok provider, make sure that you don't have an installed version of Karpenter in your cluster.

## Requirements
- Have an image repository that you can build, push, and pull images from.
- For an example on how to set up an image repository refer to [karpenter.sh](https://karpenter.sh/docs/contributing/development-guide/#environment-specific-setup)
- Have a cluster that you can install Karpenter on to.
- For an example on how to make a cluster in AWS, refer to [karpenter.sh](https://karpenter.sh/docs/getting-started/getting-started-with-karpenter/)

## Installing
```bash
make install-kwok
make apply
```

## Create a NodePool

Once kwok is installed and Karpenter successfully applies to the cluster, you should now be able to create a NodePool.

```bash
cat <<EOF | envsubst | kubectl apply -f -
apiVersion: karpenter.sh/v1beta1
kind: NodePool
metadata:
name: default
spec:
template:
spec:
requirements:
- key: kubernetes.io/arch
operator: In
values: ["amd64"]
- key: kubernetes.io/os
operator: In
values: ["linux"]
- key: karpenter.sh/capacity-type
operator: In
values: ["spot"]
nodeClassRef:
name: nil
limits:
cpu: 1000
disruption:
consolidationPolicy: WhenUnderutilized
expireAfter: 720h # 30 * 24h = 720h
EOF
```

## Notes
- The kwok provider will have additional labels `karpenter.kwok.sh/instance-size`, `karpenter.kwok.sh/instance-family`, `karpenter.kwok.sh/instance-cpu`, and `karpenter.sh/instance-memory`. These are only available in the kwok provider to select fake generated instance types. These labels will not work with a real Karpenter installation.
- Additionally, this installs Karpenter with a hard-coded set of instance types. A dynamic set of instance types is not supported yet.

## Uninstalling
```bash
make delete
make uninstall-kwok
```
16 changes: 16 additions & 0 deletions kwok/charts/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
apiVersion: v2
name: karpenter
description: A Helm chart for Karpenter's KwoK Provider, integrated with https://github.com/kubernetes-sigs/kwok.
type: application
version: 0.33.1
appVersion: 0.33.1
keywords:
- cluster
- node
- scheduler
- autoscaling
- lifecycle
home: https://karpenter.sh/
icon: https://repository-images.githubusercontent.com/278480393/dab059c8-caa1-4b55-aaa7-3d30e47a5616
sources:
- https://github.com/kubernetes-sigs/karpenter/
73 changes: 73 additions & 0 deletions kwok/charts/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# karpenter-kwok

A Helm chart for Karpenter's kwok provider.

## Documentation

For full Karpenter documentation please checkout [https://karpenter.sh](https://karpenter.sh/docs/).

## Values

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| additionalAnnotations | object | `{}` | Additional annotations to add into metadata. |
| additionalClusterRoleRules | list | `[]` | Specifies additional rules for the core ClusterRole. |
| additionalLabels | object | `{}` | Additional labels to add into metadata. |
| affinity | object | `{"nodeAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":{"nodeSelectorTerms":[{"matchExpressions":[{"key":"karpenter.sh/nodepool","operator":"DoesNotExist"}]}]}},"podAntiAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":[{"topologyKey":"kubernetes.io/hostname"}]}}` | Affinity rules for scheduling the pod. If an explicit label selector is not provided for pod affinity or pod anti-affinity one will be created from the pod selector labels. |
| controller.env | list | `[]` | Additional environment variables for the controller pod. |
| controller.envFrom | list | `[]` | |
| controller.extraVolumeMounts | list | `[]` | Additional volumeMounts for the controller pod. |
| controller.healthProbe.port | int | `8081` | The container port to use for http health probe. |
| controller.image.digest | string | `"sha256:5e5f59f74d86ff7f13d7d80b89afff8c661cb4e3265f2fdda95b76dd9c838cc1"` | SHA256 digest of the controller image. |
| controller.image.repository | string | `"public.ecr.aws/karpenter/controller"` | Repository path to the controller image. |
| controller.image.tag | string | `"v0.33.0"` | Tag of the controller image. |
| controller.metrics.port | int | `8000` | The container port to use for metrics. |
| controller.resources | object | `{}` | Resources for the controller pod. |
| controller.sidecarContainer | list | `[]` | Additional sidecarContainer config |
| controller.sidecarVolumeMounts | list | `[]` | Additional volumeMounts for the sidecar - this will be added to the volume mounts on top of extraVolumeMounts |
| dnsConfig | object | `{}` | Configure DNS Config for the pod |
| dnsPolicy | string | `"Default"` | Configure the DNS Policy for the pod |
| extraVolumes | list | `[]` | Additional volumes for the pod. |
| fullnameOverride | string | `""` | Overrides the chart's computed fullname. |
| hostNetwork | bool | `false` | Bind the pod to the host network. This is required when using a custom CNI. |
| imagePullPolicy | string | `"IfNotPresent"` | Image pull policy for Docker images. |
| imagePullSecrets | list | `[]` | Image pull secrets for Docker images. |
| logConfig | object | `{"enabled":false,"errorOutputPaths":["stderr"],"logEncoding":"json","logLevel":{"controller":"info","global":"info","webhook":"error"},"outputPaths":["stdout"]}` | Log configuration (Deprecated: Logging configuration will be dropped by v1, use logLevel instead) |
| logConfig.enabled | bool | `false` | Whether to enable provisioning and mounting the log ConfigMap |
| logConfig.errorOutputPaths | list | `["stderr"]` | Log errorOutputPaths - defaults to stderr only |
| logConfig.logEncoding | string | `"json"` | Log encoding - defaults to json - must be one of 'json', 'console' |
| logConfig.logLevel | object | `{"controller":"info","global":"info","webhook":"error"}` | Component-based log configuration |
| logConfig.logLevel.controller | string | `"info"` | Controller log level, defaults to 'info' |
| logConfig.logLevel.global | string | `"info"` | Global log level, defaults to 'info' |
| logConfig.logLevel.webhook | string | `"error"` | Error log level, defaults to 'error' |
| logConfig.outputPaths | list | `["stdout"]` | Log outputPaths - defaults to stdout only |
| logLevel | string | `"info"` | Global log level, defaults to 'info' |
| nameOverride | string | `""` | Overrides the chart's name. |
| nodeSelector | object | `{"kubernetes.io/os":"linux"}` | Node selectors to schedule the pod to nodes with labels. |
| podAnnotations | object | `{}` | Additional annotations for the pod. |
| podDisruptionBudget.maxUnavailable | int | `1` | |
| podDisruptionBudget.name | string | `"karpenter"` | |
| podLabels | object | `{}` | Additional labels for the pod. |
| podSecurityContext | object | `{"fsGroup":65536}` | SecurityContext for the pod. |
| priorityClassName | string | `"system-cluster-critical"` | PriorityClass name for the pod. |
| replicas | int | `2` | Number of replicas. |
| revisionHistoryLimit | int | `10` | The number of old ReplicaSets to retain to allow rollback. |
| serviceAccount.annotations | object | `{}` | Additional annotations for the ServiceAccount. |
| serviceAccount.create | bool | `true` | Specifies if a ServiceAccount should be created. |
| serviceAccount.name | string | `""` | The name of the ServiceAccount to use. If not set and create is true, a name is generated using the fullname template. |
| serviceMonitor.additionalLabels | object | `{}` | Additional labels for the ServiceMonitor. |
| serviceMonitor.enabled | bool | `false` | Specifies whether a ServiceMonitor should be created. |
| serviceMonitor.endpointConfig | object | `{}` | Endpoint configuration for the ServiceMonitor. |
| settings | object | `{"batchIdleDuration":"1s","batchMaxDuration":"10s","featureGates":{"drift":true,"spotToSpotConsolidation":false}}` | Global Settings to configure Karpenter |
| settings.batchIdleDuration | string | `"1s"` | The maximum amount of time with no new ending pods that if exceeded ends the current batching window. If pods arrive faster than this time, the batching window will be extended up to the maxDuration. If they arrive slower, the pods will be batched separately. |
| settings.batchMaxDuration | string | `"10s"` | The maximum length of a batch window. The longer this is, the more pods we can consider for provisioning at one time which usually results in fewer but larger nodes. |
| settings.featureGates | object | `{"drift":true}` | Feature Gate configuration values. Feature Gates will follow the same graduation process and requirements as feature gates in Kubernetes. More information here https://kubernetes.io/docs/reference/command-line-tools-reference/feature-gates/#feature-gates-for-alpha-or-beta-features |
| settings.featureGates.drift | bool | `true` | drift is in BETA and is enabled by default. Setting drift to false disables the drift disruption method to watch for drift between currently deployed nodes and the desired state of nodes set in nodepools and nodeclasses |
| strategy | object | `{"rollingUpdate":{"maxUnavailable":1}}` | Strategy for updating the pod. |
| terminationGracePeriodSeconds | string | `nil` | Override the default termination grace period for the pod. |
| tolerations | list | `[{"key":"CriticalAddonsOnly","operator":"Exists"}]` | Tolerations to allow the pod to be scheduled to nodes with taints. |
| topologySpreadConstraints | list | `[{"maxSkew":1,"topologyKey":"topology.kubernetes.io/zone","whenUnsatisfiable":"ScheduleAnyway"}]` | Topology spread constraints to increase the controller resilience by distributing pods across the cluster zones. If an explicit label selector is not provided one will be created from the pod selector labels. |
| webhook.enabled | bool | `false` | Whether to enable the webhooks and webhook permissions. |
| webhook.metrics.port | int | `8001` | The container port to use for webhook metrics. |
| webhook.port | int | `8443` | The container port to use for the webhook. |

1 change: 1 addition & 0 deletions kwok/charts/crds/karpenter.sh_nodeclaims.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
../../../pkg/apis/crds/karpenter.sh_nodeclaims.yaml
1 change: 1 addition & 0 deletions kwok/charts/crds/karpenter.sh_nodepools.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
../../../pkg/apis/crds/karpenter.sh_nodepools.yaml
Loading

0 comments on commit 4e85912

Please sign in to comment.