diff --git a/.gitignore b/.gitignore index 8291286..245dd93 100644 --- a/.gitignore +++ b/.gitignore @@ -8,5 +8,6 @@ config.yaml content-*/* *.arg .idea - -.DS_Store \ No newline at end of file +hack/*.img +.DS_Store +test/.env diff --git a/Dockerfile b/Dockerfile index 9e1655a..f4ed526 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,15 +17,18 @@ RUN if [ "${OS_DISTRIBUTION}" = "opensuse-leap" ] && [ "${PROXY_CERT_PATH}" != " update-ca-certificates; \ fi -###########################Add any other image customizations here ####################### +########################### Add any other image customizations here ####################### #### Examples #### ### To install the nginx package for Ubuntu ### # RUN apt-get update && apt-get install nginx -y -### or ### To install the nginx package for opensuse ### # RUN zypper refresh && zypper install nginx -y + +### To add a custom health script for two-node liveness checks ### + +# ADD overlay/files/opt/spectrocloud/bin/check-disk-size.sh /opt/spectrocloud/bin/ diff --git a/Earthfile b/Earthfile index a5e78b9..1ffdf15 100644 --- a/Earthfile +++ b/Earthfile @@ -32,7 +32,8 @@ ARG https_proxy=${HTTPS_PROXY} ARG no_proxy=${NO_PROXY} ARG PROXY_CERT_PATH ARG UPDATE_KERNEL=false - +ARG TWO_NODE=false +ARG KINE_VERSION=0.10.3 ARG ETCD_VERSION="v3.5.5" IF [ "$OS_DISTRIBUTION" = "ubuntu" ] && [ "$BASE_IMAGE" = "" ] @@ -73,20 +74,18 @@ build-all-images: END build-provider-images: - BUILD +provider-image --K8S_VERSION=1.24.6 - BUILD +provider-image --K8S_VERSION=1.25.2 - BUILD +provider-image --K8S_VERSION=1.26.4 - BUILD +provider-image --K8S_VERSION=1.27.2 - BUILD +provider-image --K8S_VERSION=1.25.13 - BUILD +provider-image --K8S_VERSION=1.26.8 - BUILD +provider-image --K8S_VERSION=1.27.5 - BUILD +provider-image --K8S_VERSION=1.27.7 - BUILD +provider-image --K8S_VERSION=1.26.10 - BUILD +provider-image --K8S_VERSION=1.25.15 + # BUILD +provider-image --K8S_VERSION=1.24.6 + # BUILD +provider-image --K8S_VERSION=1.25.2 + # BUILD 
+provider-image --K8S_VERSION=1.26.4 + # BUILD +provider-image --K8S_VERSION=1.27.2 + # BUILD +provider-image --K8S_VERSION=1.25.13 + # BUILD +provider-image --K8S_VERSION=1.26.8 + # BUILD +provider-image --K8S_VERSION=1.27.5 + # BUILD +provider-image --K8S_VERSION=1.27.7 + # BUILD +provider-image --K8S_VERSION=1.26.10 + # BUILD +provider-image --K8S_VERSION=1.25.15 BUILD +provider-image --K8S_VERSION=1.28.2 - - build-provider-images-fips: IF [ "$K8S_DISTRIBUTION" = "kubeadm-fips" ] BUILD +provider-image --K8S_VERSION=1.24.13 @@ -247,6 +246,7 @@ base-image: ARG BASE_K8S_VERSION=$K8S_VERSION-$K8S_DISTRIBUTION_TAG END + # OS == Ubuntu IF [ "$OS_DISTRIBUTION" = "ubuntu" ] && [ "$ARCH" = "amd64" ] # Add proxy certificate if present IF [ ! -z $PROXY_CERT_PATH ] @@ -275,13 +275,23 @@ base-image: RUN rm -rf /var/cache/* && \ apt clean + + IF $TWO_NODE + RUN apt install -y apt-transport-https ca-certificates curl && \ + echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list && \ + curl -fsSL -o postgresql.asc https://www.postgresql.org/media/keys/ACCC4CF8.asc && \ + gpg --batch --yes --dearmor -o /etc/apt/trusted.gpg.d/postgresql.gpg postgresql.asc && \ + rm postgresql.asc && \ + apt update && \ + apt install -y postgresql-16 postgresql-contrib-16 iputils-ping + END - # IF OS Type is Opensuse + # OS == Opensuse ELSE IF [ "$OS_DISTRIBUTION" = "opensuse-leap" ] && [ "$ARCH" = "amd64" ] # Add proxy certificate if present IF [ ! 
-z $PROXY_CERT_PATH ] COPY sc.crt /usr/share/pki/trust/anchors - RUN update-ca-certificates + RUN update-ca-certificates END IF [ "$UPDATE_KERNEL" = "false" ] @@ -289,16 +299,22 @@ base-image: END RUN zypper refresh && \ - zypper update -y - - IF [ -e "/usr/bin/dracut" ] - RUN --no-cache kernel=$(ls /lib/modules | tail -n1) && depmod -a "${kernel}" - RUN --no-cache kernel=$(ls /lib/modules | tail -n1) && dracut -f "/boot/initrd-${kernel}" "${kernel}" && ln -sf "initrd-${kernel}" /boot/initrd - END - # zypper up kernel-default && \ - # zypper purge-kernels && \ - RUN zypper install -y zstd vim iputils bridge-utils curl ethtool tcpdump - RUN zypper cc && \ + zypper update -y + + IF [ -e "/usr/bin/dracut" ] + RUN --no-cache kernel=$(ls /lib/modules | tail -n1) && depmod -a "${kernel}" + RUN --no-cache kernel=$(ls /lib/modules | tail -n1) && dracut -f "/boot/initrd-${kernel}" "${kernel}" && ln -sf "initrd-${kernel}" /boot/initrd + END + # zypper up kernel-default && \ + # zypper purge-kernels && \ + + IF $TWO_NODE + RUN zypper --non-interactive --quiet addrepo --refresh -p 90 http://download.opensuse.org/repositories/server:database:postgresql/openSUSE_Tumbleweed/ PostgreSQL && \ + zypper --gpg-auto-import-keys ref && \ + zypper install -y postgresql-16 postgresql-server-16 postgresql-contrib iputils + END + RUN zypper install -y zstd vim iputils bridge-utils curl ethtool tcpdump && \ + zypper cc && \ zypper clean END @@ -337,6 +353,14 @@ base-image: RUN if grep "security=selinux" /etc/cos/bootargs.cfg > /dev/null; then sed -i 's/security=selinux //g' /etc/cos/bootargs.cfg; fi &&\ if grep "selinux=1" /etc/cos/bootargs.cfg > /dev/null; then sed -i 's/selinux=1/selinux=0/g' /etc/cos/bootargs.cfg; fi + IF $TWO_NODE + RUN mkdir -p /opt/spectrocloud/bin && \ + curl -L https://github.com/k3s-io/kine/releases/download/v${KINE_VERSION}/kine-amd64 | install -m 755 /dev/stdin /opt/spectrocloud/bin/kine + + # ensure psql works ootb for the postgres user + RUN su postgres -c 'echo 
"export PERL5LIB=/usr/share/perl/5.34:/usr/share/perl5:/usr/lib/x86_64-linux-gnu/perl/5.34" > ~/.bash_profile' + END + # Used to build the installer image. The installer ISO will be created from this. iso-image: FROM --platform=linux/${ARCH} +base-image diff --git a/hack/Earthfile b/hack/Earthfile new file mode 100644 index 0000000..c3118fa --- /dev/null +++ b/hack/Earthfile @@ -0,0 +1,16 @@ +VERSION 0.6 + +ARG OSBUILDER_VERSION=v0.7.11 +ARG OSBUILDER_IMAGE=quay.io/kairos/osbuilder-tools:$OSBUILDER_VERSION +ARG ISO_NAME=debug + +# replace with your CanvOS provider image +ARG PROVIDER_IMAGE=oci:tylergillson/ubuntu:k3s-1.26.4-v4.0.4-071c2c23 + +build: + FROM $OSBUILDER_IMAGE + WORKDIR /build + COPY . ./ + + RUN /entrypoint.sh --name $ISO_NAME --debug build-iso --squash-no-compression --date=false $PROVIDER_IMAGE --output /build/ + SAVE ARTIFACT /build/$ISO_NAME.iso kairos.iso AS LOCAL build/$ISO_NAME.iso diff --git a/hack/README.md b/hack/README.md new file mode 100644 index 0000000..4557131 --- /dev/null +++ b/hack/README.md @@ -0,0 +1,19 @@ +# Debugging Kairos + +If you're facing hard-to-diagnose issues with your custom provider image, you can use the scripts in this directory to obtain verbose Kairos output. + +## Steps +1. Use earthly to generate an ISO from your CanvOS provider image: + ``` + earthly +build --PROVIDER_IMAGE= # e.g., oci:tylergillson/ubuntu:k3s-1.26.4-v4.0.4-071c2c23 + ``` + If successful, `build/debug.iso` will be created. + +2. Launch a local VM based on the debug ISO using QEMU and pipe all output to a log file: + ``` + ./launch-qemu.sh build/debug.iso | tee out.log + ``` + +3. Boot the VM in `Kairos (manual)` mode. Once booted, create `userdata.yaml` with your desired Kairos config and execute a manual Kairos installation: `kairos-agent --debug manual-install --device auto userdata.yaml`. + +4. The VM should eventually reboot itself once the installation completes. 
Rather than waiting, execute `reboot` to return to the GRUB menu, select `Palette eXtended Kubernetes Edge` and hit `e` to edit it. Add `rd.debug rd.immucore.debug` to the end of the `linux` line, then hit `CTRL+x` to boot with your edits. You should see verbose Kairos debug logs and they will be persisted to `out.log`. diff --git a/hack/build/.keep b/hack/build/.keep new file mode 100644 index 0000000..e69de29 diff --git a/hack/launch-qemu.sh b/hack/launch-qemu.sh new file mode 100755 index 0000000..9aaba67 --- /dev/null +++ b/hack/launch-qemu.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# Screenshot capability: +# https://unix.stackexchange.com/a/476617 + +if [ ! -e disk.img ]; then + qemu-img create -f qcow2 disk.img 60g +fi + +# -nic bridge,br=br0,model=virtio-net-pci \ +qemu-system-x86_64 \ + -enable-kvm \ + -cpu "${CPU:=host}" \ + -nographic \ + -spice port=9000,addr=127.0.0.1,disable-ticketing=yes \ + -m ${MEMORY:=10096} \ + -smp ${CORES:=5} \ + -monitor unix:/tmp/qemu-monitor.sock,server=on,wait=off \ + -serial mon:stdio \ + -rtc base=utc,clock=rt \ + -chardev socket,path=qga.sock,server=on,wait=off,id=qga0 \ + -device virtio-serial \ + -device virtserialport,chardev=qga0,name=org.qemu.guest_agent.0 \ + -drive if=virtio,media=disk,file=disk.img \ + -drive if=ide,media=cdrom,file="${1}" diff --git a/overlay/files/opt/spectrocloud/bin/check-disk-size.sh b/overlay/files/opt/spectrocloud/bin/check-disk-size.sh new file mode 100755 index 0000000..6dab5a6 --- /dev/null +++ b/overlay/files/opt/spectrocloud/bin/check-disk-size.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +set -e + +REQUIRED_FREE_DISK=$1 + +FREE=$(df -h --output=pcent /var/ | tail -n 1 | tr -d '\% ') + +if (( $FREE < $REQUIRED_FREE_DISK )); then + echo "Not enough free disk, required: $1. Free: $FREE" + exit 1 +fi + +echo "Free disk ok, required: $1. 
Free: $FREE" +exit 0 diff --git a/test/env.example b/test/env.example new file mode 100644 index 0000000..a51cdbd --- /dev/null +++ b/test/env.example @@ -0,0 +1,37 @@ +# govc vars +export GOVC_USERNAME=@vsphere.local +export GOVC_PASSWORD= +export GOVC_URL=10.10.128.10 # IP address of USDC; edit as needed +export GOVC_INSECURE=true +export GOVC_DATACENTER=Datacenter +export GOVC_DATASTORE=vsanDatastore2 +export GOVC_NETWORK=VM-NETWORK +export GOVC_RESOURCE_POOL= +export GOVC_FOLDER= + +# vSphere vars +export HOST_SUFFIX=-$(git -C ../stylus describe --always) # required to ensure unique edge host IDs +export ISO_FOLDER= e.g. "ISO/01-tyler" +export STYLUS_ISO="${ISO_FOLDER}/stylus-dev-amd64.iso" +export NIC_NAME=ens160 + +# palette vars +export API_KEY= +export PROJECT_UID= +export EDGE_REGISTRATION_TOKEN= +export DOMAIN=dev.spectrocloud.com +export PUBLIC_PACK_REPO_UID= # this varies per Palette tenant, identify via Chrome inspector on Tenant Admin -> Pack Registries page +export CLUSTER_NAME=two-node--$(git -C ../stylus describe --always) +export CLUSTER_PROFILE_UID= # if left blank, a cluster profile will be created +export CLUSTER_VIP= # choose an unassigned VIP + +# image vars +export EARTHLY_BUILDKIT_CACHE_SIZE_MB=500000 +export OCI_REGISTRY=${OCI_REGISTRY:-ttl.sh} +export STYLUS_BRANCH=${STYLUS_BRANCH:-PAD-178-kine-validator} +export PROVIDER_K3S_BRANCH=${PROVIDER_K3S_BRANCH:-two-node} +export K3S_VERSION="1.28.2" +export PE_VERSION="4.1.2" + +# two node vars +export TWO_NODE_BACKEND=postgres diff --git a/test/templates/two-node-cluster-profile.json.tmpl b/test/templates/two-node-cluster-profile.json.tmpl new file mode 100644 index 0000000..5f68898 --- /dev/null +++ b/test/templates/two-node-cluster-profile.json.tmpl @@ -0,0 +1,64 @@ +{ + "metadata": { + "name": "_____place_holder_____", + "description": "", + "labels": {} + }, + "spec": { + "version": "1.0.0", + "template": { + "type": "infra", + "cloudType": "edge-native", + "packs": [ + { + "name": 
"edge-native-byoi", + "type": "spectro", + "layer": "os", + "version": "1.0.0", + "tag": "1.0.0", + "values": "pack:\n content:\n images:\n - image: \"{{.spectro.pack.edge-native-byoi.options.system.uri}}\"\n # Below config is default value, please uncomment if you want to modify default values\n #drain:\n #cordon: true\n #timeout: 60 # The length of time to wait before giving up, zero means infinite\n #gracePeriod: 60 # Period of time in seconds given to each pod to terminate gracefully. If negative, the default value specified in the pod will be used\n #ignoreDaemonSets: true\n #deleteLocalData: true # Continue even if there are pods using emptyDir (local data that will be deleted when the node is drained)\n #force: true # Continue even if there are pods that do not declare a controller\n #disableEviction: false # Force drain to use delete, even if eviction is supported. This will bypass checking PodDisruptionBudgets, use with caution\n #skipWaitForDeleteTimeout: 60 # If pod DeletionTimestamp older than N seconds, skip waiting for the pod. 
Seconds must be greater than 0 to skip.\nstylusPackage: container://OCI_REGISTRY/stylus-linux-amd64:v0.0.0-STYLUS_HASH\noptions:\n system.uri: \"OCI_REGISTRY/ubuntu:k3s-K3S_VERSION-vPE_VERSION-STYLUS_HASH\"", + "registry": { + "metadata": { + "uid": "_____place_holder_____", + "name": "Public Repo", + "kind": "pack", + "isPrivate": false + } + } + }, + { + "name": "edge-k3s", + "type": "spectro", + "layer": "k8s", + "version": "_____place_holder_____", + "tag": "_____place_holder_____", + "values": "cluster:\n config: |\n flannel-backend: host-gw\n disable-network-policy: true\n disable:\n - traefik\n - local-storage\n - servicelb\n - metrics-server\n\n # configure the pod cidr range\n cluster-cidr: \"192.170.0.0/16\"\n\n # configure service cidr range\n service-cidr: \"192.169.0.0/16\"\n\n # etcd snapshot frequency and number of snapshot retained\n etcd-snapshot-schedule-cron: 0 */1 * * *\n etcd-snapshot-retention: 12\n\n # kubeconfig must be in run for the stylus operator to manage the cluster\n write-kubeconfig: /run/kubeconfig\n write-kubeconfig-mode: 600\n\n # additional component settings to harden installation\n kube-apiserver-arg:\n - anonymous-auth=true\n - profiling=false\n - disable-admission-plugins=AlwaysAdmit\n - default-not-ready-toleration-seconds=20\n - default-unreachable-toleration-seconds=20\n - enable-admission-plugins=AlwaysPullImages,NamespaceLifecycle,ServiceAccount,NodeRestriction,DefaultTolerationSeconds\n - audit-log-path=/var/log/apiserver/audit.log\n - audit-policy-file=/etc/kubernetes/audit-policy.yaml\n - audit-log-maxage=30\n - audit-log-maxbackup=10\n - audit-log-maxsize=100\n - authorization-mode=RBAC,Node\n - tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_GCM_SHA256\n 
kube-controller-manager-arg:\n - profiling=false\n - terminated-pod-gc-threshold=25\n - use-service-account-credentials=true\n - feature-gates=RotateKubeletServerCertificate=true\n - node-monitor-period=5s\n - node-monitor-grace-period=20s\n kube-scheduler-arg:\n - profiling=false\n kubelet-arg:\n - read-only-port=0\n - event-qps=0\n - feature-gates=RotateKubeletServerCertificate=true\n - protect-kernel-defaults=true\n - tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_GCM_SHA256\n - rotate-server-certificates=true\nstages:\n initramfs:\n - sysctl:\n vm.overcommit_memory: 1\n kernel.panic: 10\n kernel.panic_on_oops: 1\n kernel.printk: \"0 4 0 7\"\n - directories:\n - path: \"/var/log/apiserver\"\n permissions: 0644\n files:\n - path: /etc/hosts\n permission: \"0644\"\n content: |\n 127.0.0.1 localhost\n - path: \"/etc/kubernetes/audit-policy.yaml\"\n owner_string: \"root\"\n permission: 0600\n content: |\n apiVersion: audit.k8s.io/v1\n kind: Policy\n rules:\n - level: None\n users: [\"system:kube-proxy\"]\n verbs: [\"watch\"]\n resources:\n - group: \"\" # core\n resources: [\"endpoints\", \"services\", \"services/status\"]\n - level: None\n users: [\"system:unsecured\"]\n namespaces: [\"kube-system\"]\n verbs: [\"get\"]\n resources:\n - group: \"\" # core\n resources: [\"configmaps\"]\n - level: None\n users: [\"kubelet\"] # legacy kubelet identity\n verbs: [\"get\"]\n resources:\n - group: \"\" # core\n resources: [\"nodes\", \"nodes/status\"]\n - level: None\n userGroups: [\"system:nodes\"]\n verbs: [\"get\"]\n resources:\n - group: \"\" # core\n resources: [\"nodes\", \"nodes/status\"]\n - level: None\n users:\n - system:kube-controller-manager\n - system:kube-scheduler\n - 
system:serviceaccount:kube-system:endpoint-controller\n verbs: [\"get\", \"update\"]\n namespaces: [\"kube-system\"]\n resources:\n - group: \"\" # core\n resources: [\"endpoints\"]\n - level: None\n users: [\"system:apiserver\"]\n verbs: [\"get\"]\n resources:\n - group: \"\" # core\n resources: [\"namespaces\", \"namespaces/status\", \"namespaces/finalize\"]\n - level: None\n users: [\"cluster-autoscaler\"]\n verbs: [\"get\", \"update\"]\n namespaces: [\"kube-system\"]\n resources:\n - group: \"\" # core\n resources: [\"configmaps\", \"endpoints\"]\n # Don't log HPA fetching metrics.\n - level: None\n users:\n - system:kube-controller-manager\n verbs: [\"get\", \"list\"]\n resources:\n - group: \"metrics.k8s.io\"\n # Don't log these read-only URLs.\n - level: None\n nonResourceURLs:\n - /healthz*\n - /version\n - /swagger*\n # Don't log events requests.\n - level: None\n resources:\n - group: \"\" # core\n resources: [\"events\"]\n # node and pod status calls from nodes are high-volume and can be large, don't log responses for expected updates from nodes\n - level: Request\n users: [\"kubelet\", \"system:node-problem-detector\", \"system:serviceaccount:kube-system:node-problem-detector\"]\n verbs: [\"update\",\"patch\"]\n resources:\n - group: \"\" # core\n resources: [\"nodes/status\", \"pods/status\"]\n omitStages:\n - \"RequestReceived\"\n - level: Request\n userGroups: [\"system:nodes\"]\n verbs: [\"update\",\"patch\"]\n resources:\n - group: \"\" # core\n resources: [\"nodes/status\", \"pods/status\"]\n omitStages:\n - \"RequestReceived\"\n # deletecollection calls can be large, don't log responses for expected namespace deletions\n - level: Request\n users: [\"system:serviceaccount:kube-system:namespace-controller\"]\n verbs: [\"deletecollection\"]\n omitStages:\n - \"RequestReceived\"\n # Secrets, ConfigMaps, and TokenReviews can contain sensitive \u0026 binary data,\n # so only log at the Metadata level.\n - level: Metadata\n resources:\n - group: \"\" # 
core\n resources: [\"secrets\", \"configmaps\"]\n - group: authentication.k8s.io\n resources: [\"tokenreviews\"]\n omitStages:\n - \"RequestReceived\"\n # Get repsonses can be large; skip them.\n - level: Request\n verbs: [\"get\", \"list\", \"watch\"]\n resources:\n - group: \"\" # core\n - group: \"admissionregistration.k8s.io\"\n - group: \"apiextensions.k8s.io\"\n - group: \"apiregistration.k8s.io\"\n - group: \"apps\"\n - group: \"authentication.k8s.io\"\n - group: \"authorization.k8s.io\"\n - group: \"autoscaling\"\n - group: \"batch\"\n - group: \"certificates.k8s.io\"\n - group: \"extensions\"\n - group: \"metrics.k8s.io\"\n - group: \"networking.k8s.io\"\n - group: \"policy\"\n - group: \"rbac.authorization.k8s.io\"\n - group: \"settings.k8s.io\"\n - group: \"storage.k8s.io\"\n omitStages:\n - \"RequestReceived\"\n # Default level for known APIs\n - level: RequestResponse\n resources:\n - group: \"\" # core\n - group: \"admissionregistration.k8s.io\"\n - group: \"apiextensions.k8s.io\"\n - group: \"apiregistration.k8s.io\"\n - group: \"apps\"\n - group: \"authentication.k8s.io\"\n - group: \"authorization.k8s.io\"\n - group: \"autoscaling\"\n - group: \"batch\"\n - group: \"certificates.k8s.io\"\n - group: \"extensions\"\n - group: \"metrics.k8s.io\"\n - group: \"networking.k8s.io\"\n - group: \"policy\"\n - group: \"rbac.authorization.k8s.io\"\n - group: \"settings.k8s.io\"\n - group: \"storage.k8s.io\"\n omitStages:\n - \"RequestReceived\"\n # Default level for all other requests.\n - level: Metadata\n omitStages:\n - \"RequestReceived\"\npack:\n palette:\n config:\n oidc:\n identityProvider: noauth", + "registry": { + "metadata": { + "uid": "_____place_holder_____", + "name": "Public Repo", + "kind": "pack", + "isPrivate": false + } + } + }, + { + "name": "cni-custom", + "type": "spectro", + "layer": "cni", + "version": "0.1.0", + "tag": "0.1.0", + "values": "manifests:\n byo-cni:\n contents: |\n apiVersion: v1\n kind: ConfigMap\n metadata:\n name: 
custom-cni\n data:\n # property-like keys; each key maps to a simple value\n custom-cni: \"byo-cni\"", + "registry": { + "metadata": { + "uid": "_____place_holder_____", + "name": "Public Repo", + "kind": "pack", + "isPrivate": false + } + } + } + ] + } + } +} diff --git a/test/templates/two-node-master-master.json.tmpl b/test/templates/two-node-master-master.json.tmpl new file mode 100644 index 0000000..1dcd625 --- /dev/null +++ b/test/templates/two-node-master-master.json.tmpl @@ -0,0 +1,109 @@ +{ + "metadata": { + "annotations": {}, + "name": "_____place_holder_____", + "labels": {} + }, + "spec": { + "cloudConfig": { + "controlPlaneEndpoint": { + "host": "_____place_holder_____", + "type": "IP" + }, + "sshKeys": [ + "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDDYZpId/d19xuzNnbjkWxlTvctATcwYz+Fre3qOUkkrFJljx39pduukR38Pms8oeEPk6B+GBzwARk9xkEK2SUW+B6ZzCVaBXMHzLiuyzYK9mcHLEnSaYIT7njdAqcFzpBamkPkhUAfsWDcsjgnz0Q7Ilmdi42MW1mqR9M+FibB89Qg/EdFxD0J+VtD/MOZfSDPMEQ+azZMgcWRICn1N9Ods3uH8FCF+PAwVEBo19x34P5xqIyZ7QJjFvNoV96Sr8JuUJWXzMJ6R+7HbH5BMceRsDVd+ZUSX5tQDG4nPrWRVdJN3stLtLNADprXV5BSrDaMOqWK034Or4AI+sqTvmHIBy/b0U4dWAQiJWD6QkLG673UG2qwyZ4GJI4D0KkR7Frj2zwtcufnwHop69R36uJn5xkjJUG92B5GbfolbSjzo0PsQ+Q5NKRJDZZ7conw5RkRb4DYrt17D6BZKbw0X5Gd22MdgPPcnjs4JiZTeKXGkM0kDlTD5jjA4nCs6IEQhI1QLiicHLO5algTf1JHyRUgdMbJA0zlVITDtid3cvRup3JpZW9cdxu3NTqsRRauZj33mfpeRLnuJ2y+cLaWBkkAPpjO87/caUezJJ0r3qzXkIXLu4zCe1RRoZfERUlGvLK+LRUC8IadFTGJl6UhJBApe1UydydOakK45uUBAkDYfw== spectro2023" + ], + "staticIp": false + }, + "machinePoolConfig": [ + { + "cloudConfig": { + "edgeHosts": [ + { + "hostName": "_____place_holder_____", + "hostUid": "_____place_holder_____", + "nicName": "_____place_holder_____", + "staticIP": "", + "twoNodeCandidatePriority": "primary" + }, + { + "hostName": "_____place_holder_____", + "hostUid": "_____place_holder_____", + "nicName": "_____place_holder_____", + "staticIP": "", + "twoNodeCandidatePriority": "secondary" + } + ] + }, + "poolConfig": { + "name": "master-pool", + "labels": 
[ + "master" + ], + "isControlPlane": true, + "useControlPlaneAsWorker": true, + "taints": [], + "additionalLabels": {}, + "nodeRepaveInterval": 0, + "updateStrategy": { + "type": "RollingUpdateScaleOut" + }, + "machinePoolProperties": { + "archType": "amd64" + }, + "size": 2, + "maxSize": 2, + "minSize": 2 + } + } + ], + "cloudAccountUid": null, + "edgeHostUid": "", + "profiles": [ + { + "uid": "_____place_holder_____", + "packValues": [ + { + "tag": "1.0.0", + "name": "edge-native-byoi", + "type": "spectro", + "values": "pack:\n content:\n images:\n - image: \"{{.spectro.pack.edge-native-byoi.options.system.uri}}\"\n # Below config is default value, please uncomment if you want to modify default values\n #drain:\n #cordon: true\n #timeout: 60 # The length of time to wait before giving up, zero means infinite\n #gracePeriod: 60 # Period of time in seconds given to each pod to terminate gracefully. If negative, the default value specified in the pod will be used\n #ignoreDaemonSets: true\n #deleteLocalData: true # Continue even if there are pods using emptyDir (local data that will be deleted when the node is drained)\n #force: true # Continue even if there are pods that do not declare a controller\n #disableEviction: false # Force drain to use delete, even if eviction is supported. This will bypass checking PodDisruptionBudgets, use with caution\n #skipWaitForDeleteTimeout: 60 # If pod DeletionTimestamp older than N seconds, skip waiting for the pod. 
Seconds must be greater than 0 to skip.\nstylusPackage: container://OCI_REGISTRY/stylus-linux-amd64:v0.0.0-STYLUS_HASH\noptions:\n system.uri: \"OCI_REGISTRY/ubuntu:k3s-K3S_VERSION-vPE_VERSION-STYLUS_HASH\"", + "manifests": [] + }, + { + "tag": "_____place_holder_____", + "name": "edge-k3s", + "type": "spectro", + "values": "cluster:\n config: |\n flannel-backend: host-gw\n disable-network-policy: true\n disable:\n - traefik\n - local-storage\n - servicelb\n - metrics-server\n\n # configure the pod cidr range\n cluster-cidr: \"192.170.0.0/16\"\n\n # configure service cidr range\n service-cidr: \"192.169.0.0/16\"\n\n # etcd snapshot frequency and number of snapshot retained\n etcd-snapshot-schedule-cron: 0 */1 * * *\n etcd-snapshot-retention: 12\n\n # kubeconfig must be in run for the stylus operator to manage the cluster\n write-kubeconfig: /run/kubeconfig\n write-kubeconfig-mode: 600\n\n # additional component settings to harden installation\n kube-apiserver-arg:\n - anonymous-auth=true\n - profiling=false\n - disable-admission-plugins=AlwaysAdmit\n - default-not-ready-toleration-seconds=20\n - default-unreachable-toleration-seconds=20\n - enable-admission-plugins=AlwaysPullImages,NamespaceLifecycle,ServiceAccount,NodeRestriction,DefaultTolerationSeconds\n - audit-log-path=/var/log/apiserver/audit.log\n - audit-policy-file=/etc/kubernetes/audit-policy.yaml\n - audit-log-maxage=30\n - audit-log-maxbackup=10\n - audit-log-maxsize=100\n - authorization-mode=RBAC,Node\n - tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_GCM_SHA256\n kube-controller-manager-arg:\n - profiling=false\n - terminated-pod-gc-threshold=25\n - use-service-account-credentials=true\n - feature-gates=RotateKubeletServerCertificate=true\n - 
node-monitor-period=5s\n - node-monitor-grace-period=20s\n kube-scheduler-arg:\n - profiling=false\n kubelet-arg:\n - read-only-port=0\n - event-qps=0\n - feature-gates=RotateKubeletServerCertificate=true\n - protect-kernel-defaults=true\n - tls-cipher-suites=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_256_GCM_SHA384,TLS_RSA_WITH_AES_128_GCM_SHA256\n - rotate-server-certificates=true\nstages:\n initramfs:\n - sysctl:\n vm.overcommit_memory: 1\n kernel.panic: 10\n kernel.panic_on_oops: 1\n kernel.printk: \"0 4 0 7\"\n - directories:\n - path: \"/var/log/apiserver\"\n permissions: 0644\n files:\n - path: /etc/hosts\n permission: \"0644\"\n content: |\n 127.0.0.1 localhost\n - path: \"/etc/kubernetes/audit-policy.yaml\"\n owner_string: \"root\"\n permission: 0600\n content: |\n apiVersion: audit.k8s.io/v1\n kind: Policy\n rules:\n - level: None\n users: [\"system:kube-proxy\"]\n verbs: [\"watch\"]\n resources:\n - group: \"\" # core\n resources: [\"endpoints\", \"services\", \"services/status\"]\n - level: None\n users: [\"system:unsecured\"]\n namespaces: [\"kube-system\"]\n verbs: [\"get\"]\n resources:\n - group: \"\" # core\n resources: [\"configmaps\"]\n - level: None\n users: [\"kubelet\"] # legacy kubelet identity\n verbs: [\"get\"]\n resources:\n - group: \"\" # core\n resources: [\"nodes\", \"nodes/status\"]\n - level: None\n userGroups: [\"system:nodes\"]\n verbs: [\"get\"]\n resources:\n - group: \"\" # core\n resources: [\"nodes\", \"nodes/status\"]\n - level: None\n users:\n - system:kube-controller-manager\n - system:kube-scheduler\n - system:serviceaccount:kube-system:endpoint-controller\n verbs: [\"get\", \"update\"]\n namespaces: [\"kube-system\"]\n resources:\n - group: \"\" # core\n resources: [\"endpoints\"]\n - level: None\n users: 
[\"system:apiserver\"]\n verbs: [\"get\"]\n resources:\n - group: \"\" # core\n resources: [\"namespaces\", \"namespaces/status\", \"namespaces/finalize\"]\n - level: None\n users: [\"cluster-autoscaler\"]\n verbs: [\"get\", \"update\"]\n namespaces: [\"kube-system\"]\n resources:\n - group: \"\" # core\n resources: [\"configmaps\", \"endpoints\"]\n # Don't log HPA fetching metrics.\n - level: None\n users:\n - system:kube-controller-manager\n verbs: [\"get\", \"list\"]\n resources:\n - group: \"metrics.k8s.io\"\n # Don't log these read-only URLs.\n - level: None\n nonResourceURLs:\n - /healthz*\n - /version\n - /swagger*\n # Don't log events requests.\n - level: None\n resources:\n - group: \"\" # core\n resources: [\"events\"]\n # node and pod status calls from nodes are high-volume and can be large, don't log responses for expected updates from nodes\n - level: Request\n users: [\"kubelet\", \"system:node-problem-detector\", \"system:serviceaccount:kube-system:node-problem-detector\"]\n verbs: [\"update\",\"patch\"]\n resources:\n - group: \"\" # core\n resources: [\"nodes/status\", \"pods/status\"]\n omitStages:\n - \"RequestReceived\"\n - level: Request\n userGroups: [\"system:nodes\"]\n verbs: [\"update\",\"patch\"]\n resources:\n - group: \"\" # core\n resources: [\"nodes/status\", \"pods/status\"]\n omitStages:\n - \"RequestReceived\"\n # deletecollection calls can be large, don't log responses for expected namespace deletions\n - level: Request\n users: [\"system:serviceaccount:kube-system:namespace-controller\"]\n verbs: [\"deletecollection\"]\n omitStages:\n - \"RequestReceived\"\n # Secrets, ConfigMaps, and TokenReviews can contain sensitive & binary data,\n # so only log at the Metadata level.\n - level: Metadata\n resources:\n - group: \"\" # core\n resources: [\"secrets\", \"configmaps\"]\n - group: authentication.k8s.io\n resources: [\"tokenreviews\"]\n omitStages:\n - \"RequestReceived\"\n # Get repsonses can be large; skip them.\n - level: 
Request\n verbs: [\"get\", \"list\", \"watch\"]\n resources:\n - group: \"\" # core\n - group: \"admissionregistration.k8s.io\"\n - group: \"apiextensions.k8s.io\"\n - group: \"apiregistration.k8s.io\"\n - group: \"apps\"\n - group: \"authentication.k8s.io\"\n - group: \"authorization.k8s.io\"\n - group: \"autoscaling\"\n - group: \"batch\"\n - group: \"certificates.k8s.io\"\n - group: \"extensions\"\n - group: \"metrics.k8s.io\"\n - group: \"networking.k8s.io\"\n - group: \"policy\"\n - group: \"rbac.authorization.k8s.io\"\n - group: \"settings.k8s.io\"\n - group: \"storage.k8s.io\"\n omitStages:\n - \"RequestReceived\"\n # Default level for known APIs\n - level: RequestResponse\n resources:\n - group: \"\" # core\n - group: \"admissionregistration.k8s.io\"\n - group: \"apiextensions.k8s.io\"\n - group: \"apiregistration.k8s.io\"\n - group: \"apps\"\n - group: \"authentication.k8s.io\"\n - group: \"authorization.k8s.io\"\n - group: \"autoscaling\"\n - group: \"batch\"\n - group: \"certificates.k8s.io\"\n - group: \"extensions\"\n - group: \"metrics.k8s.io\"\n - group: \"networking.k8s.io\"\n - group: \"policy\"\n - group: \"rbac.authorization.k8s.io\"\n - group: \"settings.k8s.io\"\n - group: \"storage.k8s.io\"\n omitStages:\n - \"RequestReceived\"\n # Default level for all other requests.\n - level: Metadata\n omitStages:\n - \"RequestReceived\"\npack:\n palette:\n config:\n oidc:\n identityProvider: noauth", + "manifests": [] + }, + { + "tag": "0.1.0", + "name": "cni-custom", + "type": "spectro", + "values": "manifests:\n byo-cni:\n contents: |\n apiVersion: v1\n kind: ConfigMap\n metadata:\n name: custom-cni\n data:\n # property-like keys; each key maps to a simple value\n custom-cni: \"byo-cni\"", + "manifests": [] + } + ] + } + ], + "policies": { + "scanPolicy": {} + }, + "clusterConfig": { + "machineManagementConfig": { + "osPatchConfig": { + "schedule": "", + "patchOnBoot": false, + "rebootIfRequired": false + } + }, + "updateWorkerPoolsInParallel": false, 
+ "resources": { + "namespaces": [], + "rbacs": [] + }, + "location": null + } + } +} diff --git a/test/templates/two-node-update.json.tmpl b/test/templates/two-node-update.json.tmpl new file mode 100644 index 0000000..9b2d226 --- /dev/null +++ b/test/templates/two-node-update.json.tmpl @@ -0,0 +1,44 @@ +{ + "cloudConfig": { + "edgeHosts": [ + { + "IsCandidateCaption": false, + "hostAddress": "_____place_holder_____", + "hostName": "_____place_holder_____", + "hostUid": "_____place_holder_____", + "nic": { + "nicName": "ens160" + }, + "twoNodeCandidatePriority": "primary" + }, + { + "IsCandidateCaption": false, + "hostAddress": "_____place_holder_____", + "hostName": "_____place_holder_____", + "hostUid": "_____place_holder_____", + "nic": { + "nicName": "ens160" + }, + "twoNodeCandidatePriority": "secondary" + } + ] + }, + "poolConfig": { + "name": "master-pool", + "labels": [ + "master" + ], + "isControlPlane": true, + "useControlPlaneAsWorker": true, + "taints": [], + "additionalLabels": {}, + "nodeRepaveInterval": 0, + "updateStrategy": { + "type": "RollingUpdateScaleIn" + }, + "machinePoolProperties": { + "archType": "amd64" + }, + "size": 2 + } +} \ No newline at end of file diff --git a/test/test-two-node.sh b/test/test-two-node.sh new file mode 100755 index 0000000..b81758d --- /dev/null +++ b/test/test-two-node.sh @@ -0,0 +1,512 @@ +#!/bin/bash + +set -e + +# Usage +# ----- +# +# 1. Install prerequisites: +# - docker (https://docs.docker.com/engine/install/) +# - earthly (https://earthly.dev/get-earthly) +# - git (https://github.com/git-guides/install-git) +# - govc (https://github.com/vmware/govmomi/blob/main/govc/README.md#installation) +# - jq (https://jqlang.github.io/jq/download/) +# - mkisofs (https://command-not-found.com/mkisofs) +# +# 2. Clone CanvOS and checkout this branch. +# +# 3. Configure your Earthly argument file by running: cp .arg.template .arg +# No modifications to the template are required. +# +# 4. 
Create a .netrc file in the stylus repo root with GitHub +# credentials capable of cloning Spectro Cloud internal repos. +# +# 5. Copy the test/env.example file to test/.env and edit test/.env +# as required. +# +# 6. Source and execute this script: +# +# source ./test/test-two-node.sh +# ./test/test-two-node.sh + +# Do not edit anything below + +if [ -n "$SUFFIX_OVERRIDE" ]; then + export HOST_SUFFIX=$HOST_SUFFIX-$SUFFIX_OVERRIDE + export CLUSTER_NAME=$CLUSTER_NAME-$SUFFIX_OVERRIDE +fi + +# note: host names must start with an alphabetic character as they're DNS names +declare -a vm_array=("tn1-$HOST_SUFFIX" "tn2-$HOST_SUFFIX") +export HOST_1="${vm_array[0]}" +export HOST_2="${vm_array[1]}" + +if [ -n "$REPLACEMENT_HOST" ]; then + export HOST_3="tn3-$HOST_SUFFIX" + vm_array+=($HOST_3) + echo "Added replacement host: $HOST_3" +fi + +function create_canvos_args() { +cat << EOF > .arg +CUSTOM_TAG=twonode +IMAGE_REGISTRY=$OCI_REGISTRY +OS_DISTRIBUTION=ubuntu +IMAGE_REPO=ubuntu +OS_VERSION=22 +K8S_DISTRIBUTION=k3s +ISO_NAME=palette-edge-installer +ARCH=amd64 +HTTPS_PROXY= +HTTP_PROXY= +PROXY_CERT_PATH= +UPDATE_KERNEL=false +EOF +} + +function create_userdata() { +cat << EOF > build/user-data +#cloud-config +stylus: + site: + edgeHostToken: "$EDGE_REGISTRATION_TOKEN" + name: "$1" + paletteEndpoint: "$DOMAIN" + debug: true +install: + poweroff: true +users: + - name: kairos + passwd: kairos +EOF +echo "created build/user-data" +} + +function create_iso() { + touch meta-data + mkisofs -output build/user-data-$2.iso -volid cidata -joliet -rock $1 meta-data + rm -f meta-data +} + +function create_userdata_isos() { + echo Creating user-data ISOs... + for vm in "${vm_array[@]}"; do + create_userdata $vm + create_iso build/user-data $vm + done +} + +function upload_userdata_isos() { + echo Uploading user-data ISOs...
+ for vm in "${vm_array[@]}"; do + govc datastore.upload --ds=$GOVC_DATASTORE --dc=$GOVC_DATACENTER "build/user-data-${vm}.iso" "${ISO_FOLDER}/user-data-${vm}.iso" + done +} + +function upload_stylus_iso() { + iso=palette-edge-installer-stylus-${STYLUS_HASH}-k3s-${PROVIDER_K3S_HASH}.iso + echo Uploading installer ISO $iso... + govc datastore.upload --ds=$GOVC_DATASTORE --dc=$GOVC_DATACENTER build/$iso $STYLUS_ISO +} + +function create_vms() { + echo Creating VMs... + for vm in "${vm_array[@]}"; do + govc vm.create -m 8192 -c 4 -disk 100GB -net.adapter vmxnet3 -iso=$STYLUS_ISO -on=false -pool=$GOVC_RESOURCE_POOL $vm + dev=$(govc device.cdrom.add -vm $vm) + govc device.cdrom.insert -vm=$vm -device=$dev "${ISO_FOLDER}/user-data-${vm}.iso" + govc vm.power -on $vm + done +} + +function destroy_vms() { + for vm in "${vm_array[@]}"; do + govc vm.destroy $vm + done +} + +function wait_for_vms_to_power_off() { + echo Waiting for both VMs to be flashed and power off... + while true; do + powerState1=$(govc vm.info -json=true "${vm_array[0]}" | jq -r .[][0].runtime.powerState) + powerState2=$(govc vm.info -json=true "${vm_array[1]}" | jq -r .[][0].runtime.powerState) + if [ "$powerState1" = "poweredOff" ] && [ "$powerState2" = "poweredOff" ]; then + echo VMs powered off! + break + fi + echo "VMs not powered off, sleeping for 5s..." + sleep 5 + done +} + +function reboot_vms() { + echo "Ejecting installer ISO & rebooting VMs..." + for vm in "${vm_array[@]}"; do + govc device.ls -vm=$vm + govc vm.power -off -force $vm + govc device.cdrom.eject -vm=$vm -device=cdrom-3000 + govc device.cdrom.eject -vm=$vm -device=cdrom-3001 + govc vm.power -on $vm + done +} + +function wait_until_edge_hosts_ready() { + echo Waiting for both Edge Hosts to register and become healthy... 
+ while true; do + set +e + ready=$(curl -s -X POST https://$DOMAIN/v1/dashboard/edgehosts/search \ + -H "ApiKey: $API_KEY" \ + -H "Content-Type: application/json" \ + -H "ProjectUid: $PROJECT_UID" \ + -d \ + ' + { + "filter": { + "conjuction": "and", + "filterGroups": [ + { + "conjunction": "and", + "filters": [ + { + "property": "state", + "type": "string", + "condition": { + "string": { + "operator": "eq", + "negation": false, + "match": { + "conjunction": "or", + "values": [ + "ready", + "unpaired" + ] + }, + "ignoreCase": false + } + } + } + ] + } + ] + }, + "sort": [] + } + ' | jq -e 'select(.items != []).items | map(. | select(.status.health.state == "healthy")) | length') + set -e + if [ -z ${ready} ]; then + ready=0 + fi + if [ $ready = 2 ]; then + echo Both Edge Hosts are healthy! + break + fi + echo "Only $ready/2 Edge Hosts are healthy, sleeping for 5s..." + sleep 5 + done +} + +function destroy_edge_hosts() { + readarray -t edgeHosts < <(curl -s -X POST https://$DOMAIN/v1/dashboard/edgehosts/search \ + -H "ApiKey: $API_KEY" \ + -H "Content-Type: application/json" \ + -H "ProjectUid: $PROJECT_UID" \ + -d \ + ' + { + "filter": { + "conjuction": "and", + "filterGroups": [ + { + "conjunction": "and", + "filters": [ + { + "property": "state", + "type": "string", + "condition": { + "string": { + "operator": "eq", + "negation": false, + "match": { + "conjunction": "or", + "values": [ + "ready", + "unpaired" + ] + }, + "ignoreCase": false + } + } + } + ] + } + ] + }, + "sort": [] + } + ' | jq -r '.items[].metadata.uid') + for host in "${edgeHosts[@]}"; do + curl -s -X DELETE https://$DOMAIN/v1/edgehosts/$host \ + -H "ApiKey: $API_KEY" \ + -H "Content-Type: application/json" \ + -H "ProjectUid: $PROJECT_UID" + echo "Deleted Edge Host $host" + done +} + +function prepare_cluster_profile() { + if [ -z "${STYLUS_HASH}" ]; then + echo STYLUS_HASH is unset. Please execute build_all and retry. 
+ return 1 + fi + jq ' + .metadata.name = env.CLUSTER_NAME | + .spec.template.packs[0].registry.metadata.uid = env.PUBLIC_PACK_REPO_UID | + .spec.template.packs[1].version = env.K3S_VERSION | + .spec.template.packs[1].tag = env.K3S_VERSION | + .spec.template.packs[1].registry.metadata.uid = env.PUBLIC_PACK_REPO_UID | + .spec.template.packs[2].registry.metadata.uid = env.PUBLIC_PACK_REPO_UID | + .spec.template.packs[0].values |= gsub("OCI_REGISTRY"; env.OCI_REGISTRY) | + .spec.template.packs[0].values |= gsub("PE_VERSION"; env.PE_VERSION) | + .spec.template.packs[0].values |= gsub("K3S_VERSION"; env.K3S_VERSION) | + .spec.template.packs[0].values |= gsub("STYLUS_HASH"; env.STYLUS_HASH) + ' test/templates/two-node-cluster-profile.json.tmpl > two-node-cluster-profile.json +} + +function create_cluster_profile() { + export CLUSTER_PROFILE_UID=$(curl -s -X POST https://$DOMAIN/v1/clusterprofiles/import?publish=true \ + -H "ApiKey: $API_KEY" \ + -H "Content-Type: application/json" \ + -H "ProjectUid: $PROJECT_UID" \ + -d @two-node-cluster-profile.json | jq -r .uid) + rm -f two-node-cluster-profile.json + if [ "$CLUSTER_PROFILE_UID" = "null" ]; then + echo Cluster Profile creation failed as it already exists. Please delete it and retry. + return 1 + fi + echo "Cluster Profile $CLUSTER_PROFILE_UID created" +} + +function destroy_cluster_profile() { + clusterProfileUid=$1 + curl -s -X DELETE https://$DOMAIN/v1/clusterprofiles/$clusterProfileUid \ + -H "ApiKey: $API_KEY" \ + -H "Content-Type: application/json" \ + -H "ProjectUid: $PROJECT_UID" + echo "Cluster Profile $clusterProfileUid deleted" +} + +function prepare_master_master_cluster() { + if [ -z "${STYLUS_HASH}" ]; then + echo STYLUS_HASH is unset. Please execute build_all and retry. + return 1 + fi + if nslookup $CLUSTER_VIP >/dev/null; then + echo CLUSTER_VIP: $CLUSTER_VIP is allocated. Please retry with an unallocated VIP. 
+ return 1 + fi + jq ' + .metadata.name = env.CLUSTER_NAME | + .spec.cloudConfig.controlPlaneEndpoint.host = env.CLUSTER_VIP | + .spec.machinePoolConfig[0].cloudConfig.edgeHosts[0].hostName = env.HOST_1 | + .spec.machinePoolConfig[0].cloudConfig.edgeHosts[0].hostUid = env.HOST_1 | + .spec.machinePoolConfig[0].cloudConfig.edgeHosts[0].nicName = env.NIC_NAME | + .spec.machinePoolConfig[0].cloudConfig.edgeHosts[1].hostName = env.HOST_2 | + .spec.machinePoolConfig[0].cloudConfig.edgeHosts[1].hostUid = env.HOST_2 | + .spec.machinePoolConfig[0].cloudConfig.edgeHosts[1].nicName = env.NIC_NAME | + .spec.profiles[0].uid = env.CLUSTER_PROFILE_UID | + .spec.profiles[0].packValues[0].values |= gsub("OCI_REGISTRY"; env.OCI_REGISTRY) | + .spec.profiles[0].packValues[0].values |= gsub("PE_VERSION"; env.PE_VERSION) | + .spec.profiles[0].packValues[0].values |= gsub("K3S_VERSION"; env.K3S_VERSION) | + .spec.profiles[0].packValues[0].values |= gsub("STYLUS_HASH"; env.STYLUS_HASH) | + .spec.profiles[0].packValues[1].tag = env.K3S_VERSION + ' test/templates/two-node-master-master.json.tmpl > two-node-create.json +} + +function create_cluster() { + uid=$(curl -s -X POST https://$DOMAIN/v1/spectroclusters/edge-native?ProjectUid=$PROJECT_UID \ + -H "ApiKey: $API_KEY" \ + -H "Content-Type: application/json" \ + -H "ProjectUid: $PROJECT_UID" \ + -d @two-node-create.json | jq -r .uid) + if [ "$uid" = "null" ]; then + echo "Cluster creation failed. Please check two-node-create.json and retry creation manually to see Hubble's response." 
+ return 1 + else + rm -f two-node-create.json + echo "Cluster $uid created" + fi +} + +function destroy_cluster() { + clusterUid=$1 + curl -s -X PATCH https://$DOMAIN/v1/spectroclusters/$clusterUid/status/conditions \ + -H "ApiKey: $API_KEY" \ + -H "Content-Type: application/json" \ + -H "ProjectUid: $PROJECT_UID" \ + -d \ + ' + [ + { + "message": "cleaned up", + "reason": "CloudInfrastructureCleanedUp", + "status": "True", + "type": "CloudInfrastructureCleanedUp" + } + ] + ' + echo "Cluster $clusterUid deleted" +} + +function prepare_cluster_update() { + export leaderIp=$1 + export replacementHostIp=$2 + jq ' + .cloudConfig.edgeHosts[0].hostAddress = env.leaderIp | + .cloudConfig.edgeHosts[0].hostName = env.HOST_1 | + .cloudConfig.edgeHosts[0].hostUid = env.HOST_1 | + .cloudConfig.edgeHosts[1].hostAddress = env.replacementHostIp | + .cloudConfig.edgeHosts[1].hostName = env.HOST_3 | + .cloudConfig.edgeHosts[1].hostUid = env.HOST_3 + ' test/templates/two-node-update.json.tmpl > two-node-update.json +} + +function update_cluster() { + cloudConfigUid=$1 + curl -X PUT https://$DOMAIN/v1/cloudconfigs/edge-native/$cloudConfigUid/machinePools/master-pool \ + -H "ApiKey: $API_KEY" \ + -H "Content-Type: application/json" \ + -H "ProjectUid: $PROJECT_UID" \ + -d @two-node-update.json + rm -f two-node-update.json + echo "Cloud config $cloudConfigUid updated" +} + +function build_provider_k3s() { + echo "Building provider-k3s image..." + earthly +build-provider-package \ + --platform=linux/amd64 \ + --IMAGE_REPOSITORY=${OCI_REGISTRY} \ + --VERSION=${PROVIDER_K3S_HASH} + docker push ${OCI_REGISTRY}/provider-k3s:${PROVIDER_K3S_HASH} +} + +function build_stylus_package_and_framework() { + echo "Building stylus image and stylus framework image..." 
+ earthly --allow-privileged +package \ + --platform=linux/amd64 \ + --IMAGE_REPOSITORY=${OCI_REGISTRY} \ + --BASE_IMAGE=quay.io/kairos/core-opensuse-leap:v2.3.2 \ + --VERSION=v0.0.0-${STYLUS_HASH} + docker push ${OCI_REGISTRY}/stylus-linux-amd64:v0.0.0-${STYLUS_HASH} + docker push ${OCI_REGISTRY}/stylus-framework-linux-amd64:v0.0.0-${STYLUS_HASH} +} + +function build_canvos() { + echo "Building provider image & installer ISO..." + earthly +build-all-images \ + --ARCH=amd64 \ + --PROVIDER_BASE=${OCI_REGISTRY}/provider-k3s:${PROVIDER_K3S_HASH} \ + --STYLUS_BASE=${OCI_REGISTRY}/stylus-framework-linux-amd64:v0.0.0-${STYLUS_HASH} \ + --ISO_NAME=palette-edge-installer-stylus-${STYLUS_HASH}-k3s-${PROVIDER_K3S_HASH} \ + --IMAGE_REGISTRY=${OCI_REGISTRY} \ + --TWO_NODE=true \ + --TWO_NODE_BACKEND=${TWO_NODE_BACKEND} \ + --CUSTOM_TAG=${STYLUS_HASH} \ + --PE_VERSION=v${PE_VERSION} + docker push ${OCI_REGISTRY}/ubuntu:k3s-${K3S_VERSION}-v${PE_VERSION}-${STYLUS_HASH} +} + +function build_all() { + + # optionally build/rebuild provider-k3s + test -d ../provider-k3s || ( cd .. && git clone https://github.com/kairos-io/provider-k3s -b ${PROVIDER_K3S_BRANCH}) + cd ../provider-k3s + export PROVIDER_K3S_HASH=$(git describe --always) + ( + docker image ls --format "{{.Repository}}:{{.Tag}}" | \ + grep -q ${OCI_REGISTRY}/provider-k3s:${PROVIDER_K3S_HASH} + ) || ( build_provider_k3s ) + + # optionally build/rebuild stylus images + test -d ../stylus || ( cd .. 
&& git clone https://github.com/spectrocloud/stylus -b ${STYLUS_BRANCH} ) + cd ../stylus + export STYLUS_HASH=$(git describe --always) + ( + docker image ls --format "{{.Repository}}:{{.Tag}}" | \ + grep -q $OCI_REGISTRY/stylus-linux-amd64:v0.0.0-${STYLUS_HASH} + ) || ( build_stylus_package_and_framework ) + + # optionally build/rebuild provider image & installer ISO + cd ../CanvOS + ( + test -f build/palette-edge-installer-stylus-${STYLUS_HASH}-k3s-${PROVIDER_K3S_HASH}.iso && \ + docker image ls --format "{{.Repository}}:{{.Tag}}" | \ + grep -q ${OCI_REGISTRY}/ubuntu:k3s-${K3S_VERSION}-v${PE_VERSION}-${STYLUS_HASH} + ) || ( build_canvos ) +} + +function clean_all() { + docker images | grep $OCI_REGISTRY | awk '{print $3;}' | xargs docker rmi --force + docker images | grep palette-installer | awk '{print $3;}' | xargs docker rmi --force + earthly prune --reset + docker system prune --all --volumes --force +} + +function main() { + + # build all required edge artifacts + build_all + + # upload installer ISO to vSphere + upload_stylus_iso + + # create & upload user-data ISOs, configured to enable two node mode + create_userdata_isos + upload_userdata_isos + + # create VMs in vSphere, wait for the installation phase to complete, + # then power them off, remove the installer ISO, and reboot them + create_vms + wait_for_vms_to_power_off + reboot_vms + + # wait for the VMs to register with Palette and appear as Edge Hosts + wait_until_edge_hosts_ready + + # optionally create a two node Cluster Profile using the latest artifact + # versions - can be skipped by specifying the UID + if [ -z "${CLUSTER_PROFILE_UID}" ]; then + prepare_cluster_profile + create_cluster_profile + fi + + # create a new Edge Native cluster in Palette using the Edge Hosts + # provisioned above, plus the two node Cluster Profile + prepare_master_master_cluster + create_cluster +} + +# This line and the if condition below allow sourcing the script without executing +# the main function +(return 0 
2>/dev/null) && sourced=1 || sourced=0 + +if [[ $sourced == 1 ]]; then + script=${BASH_SOURCE[0]} + if [ -z "$script" ]; then + script=$0 + fi + set +e + echo "You can now use any of these functions:" + echo "" + grep ^function $script | grep -v main | awk '{gsub(/function /,""); gsub(/\(\) \{/,""); print;}' + echo +else + envfile=$(dirname "${0}")/.env + if [ -f "${envfile}" ]; then + source "${envfile}" + else + echo "Please create a .env file in the test directory and populate it with the required variables." + exit 1 + fi + main +fi diff --git a/two-node-update.json b/two-node-update.json new file mode 100644 index 0000000..edd2814 --- /dev/null +++ b/two-node-update.json @@ -0,0 +1,44 @@ +{ + "cloudConfig": { + "edgeHosts": [ + { + "IsCandidateCaption": false, + "hostAddress": "10.10.10.1", + "hostName": "tn1-tyler-e16af242", + "hostUid": "tn1-tyler-e16af242", + "nic": { + "nicName": "ens160" + }, + "twoNodeCandidatePriority": "primary" + }, + { + "IsCandidateCaption": false, + "hostAddress": "10.10.10.2", + "hostName": null, + "hostUid": null, + "nic": { + "nicName": "ens160" + }, + "twoNodeCandidatePriority": "secondary" + } + ] + }, + "poolConfig": { + "name": "master-pool", + "labels": [ + "master" + ], + "isControlPlane": true, + "useControlPlaneAsWorker": true, + "taints": [], + "additionalLabels": {}, + "nodeRepaveInterval": 0, + "updateStrategy": { + "type": "RollingUpdateScaleIn" + }, + "machinePoolProperties": { + "archType": "amd64" + }, + "size": 2 + } +}