From 9cbeb7d02ff8168ec182ab462ca0c1911c65ae59 Mon Sep 17 00:00:00 2001 From: Gerrit Date: Fri, 15 Nov 2024 11:32:21 +0100 Subject: [PATCH 1/5] Ignition service for bootstrapping kubeadm and kubelet. --- .../{example.yaml => example-kubeadm.yaml} | 73 ++++++++++++++----- config/samples/kustomization.yaml | 2 +- .../metalstackcluster_controller.go | 18 +++-- 3 files changed, 67 insertions(+), 26 deletions(-) rename config/samples/{example.yaml => example-kubeadm.yaml} (50%) diff --git a/config/samples/example.yaml b/config/samples/example-kubeadm.yaml similarity index 50% rename from config/samples/example.yaml rename to config/samples/example-kubeadm.yaml index 948c757..ba39ef0 100644 --- a/config/samples/example.yaml +++ b/config/samples/example-kubeadm.yaml @@ -29,11 +29,29 @@ spec: networks: - internet-mini-lab --- +apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1 +kind: MetalStackMachineTemplate +metadata: + name: metal-test-controlplane +spec: + template: + spec: + image: ubuntu-24.04 + size: v1-small-x86 +--- kind: KubeadmControlPlane apiVersion: controlplane.cluster.x-k8s.io/v1beta1 metadata: name: metal-test-controlplane spec: + replicas: 1 + version: v1.30.6 + machineTemplate: + nodeDrainTimeout: 10m + infrastructureRef: + kind: MetalStackMachineTemplate + apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1 + name: metal-test-controlplane kubeadmConfigSpec: format: ignition initConfiguration: @@ -41,21 +59,40 @@ spec: joinConfiguration: controlPlane: {} nodeRegistration: {} - machineTemplate: - nodeDrainTimeout: 10m - infrastructureRef: - kind: MetalStackMachineTemplate - apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1 - name: metal-test-controlplane - replicas: 1 - version: v1.30.6 ---- -apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1 -kind: MetalStackMachineTemplate -metadata: - name: metal-test-controlplane -spec: - template: - spec: - image: ubuntu-24.04 - size: v1-small-x86 + ignition: + containerLinuxConfig: + additionalConfig: | + 
systemd: + units: + - name: cluster-api-init.service + enable: true + contents: |- + [Unit] + Description=Prepares the node for bootstrapping with cluster-api kubeadm + Before=kubeadm.service + After=network-online.target + Wants=network-online.target + [Service] + Type=oneshot + Restart=on-failure + RestartSec=5 + StartLimitBurst=0 + EnvironmentFile=/etc/environment + ExecStart=/var/lib/cluster-api-init/bootstrap.sh + [Install] + WantedBy=multi-user.target + files: + - path: /var/lib/cluster-api-init/bootstrap.sh + owner: "root:root" + permissions: "0744" + content: | + #!/usr/bin/env bash + set -eo pipefail + set +x + + curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.30/deb/Release.key | gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg + echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.30/deb/ /' | tee /etc/apt/sources.list.d/kubernetes.list + + apt-get update + apt-get install -y kubelet kubeadm kubectl + apt-mark hold kubelet kubeadm kubectl diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml index 2767e50..2291d43 100644 --- a/config/samples/kustomization.yaml +++ b/config/samples/kustomization.yaml @@ -2,5 +2,5 @@ namespace: default resources: -- example.yaml +- example-kubeadm.yaml # +kubebuilder:scaffold:manifestskustomizesamples diff --git a/internal/controller/metalstackcluster_controller.go b/internal/controller/metalstackcluster_controller.go index 9b88bf1..ac33d47 100644 --- a/internal/controller/metalstackcluster_controller.go +++ b/internal/controller/metalstackcluster_controller.go @@ -166,12 +166,12 @@ func (r *MetalStackClusterReconciler) SetupWithManager(mgr ctrl.Manager) error { } func (r *clusterReconciler) reconcile() error { - nodeCIDR, err := r.ensureNodeNetwork() + nodeNetworkID, err := r.ensureNodeNetwork() if err != nil { return fmt.Errorf("unable to ensure node network: %w", err) } - r.log.Info("reconciled node network", "cidr", nodeCIDR) + 
r.log.Info("reconciled node network", "network-id", nodeNetworkID) ip, err := r.ensureControlPlaneIP() if err != nil { @@ -197,7 +197,7 @@ func (r *clusterReconciler) reconcile() error { return fmt.Errorf("failed to update infra cluster control plane endpoint: %w", err) } - fwdeploy, err := r.ensureFirewallDeployment(nodeCIDR) + fwdeploy, err := r.ensureFirewallDeployment(nodeNetworkID) if err != nil { return fmt.Errorf("unable to ensure firewall deployment: %w", err) } @@ -259,7 +259,7 @@ func (r *clusterReconciler) ensureNodeNetwork() (string, error) { return "", fmt.Errorf("error creating node network: %w", err) } - return resp.Payload.Prefixes[0], nil + return *resp.Payload.ID, nil case 1: nw := nws[0] @@ -267,7 +267,7 @@ func (r *clusterReconciler) ensureNodeNetwork() (string, error) { return "", errors.New("node network exists but the prefix is gone") } - return nw.Prefixes[0], nil + return *nw.ID, nil default: return "", fmt.Errorf("more than a single node network exists for this cluster, operator investigation is required") } @@ -399,7 +399,7 @@ func (r *clusterReconciler) findControlPlaneIP() ([]*models.V1IPResponse, error) return resp.Payload, nil } -func (r *clusterReconciler) ensureFirewallDeployment(nodeCIDR string) (*fcmv2.FirewallDeployment, error) { +func (r *clusterReconciler) ensureFirewallDeployment(nodeNetworkID string) (*fcmv2.FirewallDeployment, error) { deploy := &fcmv2.FirewallDeployment{ ObjectMeta: metav1.ObjectMeta{ Name: r.infraCluster.Name, @@ -440,7 +440,7 @@ func (r *clusterReconciler) ensureFirewallDeployment(nodeCIDR string) (*fcmv2.Fi deploy.Spec.Template.Spec.Size = r.infraCluster.Spec.Firewall.Size deploy.Spec.Template.Spec.Image = r.infraCluster.Spec.Firewall.Image - deploy.Spec.Template.Spec.Networks = append(r.infraCluster.Spec.Firewall.AdditionalNetworks, nodeCIDR) + deploy.Spec.Template.Spec.Networks = append(r.infraCluster.Spec.Firewall.AdditionalNetworks, nodeNetworkID) deploy.Spec.Template.Spec.RateLimits = 
r.infraCluster.Spec.Firewall.RateLimits deploy.Spec.Template.Spec.EgressRules = r.infraCluster.Spec.Firewall.EgressRules deploy.Spec.Template.Spec.LogAcceptedConnections = ptr.Deref(r.infraCluster.Spec.Firewall.LogAcceptedConnections, false) @@ -452,6 +452,10 @@ func (r *clusterReconciler) ensureFirewallDeployment(nodeCIDR string) (*fcmv2.Fi deploy.Spec.Template.Spec.NftablesExporterVersion = "" deploy.Spec.Template.Spec.NftablesExporterURL = "" + // TODO: we need to allow internet connection for the nodes before the firewall-controller can connect to the control-plane + // the FCM currently does not support this + deploy.Spec.Template.Spec.Userdata = "" + // TODO: do we need to generate ssh keys for the machines and the firewall in this controller? deploy.Spec.Template.Spec.SSHPublicKeys = nil From 97de36c2848e1e726fc71a006c3a28f6bc6e4c2b Mon Sep 17 00:00:00 2001 From: Gerrit Date: Fri, 15 Nov 2024 14:26:23 +0100 Subject: [PATCH 2/5] Provide containerd config. --- config/samples/example-kubeadm.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/config/samples/example-kubeadm.yaml b/config/samples/example-kubeadm.yaml index ba39ef0..f2359e0 100644 --- a/config/samples/example-kubeadm.yaml +++ b/config/samples/example-kubeadm.yaml @@ -96,3 +96,8 @@ spec: apt-get update apt-get install -y kubelet kubeadm kubectl apt-mark hold kubelet kubeadm kubectl + - path: /etc/containerd/config.toml + owner: "root:root" + permissions: "0644" + content: | + disabled_plugins = [] From c9fa4f595a573f1ec3a1f24769e2fcb45d4a47ef Mon Sep 17 00:00:00 2001 From: Gerrit Date: Thu, 21 Nov 2024 10:38:43 +0100 Subject: [PATCH 3/5] Deploy Prometheus and firewall-controller-manager CRDs from Ansible. 
--- README.md | 4 +-- capi-lab/deploy.yaml | 8 +++++ capi-lab/firewall-rules.yaml | 23 +++++++++++++++ capi-lab/requirements.yaml | 2 +- .../defaults/main.yaml | 2 ++ .../tasks/main.yaml | 29 +++++++++++++++++++ capi-lab/roles/prometheus/defaults/main.yaml | 3 ++ capi-lab/roles/prometheus/tasks/main.yaml | 22 ++++++++++++++ config/samples/example-kubeadm.yaml | 5 ++++ 9 files changed, 94 insertions(+), 4 deletions(-) create mode 100644 capi-lab/firewall-rules.yaml create mode 100644 capi-lab/roles/firewall-controller-manager/defaults/main.yaml create mode 100644 capi-lab/roles/firewall-controller-manager/tasks/main.yaml create mode 100644 capi-lab/roles/prometheus/defaults/main.yaml create mode 100644 capi-lab/roles/prometheus/tasks/main.yaml diff --git a/README.md b/README.md index 055f767..4e15f1b 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,7 @@ ```bash make -C capi-lab eval $(make -C capi-lab --silent dev-env) -kubectl apply -f ../firewall-controller-manager/config/crds -kubectl create -f https://github.com/prometheus-operator/prometheus-operator/releases/download/v0.77.1/bundle.yaml -clusterctl init +EXP_KUBEADM_BOOTSTRAP_FORMAT_IGNITION=true clusterctl init make push-to-capi-lab kubectl apply -k config/samples diff --git a/capi-lab/deploy.yaml b/capi-lab/deploy.yaml index cc1d482..13c3165 100644 --- a/capi-lab/deploy.yaml +++ b/capi-lab/deploy.yaml @@ -3,8 +3,16 @@ hosts: localhost connection: local gather_facts: false + vars: + setup_yaml: + - url: https://raw.githubusercontent.com/metal-stack/releases/develop/release.yaml + meta_var: metal_stack_release roles: - name: ansible-common - name: metal-roles - name: cert-manager + - name: prometheus + - name: firewall-controller-manager + vars: + firewall_controller_manager_namespace: cap-metal-stack - name: cluster-api-provider-metal-stack diff --git a/capi-lab/firewall-rules.yaml b/capi-lab/firewall-rules.yaml new file mode 100644 index 0000000..6eec376 --- /dev/null +++ b/capi-lab/firewall-rules.yaml @@ 
-0,0 +1,23 @@ +egress: +- comment: allow outgoing traffic for HTTP and HTTPS and DNS + ports: + - 443 + - 80 + - 53 + protocol: TCP + to: + - 0.0.0.0/0 +- comment: allow outgoing DNS and NTP via UDP + ports: + - 53 + - 123 + protocol: UDP + to: + - 0.0.0.0/0 +ingress: +- comment: allow incoming HTTPS to kube-apiserver + ports: + - 433 + protocol: TCP + from: + - 0.0.0.0/0 diff --git a/capi-lab/requirements.yaml b/capi-lab/requirements.yaml index f3cdbec..f3409b5 100644 --- a/capi-lab/requirements.yaml +++ b/capi-lab/requirements.yaml @@ -6,4 +6,4 @@ version: v0.2.9 - src: https://github.com/metal-stack/metal-roles name: metal-roles - version: v0.14.2 + version: v0.14.5 diff --git a/capi-lab/roles/firewall-controller-manager/defaults/main.yaml b/capi-lab/roles/firewall-controller-manager/defaults/main.yaml new file mode 100644 index 0000000..6bb9002 --- /dev/null +++ b/capi-lab/roles/firewall-controller-manager/defaults/main.yaml @@ -0,0 +1,2 @@ +--- +firewall_controller_manager_namespace: "firewall-controller-manager" diff --git a/capi-lab/roles/firewall-controller-manager/tasks/main.yaml b/capi-lab/roles/firewall-controller-manager/tasks/main.yaml new file mode 100644 index 0000000..0a35dd7 --- /dev/null +++ b/capi-lab/roles/firewall-controller-manager/tasks/main.yaml @@ -0,0 +1,29 @@ +--- +- name: Gather release versions + setup_yaml: + +- name: Create namespace + k8s: + definition: + apiVersion: v1 + kind: Namespace + metadata: + name: "{{ firewall_controller_manager_namespace }}" + labels: + name: "{{ firewall_controller_manager_namespace }}" + +- name: Deploy firewall-controller-manager CRDs + k8s: + definition: "{{ lookup('url', 'https://raw.githubusercontent.com/metal-stack/firewall-controller-manager/refs/tags/' + firewall_controller_manager_image_tag + '/config/crds/' + item, split_lines=False) }}" + namespace: "{{ firewall_controller_manager_namespace }}" + loop: + - firewall.metal-stack.io_firewalldeployments.yaml + - 
firewall.metal-stack.io_firewallmonitors.yaml + - firewall.metal-stack.io_firewalls.yaml + - firewall.metal-stack.io_firewallsets.yaml + +# - name: Deploy firewall-controller-manager +# k8s: +# definition: + +# namespace: "{{ firewall_controller_manager_namespace }}" diff --git a/capi-lab/roles/prometheus/defaults/main.yaml b/capi-lab/roles/prometheus/defaults/main.yaml new file mode 100644 index 0000000..fcec798 --- /dev/null +++ b/capi-lab/roles/prometheus/defaults/main.yaml @@ -0,0 +1,3 @@ +--- +prometheus_namespace: prometheus +prometheus_helm_chart_version: "66.2.1" diff --git a/capi-lab/roles/prometheus/tasks/main.yaml b/capi-lab/roles/prometheus/tasks/main.yaml new file mode 100644 index 0000000..0e98806 --- /dev/null +++ b/capi-lab/roles/prometheus/tasks/main.yaml @@ -0,0 +1,22 @@ +--- +- name: Create namespace + k8s: + definition: + apiVersion: v1 + kind: Namespace + metadata: + name: "{{ prometheus_namespace }}" + labels: + name: "{{ prometheus_namespace }}" + +- name: Deploy Prometheus + include_role: + name: ansible-common/roles/helm-chart + vars: + helm_chart: kube-prometheus-stack + helm_repo: https://prometheus-community.github.io/helm-charts + helm_force: false + helm_release_name: kube-prometheus-stack + helm_target_namespace: "{{ prometheus_namespace }}" + helm_chart_version: "{{ prometheus_helm_chart_version }}" + # helm_value_file_template: values.yaml diff --git a/config/samples/example-kubeadm.yaml b/config/samples/example-kubeadm.yaml index f2359e0..6f635b9 100644 --- a/config/samples/example-kubeadm.yaml +++ b/config/samples/example-kubeadm.yaml @@ -54,7 +54,12 @@ spec: name: metal-test-controlplane kubeadmConfigSpec: format: ignition + clusterConfiguration: + controlPlaneEndpoint: 203.0.113.129:443 initConfiguration: + localAPIEndpoint: + advertiseAddress: 203.0.113.129 + bindPort: 443 nodeRegistration: {} joinConfiguration: controlPlane: {} From f904b42cd25472a53ee443df343a67a9182ed5f0 Mon Sep 17 00:00:00 2001 From: Gerrit Date: Thu, 21 
Nov 2024 11:07:52 +0100 Subject: [PATCH 4/5] Fix typo in kube-apiserver ingress port (433 -> 443) --- capi-lab/firewall-rules.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/capi-lab/firewall-rules.yaml b/capi-lab/firewall-rules.yaml index 6eec376..22a406f 100644 --- a/capi-lab/firewall-rules.yaml +++ b/capi-lab/firewall-rules.yaml @@ -17,7 +17,7 @@ egress: ingress: - comment: allow incoming HTTPS to kube-apiserver ports: - - 433 + - 443 protocol: TCP from: - 0.0.0.0/0 From e832c2e0f74ae739f42e9ff1a193118b5402c64a Mon Sep 17 00:00:00 2001 From: Gerrit Date: Thu, 21 Nov 2024 12:03:06 +0100 Subject: [PATCH 5/5] Install as binary, GPG does not work easily without tty. --- config/samples/example-kubeadm.yaml | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/config/samples/example-kubeadm.yaml b/config/samples/example-kubeadm.yaml index 6f635b9..8642605 100644 --- a/config/samples/example-kubeadm.yaml +++ b/config/samples/example-kubeadm.yaml @@ -58,7 +58,7 @@ spec: controlPlaneEndpoint: 203.0.113.129:443 initConfiguration: localAPIEndpoint: - advertiseAddress: 203.0.113.129 + advertiseAddress: 10.0.0.2 bindPort: 443 nodeRegistration: {} joinConfiguration: @@ -95,12 +95,25 @@ spec: set -eo pipefail set +x - curl -fsSL https://pkgs.k8s.io/core:/stable:/v1.30/deb/Release.key | gpg --dearmor -o /etc/apt/keyrings/kubernetes-apt-keyring.gpg - echo 'deb [signed-by=/etc/apt/keyrings/kubernetes-apt-keyring.gpg] https://pkgs.k8s.io/core:/stable:/v1.30/deb/ /' | tee /etc/apt/sources.list.d/kubernetes.list + apt update + apt install conntrack - apt-get update - apt-get install -y kubelet kubeadm kubectl - apt-mark hold kubelet kubeadm kubectl + CNI_PLUGINS_VERSION="v1.3.0" + DEST="/opt/cni/bin" + mkdir -p "$DEST" + curl -L "https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGINS_VERSION}/cni-plugins-linux-amd64-${CNI_PLUGINS_VERSION}.tgz" | sudo tar -C "$DEST" -xz + + RELEASE="v1.30.6" + cd /usr/local/bin + sudo curl -L --remote-name-all 
https://dl.k8s.io/release/${RELEASE}/bin/linux/amd64/{kubeadm,kubelet,kubectl} + sudo chmod +x {kubeadm,kubelet,kubectl} + + RELEASE_VERSION="v0.16.2" + curl -sSL "https://raw.githubusercontent.com/kubernetes/release/${RELEASE_VERSION}/cmd/krel/templates/latest/kubelet/kubelet.service" | sed "s:/usr/bin:/usr/local/bin:g" | sudo tee /usr/lib/systemd/system/kubelet.service + sudo mkdir -p /usr/lib/systemd/system/kubelet.service.d + curl -sSL "https://raw.githubusercontent.com/kubernetes/release/${RELEASE_VERSION}/cmd/krel/templates/latest/kubeadm/10-kubeadm.conf" | sed "s:/usr/bin:/usr/local/bin:g" | sudo tee /usr/lib/systemd/system/kubelet.service.d/10-kubeadm.conf + + systemctl enable kubelet.service - path: /etc/containerd/config.toml owner: "root:root" permissions: "0644"