Ignition service for bootstrapping kubeadm and kubelet. #8

Merged · 5 commits · Nov 21, 2024
4 changes: 1 addition & 3 deletions README.md
@@ -11,9 +11,7 @@
```bash
make -C capi-lab
eval $(make -C capi-lab --silent dev-env)
kubectl apply -f ../firewall-controller-manager/config/crds
kubectl create -f https://github.com/prometheus-operator/prometheus-operator/releases/download/v0.77.1/bundle.yaml
clusterctl init
EXP_KUBEADM_BOOTSTRAP_FORMAT_IGNITION=true clusterctl init
make push-to-capi-lab

kubectl apply -k config/samples
8 changes: 8 additions & 0 deletions capi-lab/deploy.yaml
@@ -3,8 +3,16 @@
hosts: localhost
connection: local
gather_facts: false
vars:
setup_yaml:
- url: https://raw.githubusercontent.com/metal-stack/releases/develop/release.yaml
meta_var: metal_stack_release
roles:
- name: ansible-common
- name: metal-roles
- name: cert-manager
- name: prometheus
- name: firewall-controller-manager
vars:
firewall_controller_manager_namespace: cap-metal-stack
- name: cluster-api-provider-metal-stack
23 changes: 23 additions & 0 deletions capi-lab/firewall-rules.yaml
@@ -0,0 +1,23 @@
egress:
- comment: allow outgoing traffic for HTTP and HTTPS and DNS
ports:
- 443
- 80
- 53
protocol: TCP
to:
- 0.0.0.0/0
- comment: allow outgoing DNS and NTP via UDP
ports:
- 53
- 123
protocol: UDP
to:
- 0.0.0.0/0
ingress:
- comment: allow incoming HTTPS to kube-apiserver
ports:
- 443
protocol: TCP
from:
- 0.0.0.0/0
2 changes: 1 addition & 1 deletion capi-lab/requirements.yaml
@@ -6,4 +6,4 @@
version: v0.2.9
- src: https://github.com/metal-stack/metal-roles
name: metal-roles
version: v0.14.2
version: v0.14.5
2 changes: 2 additions & 0 deletions capi-lab/roles/firewall-controller-manager/defaults/main.yaml
@@ -0,0 +1,2 @@
---
firewall_controller_manager_namespace: "firewall-controller-manager"
29 changes: 29 additions & 0 deletions capi-lab/roles/firewall-controller-manager/tasks/main.yaml
@@ -0,0 +1,29 @@
---
- name: Gather release versions
setup_yaml:

- name: Create namespace
k8s:
definition:
apiVersion: v1
kind: Namespace
metadata:
name: "{{ firewall_controller_manager_namespace }}"
labels:
name: "{{ firewall_controller_manager_namespace }}"

- name: Deploy firewall-controller-manager CRDs
k8s:
definition: "{{ lookup('url', 'https://raw.githubusercontent.com/metal-stack/firewall-controller-manager/refs/tags/' + firewall_controller_manager_image_tag + '/config/crds/' + item, split_lines=False) }}"
namespace: "{{ firewall_controller_manager_namespace }}"
loop:
- firewall.metal-stack.io_firewalldeployments.yaml
- firewall.metal-stack.io_firewallmonitors.yaml
- firewall.metal-stack.io_firewalls.yaml
- firewall.metal-stack.io_firewallsets.yaml

# - name: Deploy firewall-controller-manager
# k8s:
# definition:

# namespace: "{{ firewall_controller_manager_namespace }}"
3 changes: 3 additions & 0 deletions capi-lab/roles/prometheus/defaults/main.yaml
@@ -0,0 +1,3 @@
---
prometheus_namespace: prometheus
prometheus_helm_chart_version: "66.2.1"
22 changes: 22 additions & 0 deletions capi-lab/roles/prometheus/tasks/main.yaml
@@ -0,0 +1,22 @@
---
- name: Create namespace
k8s:
definition:
apiVersion: v1
kind: Namespace
metadata:
name: "{{ prometheus_namespace }}"
labels:
name: "{{ prometheus_namespace }}"

- name: Deploy Prometheus
include_role:
name: ansible-common/roles/helm-chart
vars:
helm_chart: kube-prometheus-stack
helm_repo: https://prometheus-community.github.io/helm-charts
helm_force: false
helm_release_name: kube-prometheus-stack
helm_target_namespace: "{{ prometheus_namespace }}"
helm_chart_version: "{{ prometheus_helm_chart_version }}"
# helm_value_file_template: values.yaml
121 changes: 121 additions & 0 deletions config/samples/example-kubeadm.yaml
@@ -0,0 +1,121 @@
---
apiVersion: cluster.x-k8s.io/v1beta1
kind: Cluster
metadata:
name: metal-test
spec:
clusterNetwork:
pods:
cidrBlocks: ["192.168.0.0/16"]
controlPlaneRef:
apiVersion: controlplane.cluster.x-k8s.io/v1beta1
kind: KubeadmControlPlane
name: metal-test-controlplane
infrastructureRef:
apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1
kind: MetalStackCluster
name: metal-test
---
apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1
kind: MetalStackCluster
metadata:
name: metal-test
spec:
projectID: 00000000-0000-0000-0000-000000000001
partition: mini-lab
firewall:
size: v1-small-x86
image: firewall-ubuntu-3.0
networks:
- internet-mini-lab
---
apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1
kind: MetalStackMachineTemplate
metadata:
name: metal-test-controlplane
spec:
template:
spec:
image: ubuntu-24.04
size: v1-small-x86
---
kind: KubeadmControlPlane
apiVersion: controlplane.cluster.x-k8s.io/v1beta1
metadata:
name: metal-test-controlplane
spec:
replicas: 1
version: v1.30.6
machineTemplate:
nodeDrainTimeout: 10m
infrastructureRef:
kind: MetalStackMachineTemplate
apiVersion: infrastructure.cluster.x-k8s.io/v1alpha1
name: metal-test-controlplane
kubeadmConfigSpec:
format: ignition
clusterConfiguration:
controlPlaneEndpoint: 203.0.113.129:443
initConfiguration:
localAPIEndpoint:
advertiseAddress: 10.0.0.2
bindPort: 443
nodeRegistration: {}
joinConfiguration:
controlPlane: {}
nodeRegistration: {}
ignition:
containerLinuxConfig:
additionalConfig: |
systemd:
units:
- name: cluster-api-init.service
enable: true
contents: |-
[Unit]
Description=Prepares the node for bootstrapping with cluster-api kubeadm
Before=kubeadm.service
After=network-online.target
Wants=network-online.target
[Service]
Type=oneshot
Restart=on-failure
RestartSec=5
StartLimitBurst=0
EnvironmentFile=/etc/environment
ExecStart=/var/lib/cluster-api-init/bootstrap.sh
[Install]
WantedBy=multi-user.target
files:
- path: /var/lib/cluster-api-init/bootstrap.sh
owner: "root:root"
permissions: "0744"
content: |
#!/usr/bin/env bash
set -eo pipefail
set +x

apt update
apt install -y conntrack

CNI_PLUGINS_VERSION="v1.3.0"
DEST="/opt/cni/bin"
mkdir -p "$DEST"
curl -L "https://github.com/containernetworking/plugins/releases/download/${CNI_PLUGINS_VERSION}/cni-plugins-linux-amd64-${CNI_PLUGINS_VERSION}.tgz" | sudo tar -C "$DEST" -xz

RELEASE="v1.30.6"
cd /usr/local/bin
sudo curl -L --remote-name-all https://dl.k8s.io/release/${RELEASE}/bin/linux/amd64/{kubeadm,kubelet,kubectl}
sudo chmod +x {kubeadm,kubelet,kubectl}

RELEASE_VERSION="v0.16.2"
curl -sSL "https://raw.githubusercontent.com/kubernetes/release/${RELEASE_VERSION}/cmd/krel/templates/latest/kubelet/kubelet.service" | sed "s:/usr/bin:/usr/local/bin:g" | sudo tee /usr/lib/systemd/system/kubelet.service
sudo mkdir -p /usr/lib/systemd/system/kubelet.service.d
curl -sSL "https://raw.githubusercontent.com/kubernetes/release/${RELEASE_VERSION}/cmd/krel/templates/latest/kubeadm/10-kubeadm.conf" | sed "s:/usr/bin:/usr/local/bin:g" | sudo tee /usr/lib/systemd/system/kubelet.service.d/10-kubeadm.conf

systemctl enable kubelet.service
- path: /etc/containerd/config.toml
owner: "root:root"
permissions: "0644"
content: |
disabled_plugins = []
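
For reference, a minimal usage sketch for this sample (assuming the capi-lab setup from the README above; the resource names `metal-test` and `metal-test-controlplane` are the ones defined in this file):

```bash
# Apply the sample cluster manifests (wired up via config/samples/kustomization.yaml).
kubectl apply -k config/samples

# Watch the cluster and its kubeadm control plane come up.
clusterctl describe cluster metal-test
kubectl get kubeadmcontrolplane metal-test-controlplane
kubectl get machines
```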
61 changes: 0 additions & 61 deletions config/samples/example.yaml

This file was deleted.

2 changes: 1 addition & 1 deletion config/samples/kustomization.yaml
@@ -2,5 +2,5 @@
namespace: default

resources:
- example.yaml
- example-kubeadm.yaml
# +kubebuilder:scaffold:manifestskustomizesamples
18 changes: 11 additions & 7 deletions internal/controller/metalstackcluster_controller.go
@@ -166,12 +166,12 @@ func (r *MetalStackClusterReconciler) SetupWithManager(mgr ctrl.Manager) error {
}

func (r *clusterReconciler) reconcile() error {
nodeCIDR, err := r.ensureNodeNetwork()
nodeNetworkID, err := r.ensureNodeNetwork()
if err != nil {
return fmt.Errorf("unable to ensure node network: %w", err)
}

r.log.Info("reconciled node network", "cidr", nodeCIDR)
r.log.Info("reconciled node network", "network-id", nodeNetworkID)

ip, err := r.ensureControlPlaneIP()
if err != nil {
@@ -197,7 +197,7 @@ func (r *clusterReconciler) reconcile() error {
return fmt.Errorf("failed to update infra cluster control plane endpoint: %w", err)
}

fwdeploy, err := r.ensureFirewallDeployment(nodeCIDR)
fwdeploy, err := r.ensureFirewallDeployment(nodeNetworkID)
if err != nil {
return fmt.Errorf("unable to ensure firewall deployment: %w", err)
}
@@ -259,15 +259,15 @@ func (r *clusterReconciler) ensureNodeNetwork() (string, error) {
return "", fmt.Errorf("error creating node network: %w", err)
}

return resp.Payload.Prefixes[0], nil
return *resp.Payload.ID, nil
case 1:
nw := nws[0]

if len(nw.Prefixes) == 0 {
return "", errors.New("node network exists but the prefix is gone")
}

return nw.Prefixes[0], nil
return *nw.ID, nil
default:
return "", fmt.Errorf("more than a single node network exists for this cluster, operator investigation is required")
}
@@ -399,7 +399,7 @@ func (r *clusterReconciler) findControlPlaneIP() ([]*models.V1IPResponse, error)
return resp.Payload, nil
}

func (r *clusterReconciler) ensureFirewallDeployment(nodeCIDR string) (*fcmv2.FirewallDeployment, error) {
func (r *clusterReconciler) ensureFirewallDeployment(nodeNetworkID string) (*fcmv2.FirewallDeployment, error) {
deploy := &fcmv2.FirewallDeployment{
ObjectMeta: metav1.ObjectMeta{
Name: r.infraCluster.Name,
@@ -440,7 +440,7 @@ func (r *clusterReconciler) ensureFirewallDeployment(nodeCIDR string) (*fcmv2.FirewallDeployment, error) {

deploy.Spec.Template.Spec.Size = r.infraCluster.Spec.Firewall.Size
deploy.Spec.Template.Spec.Image = r.infraCluster.Spec.Firewall.Image
deploy.Spec.Template.Spec.Networks = append(r.infraCluster.Spec.Firewall.AdditionalNetworks, nodeCIDR)
deploy.Spec.Template.Spec.Networks = append(r.infraCluster.Spec.Firewall.AdditionalNetworks, nodeNetworkID)
deploy.Spec.Template.Spec.RateLimits = r.infraCluster.Spec.Firewall.RateLimits
deploy.Spec.Template.Spec.EgressRules = r.infraCluster.Spec.Firewall.EgressRules
deploy.Spec.Template.Spec.LogAcceptedConnections = ptr.Deref(r.infraCluster.Spec.Firewall.LogAcceptedConnections, false)
@@ -452,6 +452,10 @@ func (r *clusterReconciler) ensureFirewallDeployment(nodeCIDR string) (*fcmv2.FirewallDeployment, error) {
deploy.Spec.Template.Spec.NftablesExporterVersion = ""
deploy.Spec.Template.Spec.NftablesExporterURL = ""

// TODO: we need to allow internet connection for the nodes before the firewall-controller can connect to the control-plane
// the FCM currently does not support this
deploy.Spec.Template.Spec.Userdata = ""

// TODO: do we need to generate ssh keys for the machines and the firewall in this controller?
deploy.Spec.Template.Spec.SSHPublicKeys = nil
