diff --git a/crates/kiss/ansible/src/config.rs b/crates/kiss/ansible/src/config.rs index 3219efea..861a3354 100644 --- a/crates/kiss/ansible/src/config.rs +++ b/crates/kiss/ansible/src/config.rs @@ -17,6 +17,7 @@ pub struct KissConfig { pub group_enforce_ansible_control_planes: bool, pub group_force_reset: bool, pub group_force_reset_os: bool, + pub group_reset_storage: bool, pub kiss_cluster_name: String, pub kubespray_image: String, pub network_interface_mtu_size: u16, @@ -27,6 +28,7 @@ pub struct KissConfig { pub network_ipv4_subnet: Ipv4Net, pub network_nameserver_incluster_ipv4: Ipv4Addr, pub os_default: String, + pub os_kernel: String, } impl KissConfig { @@ -55,6 +57,7 @@ impl KissConfig { )?, group_force_reset: infer(&config, "group_force_reset")?, group_force_reset_os: infer(&config, "group_force_reset_os")?, + group_reset_storage: infer(&config, "group_reset_storage")?, kiss_cluster_name: infer(&config, "kiss_cluster_name")?, kubespray_image: infer(&config, "kubespray_image")?, network_interface_mtu_size: infer(&config, "network_interface_mtu_size")?, @@ -65,6 +68,7 @@ impl KissConfig { network_ipv4_subnet: infer(&config, "network_ipv4_subnet")?, network_nameserver_incluster_ipv4: infer(&config, "network_nameserver_incluster_ipv4")?, os_default: infer(&config, "os_default")?, + os_kernel: infer(&config, "os_kernel")?, }) } } diff --git a/crates/kiss/ansible/src/lib.rs b/crates/kiss/ansible/src/lib.rs index 6aa12416..711c7c50 100644 --- a/crates/kiss/ansible/src/lib.rs +++ b/crates/kiss/ansible/src/lib.rs @@ -149,6 +149,14 @@ impl AnsibleClient { }), spec: Some(PodSpec { affinity: Some(crate::job::affinity()), + dns_config: Some(PodDNSConfig { + nameservers: Some(vec![ + self.kiss.bootstrapper_network_dns_server_ns1.to_string(), + self.kiss.bootstrapper_network_dns_server_ns2.to_string(), + ]), + ..Default::default() + }), + host_network: Some(true), priority_class_name: Some(priority_class_name.into()), restart_policy: Some("OnFailure".into()), service_account: Some("ansible-playbook".into()), @@ -312,6 +320,11 @@ impl AnsibleClient { value: Some(self.kiss.group_force_reset_os.to_string()), ..Default::default() }, + EnvVar { + name: "kiss_group_reset_storage".into(), + value: Some(self.kiss.group_reset_storage.to_string()), + ..Default::default() + }, EnvVar { name: "kiss_group_role".into(), value: Some(group.role.to_string()), @@ -420,6 +433,11 @@ impl AnsibleClient { value: Some(self.kiss.os_default.to_string()), ..Default::default() }, + EnvVar { + name: "kiss_os_kernel".into(), + value: Some(self.kiss.os_kernel.to_string()), + ..Default::default() + }, EnvVar { name: "kiss_power_intel_amt_host".into(), value: job diff --git a/templates/bootstrap/bootstrap.sh b/templates/bootstrap/bootstrap.sh index 8316c278..d7dfe8de 100755 --- a/templates/bootstrap/bootstrap.sh +++ b/templates/bootstrap/bootstrap.sh @@ -732,7 +732,7 @@ function install_kiss_cluster() { kubectl -n kiss rollout status deployment operator # Register the boxes - for node in ${nodes}; do + for node in $@; do __log 'INFO' "Registering the box: ${node} ..." cat <>/etc/dnf/dnf.conf echo 'fastestmirror=True' >>/etc/dnf/dnf.conf echo 'max_parallel_downloads=5' >>/etc/dnf/dnf.conf -# Kernel Configuration -## Install Bleeding-edge kernel -dnf install --enablerepo='elrepo-kernel' -y \ - kernel-ml \ - kernel-ml-core \ - kernel-ml-devel \ - kernel-ml-modules \ - kernel-ml-modules-extra - # Advanced Network configuration mkdir -p /etc/NetworkManager/system-connections/ ## Wireless - WIFI diff --git a/templates/kiss/tasks/commission/storage.yaml b/templates/kiss/tasks/commission/storage.yaml index 9b2a4c4c..27733d84 100644 --- a/templates/kiss/tasks/commission/storage.yaml +++ b/templates/kiss/tasks/commission/storage.yaml @@ -140,6 +140,7 @@ - name: Cleanup disks when: + - kiss_group_reset_storage is not defined or kiss_group_reset_storage - kiss_group_role_is_member is defined and kiss_group_role_is_member - not kiss_os_exists or kiss_os_dirty - not kiss_storage_exists diff --git a/templates/kiss/tasks/commission/system-kernel-upgrade.yaml b/templates/kiss/tasks/commission/system-kernel-upgrade.yaml index 7208a41e..e21da7f7 100644 --- a/templates/kiss/tasks/commission/system-kernel-upgrade.yaml +++ b/templates/kiss/tasks/commission/system-kernel-upgrade.yaml @@ -1,5 +1,6 @@ --- - name: Check whether the latest kernel is running + when: kiss_os_kernel is defined and kiss_os_kernel == 'edge' block: - name: Check whether the latest kernel is running - Flatcar Container Linux when: kiss_os_default == 'flatcar' @@ -8,11 +9,12 @@ - name: Check whether the latest kernel is running - RockyLinux when: kiss_os_default == 'rocky9' set_fact: - kiss_is_kernel_latest: "{{ '.elrepo.' in ansible_facts.kernel }}" + kiss_is_kernel_latest: "{{ '.fc' in ansible_facts.kernel }}" register: boot_file - name: Upgrade kernel - RockyLinux when: + - kiss_os_kernel is defined and kiss_os_kernel == 'edge' - not kiss_is_kernel_latest - kiss_os_default == 'rocky9' block: @@ -21,32 +23,54 @@ name: "*" state: latest - - name: Upgrade kernel - RockyLinux - Add ELRepo Repository + - name: Upgrade kernel - RockyLinux - Install dependencies dnf: name: - - elrepo-release + - koji - grubby - sqlite - enablerepo: elrepo-kernel + enablerepo: epel state: latest - - name: Upgrade kernel - RockyLinux - Remove Stable Kernel - dnf: - name: - - kernel - - kernel-core - - kernel-devel - - kernel-modules - - kernel-modules-core - state: absent - autoremove: no + - name: Upgrade kernel - RockyLinux - Get the latest kernel version + shell: + cmd: > + koji list-builds --package=kernel --state=COMPLETE + | awk '{print $1}' + | grep -P '^kernel-[0-9\.-]+\.fc[0-9]+$' + | sort -V + | tail -n1 + register: kiss_kernel_version + + - name: Upgrade kernel - RockyLinux - Install Bleeding-edge Kernel ({{ kiss_kernel_version.stdout }}) + vars: + kiss_kernel_dir: /tmp/kernel-koji + shell: + cmd: > + mkdir -p "{{ kiss_kernel_dir }}" + && cd "{{ kiss_kernel_dir }}" + && koji download-build --noprogress --arch="{{ ansible_architecture }}" "{{ kiss_kernel_version.stdout }}" + && dnf install -y + "{{ kiss_kernel_dir }}/kernel-{{ kiss_kernel_version.stdout[7:] }}.{{ ansible_architecture }}.rpm" + "{{ kiss_kernel_dir }}/kernel-core-{{ kiss_kernel_version.stdout[7:] }}.{{ ansible_architecture }}.rpm" + "{{ kiss_kernel_dir }}/kernel-devel-{{ kiss_kernel_version.stdout[7:] }}.{{ ansible_architecture }}.rpm" + "{{ kiss_kernel_dir }}/kernel-modules-{{ kiss_kernel_version.stdout[7:] }}.{{ ansible_architecture }}.rpm" + "{{ kiss_kernel_dir }}/kernel-modules-core-{{ kiss_kernel_version.stdout[7:] }}.{{ ansible_architecture }}.rpm" + && cd / + && rm -rf "{{ kiss_kernel_dir }}" + && grubby --set-default="/boot/vmlinuz-{{ kiss_kernel_version.stdout[7:] }}.{{ ansible_architecture }}" + && grub2-mkconfig -o /boot/grub2/grub.cfg - name: Reboot the boxes if upgraded (The task will be restarted) - when: not kiss_is_kernel_latest + when: + - kiss_os_kernel is defined and kiss_os_kernel == 'edge' + - not kiss_is_kernel_latest reboot: reboot_timeout: 3600 # 1h (booting can take a long time) - name: Assert rebooting - when: not kiss_is_kernel_latest + when: + - kiss_os_kernel is defined and kiss_os_kernel == 'edge' + - not kiss_is_kernel_latest fail: msg: The nodes should be rebooted! diff --git a/templates/kiss/tasks/commission/system-systemd.yaml b/templates/kiss/tasks/commission/system-systemd.yaml new file mode 100644 index 00000000..c483334f --- /dev/null +++ b/templates/kiss/tasks/commission/system-systemd.yaml @@ -0,0 +1,8 @@ +--- +# See: https://docs.cilium.io/en/v1.16/operations/performance/tuning/#stop-irqbalance-and-pin-the-nic-interrupts-to-specific-cpus +- name: Disable irqbalance + systemd: + name: irqbalance.service + state: stopped + enabled: false + daemon_reload: true diff --git a/templates/kiss/tasks/commission/system.yaml b/templates/kiss/tasks/commission/system.yaml index c18fb000..5ce8aaac 100644 --- a/templates/kiss/tasks/commission/system.yaml +++ b/templates/kiss/tasks/commission/system.yaml @@ -1,3 +1,6 @@ --- - name: Provision Kernel include_tasks: system-kernel.yaml + +- name: Provision SystemD + include_tasks: system-systemd.yaml diff --git a/templates/kiss/tasks/common/playbook-common.yaml b/templates/kiss/tasks/common/playbook-common.yaml index 28a213cb..faf84593 100644 --- a/templates/kiss/tasks/common/playbook-common.yaml +++ b/templates/kiss/tasks/common/playbook-common.yaml @@ -52,6 +52,7 @@ kiss_group_enable_default_cluster: "{{ lookup('env', 'kiss_group_enable_default_cluster') == 'true' }}" kiss_group_force_reset: "{{ lookup('env', 'kiss_group_force_reset') == 'true' }}" kiss_group_force_reset_os: "{{ lookup('env', 'kiss_group_force_reset_os') == 'true' }}" + kiss_group_reset_storage: "{{ lookup('env', 'kiss_group_reset_storage') == 'true' }}" kiss_group_role: "{{ lookup('env', 'kiss_group_role') }}" kiss_group_role_is_domain_specific: "{{ lookup('env', 'kiss_group_role_is_domain_specific') }}" kiss_group_role_is_member: "{{ lookup('env', 'kiss_group_role_is_member') == 'true' }}" @@ -71,6 +72,7 @@ kiss_network_wireless_wifi_ssid: "{{ lookup('env', 'kiss_network_wireless_wifi_ssid') }}" kiss_os_default: "{{ lookup('env', 'kiss_os_default') }}" kiss_os_hot_install: "{{ lookup('env', 'kiss_os_default') in ['flatcar'] }}" + kiss_os_kernel: "{{ lookup('env', 'kiss_os_kernel') }}" kiss_power_intel_amt_host: "{{ lookup('env', 'kiss_power_intel_amt_host') }}" kiss_power_intel_amt_username: "{{ lookup('env', 'kiss_power_intel_amt_username') }}" kiss_power_intel_amt_password: "{{ lookup('env', 'kiss_power_intel_amt_password') }}" diff --git a/templates/kiss/tasks/join/add-node-role.yaml b/templates/kiss/tasks/join/add-node-role.yaml index 82fb7e4a..5a231010 100644 --- a/templates/kiss/tasks/join/add-node-role.yaml +++ b/templates/kiss/tasks/join/add-node-role.yaml @@ -1,4 +1,18 @@ --- +- name: Post-install | Wait for apiserver to be operated + hosts: target + tasks: + - command: "{{ bin_dir }}/kubectl version" + delegate_to: "{{ groups['kube_control_plane'] | first }}" + retries: 10 + delay: 5 + + - command: > + {{ bin_dir }}/kubectl get nodes {{ inventory_hostname }} --output name + delegate_to: "{{ groups['kube_control_plane'] | first }}" + retries: 10 + delay: 5 + - hosts: target tasks: - name: Mark the bootstrapped node as "{{ kiss_group_role }}"