From 617fc44f38f00cb9ea71d95cb0bb69f0b6ccf25d Mon Sep 17 00:00:00 2001 From: Lars Kellogg-Stedman Date: Sat, 25 Jun 2022 08:45:08 -0400 Subject: [PATCH 1/3] Disable predictable network device names. Despite having identical PCI configurations, ctl-0 has different network device names than ctl-1 and ctl-2. By disabling "predictable network interface names" [1], we end up with consistent device names across the cluster. This drastically simplifies the process of applying networking configuration changes across the cluster. [1]: https://www.freedesktop.org/wiki/Software/systemd/PredictableNetworkInterfaceNames/ --- .../overlays/nerc-ocp-infra/kustomization.yaml | 1 + .../machineconfigs/disable-net-ifnames.yaml | 13 +++++++++++++ 2 files changed, 14 insertions(+) create mode 100644 cluster-scope/overlays/nerc-ocp-infra/machineconfigs/disable-net-ifnames.yaml diff --git a/cluster-scope/overlays/nerc-ocp-infra/kustomization.yaml b/cluster-scope/overlays/nerc-ocp-infra/kustomization.yaml index 61e931cd..d02a7688 100644 --- a/cluster-scope/overlays/nerc-ocp-infra/kustomization.yaml +++ b/cluster-scope/overlays/nerc-ocp-infra/kustomization.yaml @@ -8,6 +8,7 @@ resources: - ../../bundles/acm - ../../bundles/odf - clusterversion.yaml +- machineconfigs/disable-net-ifnames.yaml patches: - path: oauths/cluster_patch.yaml diff --git a/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/disable-net-ifnames.yaml b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/disable-net-ifnames.yaml new file mode 100644 index 00000000..dbf0aa8c --- /dev/null +++ b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/disable-net-ifnames.yaml @@ -0,0 +1,13 @@ +apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfig +metadata: + labels: + machineconfiguration.openshift.io/role: master + name: disable-net-ifnames +spec: + config: + ignition: + version: 3.2.0 + kernelArguments: + - net.ifnames=0 + - biosdevname=0 From b9a824c84ac9f04c49936291eaa686f42f3631ea Mon Sep 17 00:00:00 
2001 From: Lars Kellogg-Stedman Date: Sat, 25 Jun 2022 21:29:44 -0400 Subject: [PATCH 2/3] Add udev rules to identify mellanox network interfaces We disabled predictable network device names in the previous commit, which means we're now at the mercy of the order in which the kernel scans network devices. If a card is added or removed from the system, what we know as "eth2" may no longer be the same interface it is now. To solve this, we add udev rules that match the network interfaces based on characteristics such as their PCI bus/device/function. --- .../overlays/nerc-ocp-infra/kustomization.yaml | 1 + .../mellanox-udev-rules/Makefile | 10 ++++++++++ .../mellanox-udev-rules/kustomization.yaml | 4 ++++ .../mellanox-udev-rules/machineconfig.yaml | 18 ++++++++++++++++++ .../mellanox-udev-rules/src/machineconfig.bu | 12 ++++++++++++ .../mellanox-udev-rules/src/mellanox.rules | 2 ++ 6 files changed, 47 insertions(+) create mode 100644 cluster-scope/overlays/nerc-ocp-infra/machineconfigs/mellanox-udev-rules/Makefile create mode 100644 cluster-scope/overlays/nerc-ocp-infra/machineconfigs/mellanox-udev-rules/kustomization.yaml create mode 100644 cluster-scope/overlays/nerc-ocp-infra/machineconfigs/mellanox-udev-rules/machineconfig.yaml create mode 100644 cluster-scope/overlays/nerc-ocp-infra/machineconfigs/mellanox-udev-rules/src/machineconfig.bu create mode 100644 cluster-scope/overlays/nerc-ocp-infra/machineconfigs/mellanox-udev-rules/src/mellanox.rules diff --git a/cluster-scope/overlays/nerc-ocp-infra/kustomization.yaml b/cluster-scope/overlays/nerc-ocp-infra/kustomization.yaml index d02a7688..795d6be6 100644 --- a/cluster-scope/overlays/nerc-ocp-infra/kustomization.yaml +++ b/cluster-scope/overlays/nerc-ocp-infra/kustomization.yaml @@ -9,6 +9,7 @@ resources: - ../../bundles/odf - clusterversion.yaml - machineconfigs/disable-net-ifnames.yaml +- machineconfigs/mellanox-udev-rules patches: - path: oauths/cluster_patch.yaml diff --git 
a/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/mellanox-udev-rules/Makefile b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/mellanox-udev-rules/Makefile new file mode 100644 index 00000000..9327d8d7 --- /dev/null +++ b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/mellanox-udev-rules/Makefile @@ -0,0 +1,10 @@ +MACHINECONFIGS = \ + machineconfig.yaml + +%.yaml: src/%.bu + butane -o $@ -d src $< + +all: $(MACHINECONFIGS) + +clean: + rm -f $(MACHINECONFIGS) diff --git a/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/mellanox-udev-rules/kustomization.yaml b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/mellanox-udev-rules/kustomization.yaml new file mode 100644 index 00000000..18be132e --- /dev/null +++ b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/mellanox-udev-rules/kustomization.yaml @@ -0,0 +1,4 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: +- machineconfig.yaml diff --git a/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/mellanox-udev-rules/machineconfig.yaml b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/mellanox-udev-rules/machineconfig.yaml new file mode 100644 index 00000000..cad458ce --- /dev/null +++ b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/mellanox-udev-rules/machineconfig.yaml @@ -0,0 +1,18 @@ +# Generated by Butane; do not edit +apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfig +metadata: + labels: + machineconfiguration.openshift.io/role: master + name: mellanox-udev-rules +spec: + config: + ignition: + version: 3.2.0 + storage: + files: + - contents: + compression: gzip + source: data:;base64,H4sIAAAAAAAC/woOdQqODA5x9bW1VcpLLVHScQnyDHMNCra1VcrNqTCNT84vSlXS8XYN8nP1AQkaGBgYWBmYWRkY6Bko6Tg6h3j6+9naKiWmpCjp+Dn6utoq5WUmGypxUWCuIU5zjZS4AAEAAP//AAlzT7AAAAA= + mode: 420 + path: /etc/udev/rules.d/90-mellanox.rules diff --git a/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/mellanox-udev-rules/src/machineconfig.bu 
b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/mellanox-udev-rules/src/machineconfig.bu new file mode 100644 index 00000000..ded6dfa4 --- /dev/null +++ b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/mellanox-udev-rules/src/machineconfig.bu @@ -0,0 +1,12 @@ +variant: openshift +version: 4.10.0 +metadata: + name: mellanox-udev-rules + labels: + machineconfiguration.openshift.io/role: master +storage: + files: + - path: /etc/udev/rules.d/90-mellanox.rules + contents: + local: mellanox.rules + mode: 0644 diff --git a/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/mellanox-udev-rules/src/mellanox.rules b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/mellanox-udev-rules/src/mellanox.rules new file mode 100644 index 00000000..64d23ed0 --- /dev/null +++ b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/mellanox-udev-rules/src/mellanox.rules @@ -0,0 +1,2 @@ +SUBSYSTEM=="net",DRIVERS=="mlx5_core",KERNELS=="0000:06:00.0",ACTION=="add",NAME="nic1" +SUBSYSTEM=="net",DRIVERS=="mlx5_core",KERNELS=="0000:06:00.1",ACTION=="add",NAME="nic2" From 082e1fe35834c768c5c1dbdfdf34c25645b90229 Mon Sep 17 00:00:00 2001 From: Lars Kellogg-Stedman Date: Thu, 23 Jun 2022 18:28:34 -0400 Subject: [PATCH 3/3] Configure bond0 interface We want to use bonded interface pairs on these systems. The nodes aren't yet wired for it, but setting this up now will allow us to refer to the `bond0` interface in e.g. VLAN configurations (and means we won't have to re-work those later). Because we're using OVNKubernetes, we can't use nmstate [1] to enact the configuration. The recommended mechanism for modifying the primary interface configuration is to apply the configuration using a MachineConfig [2] resource. By taking advantage of the changes in the previous commits, we can simply install NetworkManager connection files for bond0, nic1, and nic2. This will take effect when the system reboots (which will happen when the MachineConfig Operator applies our changes).
[1]: https://docs.openshift.com/container-platform/4.10/networking/k8s_nmstate/k8s-nmstate-about-the-k8s-nmstate-operator.html [2]: https://docs.openshift.com/container-platform/4.10/post_installation_configuration/machine-configuration-tasks.html x-branch: feature/bond0-beta --- .../nerc-ocp-infra/kustomization.yaml | 1 + .../machineconfigs/configure-bond0/Makefile | 10 +++++++ .../machineconfigs/configure-bond0/README.md | 7 +++++ .../configure-bond0/kustomization.yaml | 4 +++ .../configure-bond0/machineconfig.yaml | 26 +++++++++++++++++++ .../configure-bond0/src/bond0.nmconnection | 17 ++++++++++++ .../configure-bond0/src/machineconfig.bu | 20 ++++++++++++++ .../configure-bond0/src/nic1.nmconnection | 10 +++++++ .../configure-bond0/src/nic2.nmconnection | 10 +++++++ 9 files changed, 105 insertions(+) create mode 100644 cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/Makefile create mode 100644 cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/README.md create mode 100644 cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/kustomization.yaml create mode 100644 cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/machineconfig.yaml create mode 100644 cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/src/bond0.nmconnection create mode 100644 cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/src/machineconfig.bu create mode 100644 cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/src/nic1.nmconnection create mode 100644 cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/src/nic2.nmconnection diff --git a/cluster-scope/overlays/nerc-ocp-infra/kustomization.yaml b/cluster-scope/overlays/nerc-ocp-infra/kustomization.yaml index 795d6be6..3982405b 100644 --- a/cluster-scope/overlays/nerc-ocp-infra/kustomization.yaml +++ b/cluster-scope/overlays/nerc-ocp-infra/kustomization.yaml @@ -10,6 +10,7 @@ resources: - 
clusterversion.yaml - machineconfigs/disable-net-ifnames.yaml - machineconfigs/mellanox-udev-rules +- machineconfigs/configure-bond0 patches: - path: oauths/cluster_patch.yaml diff --git a/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/Makefile b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/Makefile new file mode 100644 index 00000000..d0b76c14 --- /dev/null +++ b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/Makefile @@ -0,0 +1,10 @@ +MACHINECONFIGS = \ + machineconfig.yaml + +%.yaml: src/%.bu $(wildcard src/*.nmconnection) + butane -o $@ -d src $< + +all: $(MACHINECONFIGS) + +clean: + rm -f $(MACHINECONFIGS) diff --git a/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/README.md b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/README.md new file mode 100644 index 00000000..f7eb8a10 --- /dev/null +++ b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/README.md @@ -0,0 +1,7 @@ +Including file content in ignition configs is a pain, because it has to be base64 encoded. The `Makefile` in this directory uses [Butane][] to transpile `MachineConfig` resources, automatically including and encoding content from files. + +Read "[Creating machine configs with Butane][]" in the [OpenShift documentation][] for more information. 
+ +[butane]: https://coreos.github.io/butane/ +[openshift documentation]: https://docs.openshift.com/container-platform/4.10/installing/install_config/installing-customizing.html +[creating machine configs with butane]: https://docs.openshift.com/container-platform/4.10/installing/install_config/installing-customizing.html#installation-special-config-butane_installing-customizing diff --git a/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/kustomization.yaml b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/kustomization.yaml new file mode 100644 index 00000000..18be132e --- /dev/null +++ b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/kustomization.yaml @@ -0,0 +1,4 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: +- machineconfig.yaml diff --git a/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/machineconfig.yaml b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/machineconfig.yaml new file mode 100644 index 00000000..e874f6db --- /dev/null +++ b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/machineconfig.yaml @@ -0,0 +1,26 @@ +# Generated by Butane; do not edit +apiVersion: machineconfiguration.openshift.io/v1 +kind: MachineConfig +metadata: + labels: + machineconfiguration.openshift.io/role: master + name: configure-bond0 +spec: + config: + ignition: + version: 3.2.0 + storage: + files: + - contents: + compression: gzip + source: data:;base64,H4sIAAAAAAAC/0zMwaqDMBCF4f15FyXeKxfvYp5EskiTKQY6M6IToW9fLEK7O/BzvjmbKmevphG10M20BPhz5fdEVeftnjJ3moSvnJrb9SPfGuOD9F+t2x/p4J0GYGZfeFP2CPFG/yEEYD61CLHCNIWf/jcVSK1iSsN49roeY4SwL1bohFGWvHZeha05TX9jCHgFAAD///WlE5PCAAAA + mode: 384 + path: /etc/NetworkManager/system-connections/bond0.nmconnection + - contents: + source: 
data:,%5Bconnection%5D%0Aid%3Dnic1%0Atype%3Dethernet%0Ainterface-name%3Dnic1%0Amaster%3Dbond0%0Aslave-type%3Dbond%0Aautoconnect%3Dtrue%0A%0A%5Bethernet%5D%0Amtu%3D9000%0A + mode: 384 + path: /etc/NetworkManager/system-connections/nic1.nmconnection + - contents: + source: data:,%5Bconnection%5D%0Aid%3Dnic2%0Atype%3Dethernet%0Ainterface-name%3Dnic2%0Amaster%3Dbond0%0Aslave-type%3Dbond%0Aautoconnect%3Dtrue%0A%0A%5Bethernet%5D%0Amtu%3D9000%0A + mode: 384 + path: /etc/NetworkManager/system-connections/nic2.nmconnection diff --git a/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/src/bond0.nmconnection b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/src/bond0.nmconnection new file mode 100644 index 00000000..0ab1a3bb --- /dev/null +++ b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/src/bond0.nmconnection @@ -0,0 +1,17 @@ +[connection] +id=bond0 +type=bond +interface-name=bond0 +autoconnect=true +connection.autoconnect-slaves=1 + +[ethernet] +mtu=9000 + +[bond] +mode=802.3ad +miimon=140 + +[ipv4] +method=auto +dhcp-timeout=86400 diff --git a/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/src/machineconfig.bu b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/src/machineconfig.bu new file mode 100644 index 00000000..bd100b44 --- /dev/null +++ b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/src/machineconfig.bu @@ -0,0 +1,20 @@ +variant: openshift +version: 4.10.0 +metadata: + name: configure-bond0 + labels: + machineconfiguration.openshift.io/role: master +storage: + files: + - path: /etc/NetworkManager/system-connections/bond0.nmconnection + mode: 0600 + contents: + local: bond0.nmconnection + - path: /etc/NetworkManager/system-connections/nic1.nmconnection + mode: 0600 + contents: + local: nic1.nmconnection + - path: /etc/NetworkManager/system-connections/nic2.nmconnection + mode: 0600 + contents: + local: nic2.nmconnection diff --git 
a/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/src/nic1.nmconnection b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/src/nic1.nmconnection new file mode 100644 index 00000000..4e1a4bbb --- /dev/null +++ b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/src/nic1.nmconnection @@ -0,0 +1,10 @@ +[connection] +id=nic1 +type=ethernet +interface-name=nic1 +master=bond0 +slave-type=bond +autoconnect=true + +[ethernet] +mtu=9000 diff --git a/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/src/nic2.nmconnection b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/src/nic2.nmconnection new file mode 100644 index 00000000..9fdac497 --- /dev/null +++ b/cluster-scope/overlays/nerc-ocp-infra/machineconfigs/configure-bond0/src/nic2.nmconnection @@ -0,0 +1,10 @@ +[connection] +id=nic2 +type=ethernet +interface-name=nic2 +master=bond0 +slave-type=bond +autoconnect=true + +[ethernet] +mtu=9000