From 50817d16b4ef2d3287b7e5fefe1340aef75f5733 Mon Sep 17 00:00:00 2001 From: David J Wilder Date: Mon, 23 Sep 2024 14:31:24 -0700 Subject: [PATCH 1/3] Race in configure-ovs.sh affects bonding interface configuration. Bonded network configurations with mode=active-backup and fail_over_mac=follow are not functioning due to a race when activating network profiles. activate_nm_connections() attempts to activate all of its generated profiles that are not currently in the "activated" state. As autoconnect-slaves is set, once br-ex is activated the bond and all its slaves are automatically activated. Their state is set to "activating" until they become active. The "activating" state is not tested for; therefore some of the subordinate profiles may be activated multiple times, causing a race in the bonding driver and incorrectly configuring the bond. Link: https://github.com/openshift/machine-config-operator/issues/4605 Signed-off-by: David Wilder --- templates/common/_base/files/configure-ovs-network.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/common/_base/files/configure-ovs-network.yaml b/templates/common/_base/files/configure-ovs-network.yaml index ff924592aa..3d3f369c57 100644 --- a/templates/common/_base/files/configure-ovs-network.yaml +++ b/templates/common/_base/files/configure-ovs-network.yaml @@ -573,7 +573,7 @@ contents: # But set the entry in master_interfaces to true if this is a slave # Also set autoconnect to yes local active_state=$(nmcli -g GENERAL.STATE conn show "$conn") - if [ "$active_state" == "activated" ]; then + if [ "$active_state" == "activated" ] || [ "$active_state" == "activating" ]; then echo "Connection $conn already activated" if $is_slave; then master_interfaces[$master_interface]=true From a6773f6167927958b187630cdd3f06f0f34403aa Mon Sep 17 00:00:00 2001 From: David J Wilder Date: Mon, 9 Dec 2024 10:01:32 -0800 Subject: [PATCH 2/3] Revert "Race in configure-ovs.sh affects bonding interface configuration." 
This reverts commit 50817d16b4ef2d3287b7e5fefe1340aef75f5733. --- templates/common/_base/files/configure-ovs-network.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/common/_base/files/configure-ovs-network.yaml b/templates/common/_base/files/configure-ovs-network.yaml index 3d3f369c57..ff924592aa 100644 --- a/templates/common/_base/files/configure-ovs-network.yaml +++ b/templates/common/_base/files/configure-ovs-network.yaml @@ -573,7 +573,7 @@ contents: # But set the entry in master_interfaces to true if this is a slave # Also set autoconnect to yes local active_state=$(nmcli -g GENERAL.STATE conn show "$conn") - if [ "$active_state" == "activated" ] || [ "$active_state" == "activating" ]; then + if [ "$active_state" == "activated" ]; then echo "Connection $conn already activated" if $is_slave; then master_interfaces[$master_interface]=true From 76712800752e8989fb86e05b78c1a87f89227b25 Mon Sep 17 00:00:00 2001 From: David J Wilder Date: Mon, 9 Dec 2024 10:12:46 -0800 Subject: [PATCH 3/3] Give bonding slaves time to implicitly activate. With bonded network configurations, slave interfaces will be implicitly activated after br-ex is explicitly activated. This implicit activation can take a number of seconds; during this time, if one and only one slave is explicitly activated, the bonding driver may set the same MAC address on both slaves. This will cause the bond to fail when the option fail_over_mac=follow is set. This change gives bond slaves up to 5 seconds to implicitly activate, preventing the issue. 
Link: https://github.com/openshift/machine-config-operator/issues/4605 Signed-off-by: David Wilder --- templates/common/_base/files/configure-ovs-network.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/templates/common/_base/files/configure-ovs-network.yaml b/templates/common/_base/files/configure-ovs-network.yaml index ff924592aa..0a006ea592 100644 --- a/templates/common/_base/files/configure-ovs-network.yaml +++ b/templates/common/_base/files/configure-ovs-network.yaml @@ -569,6 +569,13 @@ contents: fi fi + # slaves should implicitly activate, give them a chance to do so + if $is_slave; then + if ! timeout 5 bash -c "while ! nmcli -g GENERAL.STATE conn show "$conn" | grep activated; do sleep 1; done"; then + echo "WARNING: slave $conn did not implicitly activate in 5s, activating explicitly." + fi + fi + # Do not activate interfaces that are already active # But set the entry in master_interfaces to true if this is a slave # Also set autoconnect to yes