From c4daddecfc085fad6121c7f642098d944e0b98b2 Mon Sep 17 00:00:00 2001
From: Kenneth Hoste <kenneth.hoste@ugent.be>
Date: Mon, 12 Feb 2024 21:33:55 +0100
Subject: [PATCH] add Lmod hook to set $OMPI_MCA_btl to '^smcuda' when loading
 OpenMPI module

---
 create_lmodrc.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/create_lmodrc.py b/create_lmodrc.py
index 0e738a530e..ca702346b4 100755
--- a/create_lmodrc.py
+++ b/create_lmodrc.py
@@ -94,7 +94,28 @@
     end
 end
 
+local function openmpi_load_hook(t)
+    -- Lmod load hook: exclude the smcuda BTL when an OpenMPI module is loaded for
+    -- aarch64/neoverse_v1, to work around hang/crash due to bug in OpenMPI;
+    -- see https://gitlab.com/eessi/support/-/issues/41
+    local moduleName = string.match(t.modFullName, "(.-)/")
+    local cpuTarget = os.getenv("EESSI_SOFTWARE_SUBDIR") or ""
+    if (moduleName == "OpenMPI") and (cpuTarget == "aarch64/neoverse_v1") then
+        -- Open MPI only accepts '^' as a prefix that negates the whole list, so an
+        -- existing exclusion list gets plain 'smcuda' appended (once); an inclusive
+        -- list is left alone, since only the components it names are used anyway.
+        local ompiMcaBtl = os.getenv("OMPI_MCA_btl") or ""
+        local excluded = ("," .. ompiMcaBtl:sub(2) .. ","):find(",smcuda,", 1, true)
+        if ompiMcaBtl == "" or (ompiMcaBtl:sub(1, 1) == "^" and not excluded) then
+            local msg = "Adding '^smcuda' to $OMPI_MCA_btl to work around bug in OpenMPI"
+            LmodMessage(msg .. " (see https://gitlab.com/eessi/support/-/issues/41)")
+            setenv("OMPI_MCA_btl", (ompiMcaBtl == "") and "^smcuda" or (ompiMcaBtl .. ",smcuda"))
+        end
+    end
+end
+
 hook.register("load", cuda_enabled_load_hook)
+hook.register("load", openmpi_load_hook)
 """
 
 def error(msg):