-
Notifications
You must be signed in to change notification settings - Fork 44
/
mellanox-firmware-version
executable file
·53 lines (50 loc) · 3.01 KB
/
mellanox-firmware-version
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/env bash
# description: Checks if the nodes' Mellanox ConnectX firmware version is below the recommended version.
# lspci -nn shows PCI vendor and device codes (and names)
# Mellanox MT27710 Family [ConnectX-4 Lx] 25GbE dual-port SFP28 with **vendor ID 0x15b3 and device ID 0x1015**
# Mellanox MT27800 Family [ConnectX-5] 25GbE dual-port SFP28 with **vendor ID 0x15b3 and device ID 0x1017**
# Mellanox MT27800 Family [ConnectX-5] 100GbE with **vendor ID 0x15b3 and device ID 0x1017**
# Mellanox MT27700 Family [ConnectX-4] VPI adapter card, EDR IB (100Gb/s), single-port QSFP28 with **vendor ID 0x15b3 and device ID 0x1013**
# Mellanox MT27800 Family [ConnectX-5] VPI adapter card, EDR IB (100Gb/s), single-port QSFP28 with **vendor ID 0x15b3 and device ID 0x1017**
# Mellanox MT28908 Family [ConnectX-6] VPI adapter card, 100Gb/s (HDR100, EDR IB), single-port QSFP56 with **vendor ID 0x15b3 and device ID 0x101b**
# Mellanox MT28908 Family [ConnectX-6] VPI adapter card, HDR200 IB (200Gb/s), single-port QSFP56 with **vendor ID 0x15b3 and device ID 0x101b**
# PCI vendor:device IDs of the Mellanox adapters to look for on each node.
IDS="15b3:1015 15b3:1017 15b3:1013 15b3:101b"
# Minimum recommended firmware version, as MAJOR.MINOR.
MIN_VERS=16.28

# When UTILSFILE is empty, source ../utils relative to this script.
# NOTE(review): msg, ocdebugorwait, the color variables, OCDEBUGIMAGE, errors,
# ERRORFILE and the OC* exit codes are not defined in this file; presumably
# they are provided by utils or by the caller - confirm.
# shellcheck disable=SC1090,SC1091
[[ -z "${UTILSFILE:-}" ]] && source "$(dirname "${0}")/../utils"

#######################################
# Tells whether a firmware version is lower than a minimum version.
# Only the first two dot-separated fields (MAJOR.MINOR) are considered and
# they are compared as integers, so "16.9" is lower than "16.28" and "9.5"
# is lower than "16.28" (a plain string comparison gets both wrong).
# Arguments:
#   $1 - version to check, e.g. "16.27" or "16.27.2008"
#   $2 - minimum version, e.g. "16.28"
# Returns:
#   0 if $1 is lower than $2, 1 if it is equal or higher,
#   2 if MAJOR or MINOR of either version is not a non-negative integer.
#######################################
fw_below_min() {
  local have_major have_minor want_major want_minor field
  IFS=. read -r have_major have_minor _ <<<"${1}"
  IFS=. read -r want_major want_minor _ <<<"${2}"
  for field in "${have_major}" "${have_minor}" "${want_major}" "${want_minor}"; do
    [[ "${field}" =~ ^[0-9]+$ ]] || return 2
  done
  # 10# forces base 10 so a field such as "08" is not parsed as octal.
  if ((10#${have_major} != 10#${want_major})); then
    ((10#${have_major} < 10#${want_major}))
    return
  fi
  ((10#${have_minor} < 10#${want_minor}))
}

if oc auth can-i debug node >/dev/null 2>&1; then
  msg "Checking Mellanox firmware version (${BLUE}using oc debug, it can take a while${NOCOLOR})"
  fw_errors=0
  # The go-template prints node/<name> for every node whose "Ready" condition
  # has status "True".
  # shellcheck disable=SC2016
  for node in $(oc get nodes -o go-template='{{range .items}}{{$node := .}}{{range .status.conditions}}{{if eq .type "Ready"}}{{if eq .status "True"}}node/{{$node.metadata.name}}{{"\n"}}{{end}}{{end}}{{end}}{{end}}'); do
    ocdebugorwait # Pause for no OC debug running
    # On the node: for every PCI device matching one of IDS, print
    # "<pci address>,<firmware version>" where the version is the first word
    # of the firmware-version line reported by ethtool -i for its net device.
    if ! FIRMWAREVERS=$(oc debug --image="${OCDEBUGIMAGE}" "${node}" -- chroot /host sh -c "for id in ${IDS}; do for device in \$(lspci -D -d "\${id}" | awk '{ print \$1 }'); do echo -n \"\${device},\" ; ethtool -i \$(ls /sys/bus/pci/devices/\${device}/net/)|grep firmware-version|cut -d: -f2-|xargs echo|awk '{ print \$1 }';done;done" 2>/dev/null); then
      msg "${ORANGE}Error running oc debug in ${node}${NOCOLOR}"
      continue
    fi
    if [[ -z "${FIRMWAREVERS}" ]]; then
      msg "Couldn't find Mellanox firmware version in ${node}"
      continue
    fi
    # Word splitting is intended: one "<device>,<version>" entry per word.
    for result in ${FIRMWAREVERS}; do
      IFS=, read -r dev fw_full _ <<<"${result}"
      # Keep only MAJOR.MINOR; this is also what gets shown in the message.
      IFS=. read -r fw_major fw_minor _ <<<"${fw_full}"
      fw="${fw_major}.${fw_minor}"
      rc=0
      fw_below_min "${fw}" "${MIN_VERS}" || rc=$?
      case "${rc}" in
        0)
          msg "Firmware for Mellanox card ${RED}${dev}${NOCOLOR} (${fw}) on ${RED}${node}${NOCOLOR} is below the minimum recommended version. Please upgrade to at least ${GREEN}${MIN_VERS}${NOCOLOR}."
          errors=$((errors + 1))
          fw_errors=$((fw_errors + 1))
          if [[ -n "${ERRORFILE:-}" ]]; then
            printf '%s\n' "${errors}" >"${ERRORFILE}"
          fi
          ;;
        1)
          # Firmware is at or above the minimum: nothing to report.
          ;;
        *)
          # No usable version was reported for this device (for instance
          # ethtool printed nothing); say so instead of comparing garbage.
          msg "Couldn't find Mellanox firmware version for ${dev} in ${node}"
          ;;
      esac
    done
  done
  if ((fw_errors > 0)); then
    exit "${OCERROR}"
  fi
  exit "${OCINFO}"
else
  msg "Couldn't debug nodes, check permissions"
  exit "${OCSKIP}"
fi
# Not reached: both branches above exit.
exit "${OCUNKNOWN}"