From 3b2266753f294ee9a1d40bc1c89e162520de44b5 Mon Sep 17 00:00:00 2001
From: Scott Smith
Date: Thu, 19 Sep 2024 14:37:28 -0700
Subject: [PATCH] fixmybmc fpga_ver.sh check

Summary:
From oncall runbook: https://www.internalfb.com/intern/wiki/FBOSS_Platform/FBOSS_Platform_Oncall_Runbook/#fbar-hostname-audits-min

Simply checks the output of fpga_ver.sh. After D62903380 fpga_ver.sh will `exit 1` if any PIM is not able to be detected.

Test Plan:
```root@fboss311782520-oob:~# fixmybmc
Loaded 4 checks
Running checks...
Running check: fpga_ver_ok
Running check: provisioning_stable_image
Running check: eth0_up
Running check: sshd_running
Result summary:
Passed: 4
Problems: 0
Errors: 0

root@fboss311782520-oob:~# vi /usr/local/bin/fpga_ver.sh
root@fboss311782520-oob:~# fixmybmc
Loaded 4 checks
Running checks...
Running check: fpga_ver_ok
Running check: provisioning_stable_image
Running check: eth0_up
Running check: sshd_running
Result summary:
Passed: 3
Problems: 1
Errors: 0

fpga_ver_ok
fpga_ver.sh returned error. Check the below output for more details.
Command: fpga_ver.sh
stdout:
DOMFPGA1 is not detected
DOMFPGA2: 0.56
Not all DOMFPGA or PIM were detected/inserted. Please review the logs above.... exiting
Remediation:
Run `wedge_power.sh reset -s` to powercycle the device then run fixmybmc again and see if it has resolved. If not, send this error to ENS Break/Fix.
```

Reviewed By: alandau

Differential Revision: D63042979

fbshipit-source-id: bfbcda69b0eab0eb4e6ffc6cc859623cbabf8179
---
 .../files/fixmybmc/modules/__init__.py      |  2 +-
 .../fixmybmc/files/fixmybmc/modules/fpga.py | 26 ++++++++++++++++++
 .../fixmybmc/files/fixmybmc/status.py       | 27 ++++++++++++++-----
 3 files changed, 47 insertions(+), 8 deletions(-)
 create mode 100644 common/recipes-utils/fixmybmc/files/fixmybmc/modules/fpga.py

diff --git a/common/recipes-utils/fixmybmc/files/fixmybmc/modules/__init__.py b/common/recipes-utils/fixmybmc/files/fixmybmc/modules/__init__.py
index 2c1c687d371..76356709d3e 100644
--- a/common/recipes-utils/fixmybmc/files/fixmybmc/modules/__init__.py
+++ b/common/recipes-utils/fixmybmc/files/fixmybmc/modules/__init__.py
@@ -1,3 +1,3 @@
 # Copyright 2015-present Facebook. All rights reserved.
 
-from . import image_version, interface, ssh  # noqa: F401
+from . import fpga, image_version, interface, ssh  # noqa: F401
diff --git a/common/recipes-utils/fixmybmc/files/fixmybmc/modules/fpga.py b/common/recipes-utils/fixmybmc/files/fixmybmc/modules/fpga.py
new file mode 100644
index 00000000000..86fa5654cfb
--- /dev/null
+++ b/common/recipes-utils/fixmybmc/files/fixmybmc/modules/fpga.py
@@ -0,0 +1,26 @@
+from fixmybmc.bmccheck import bmcCheck
+from fixmybmc.status import Problem
+from fixmybmc.utils import run_cmd
+
+
+@bmcCheck
+def fpga_ver_ok():
+    """
+    Check if output from fpga_ver.sh is ok
+    """
+    check_cmd = "fpga_ver.sh"
+
+    status = run_cmd(check_cmd.split(" "))
+    if status.returncode == 0:
+        return None
+    return Problem(
+        description=(
+            "fpga_ver.sh returned error. Check the below output for more details."
+        ),
+        cmd_status=status,
+        manual_remediation=(
+            "Run `wedge_power.sh reset -s` to powercycle the device then run "
+            "fixmybmc again and see if it has resolved. If not, send this error "
+            "to ENS Break/Fix."
+        ),
+    )
diff --git a/common/recipes-utils/fixmybmc/files/fixmybmc/status.py b/common/recipes-utils/fixmybmc/files/fixmybmc/status.py
index f99dfae6c9e..9086c5a545c 100644
--- a/common/recipes-utils/fixmybmc/files/fixmybmc/status.py
+++ b/common/recipes-utils/fixmybmc/files/fixmybmc/status.py
@@ -1,4 +1,6 @@
 # pyre
+from subprocess import CompletedProcess
+from typing import List
 
 
 class Status:
@@ -40,12 +42,8 @@ def info(self):
             parts.append(self.exception)
         if self.description is not None:
             parts.append(self.description)
-        if self.cmd_status.args:
-            parts.append(f"Command: {' '.join(self.cmd_status.args)}")
-        if self.cmd_status.stdout is not None:
-            parts.append(f"stdout: {self.cmd_status.stdout}")
-        if self.cmd_status.stderr is not None:
-            parts.append(f"stderr: {self.cmd_status.stderr}")
+        if self.cmd_status:
+            parts += get_cmd_status_text(self.cmd_status)
         return "\n".join(parts) or None
 
 
@@ -59,12 +57,14 @@ def __init__(
         *,
         description=None,
         exception=None,
+        cmd_status=None,
         manual_remediation=None,
     ) -> None:
         if description is None and exception is None:
             raise TypeError("either description or exception must be provided")
         self.description = description
         self.exception = exception
+        self.cmd_status = cmd_status
         self.manual_remediation = manual_remediation
 
     @property
@@ -78,6 +78,19 @@ def info(self):
             parts.append(self.description)
         if self.exception is not None:
             parts.append(f"{self.exception.__class__.__qualname__}: {self.exception}")
+        if self.cmd_status:
+            parts += get_cmd_status_text(self.cmd_status)
         if self.has_manual_remediation:
-            parts.append(f"Remediation: {self.manual_remediation}")
+            parts.append(f"Remediation:\n{self.manual_remediation}")
         return "\n".join(parts) or None
+
+
+def get_cmd_status_text(cmd_status: CompletedProcess) -> List[str]:
+    parts = []
+    if cmd_status.args:
+        parts.append(f"Command: {' '.join(cmd_status.args)}")
+    if cmd_status.stdout:
+        parts.append(f"stdout:\n{cmd_status.stdout}")
+    if cmd_status.stderr:
+        parts.append(f"stderr:\n{cmd_status.stderr}")
+    return parts