Skip to content

Commit

Permalink
Merge pull request #4351 from boegel/run_shell_cmd_error
Browse files Browse the repository at this point in the history
improve error reporting for failing shell commands (and EasyBuild crashes)
  • Loading branch information
lexming authored Oct 23, 2023
2 parents 37d7a0e + ece71c2 commit 5ae74c9
Show file tree
Hide file tree
Showing 11 changed files with 283 additions and 57 deletions.
21 changes: 10 additions & 11 deletions easybuild/framework/easyblock.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@
from easybuild.tools.hooks import MODULE_STEP, MODULE_WRITE, PACKAGE_STEP, PATCH_STEP, PERMISSIONS_STEP, POSTITER_STEP
from easybuild.tools.hooks import POSTPROC_STEP, PREPARE_STEP, READY_STEP, SANITYCHECK_STEP, SOURCE_STEP
from easybuild.tools.hooks import SINGLE_EXTENSION, TEST_STEP, TESTCASES_STEP, load_hooks, run_hook
from easybuild.tools.run import check_async_cmd, run_cmd
from easybuild.tools.run import RunShellCmdError, check_async_cmd, run_cmd
from easybuild.tools.jenkins import write_to_xml
from easybuild.tools.module_generator import ModuleGeneratorLua, ModuleGeneratorTcl, module_generator, dependencies_for
from easybuild.tools.module_naming_scheme.utilities import det_full_ec_version
Expand Down Expand Up @@ -4124,6 +4124,11 @@ def run_all_steps(self, run_test_cases):
start_time = datetime.now()
try:
self.run_step(step_name, step_methods)
except RunShellCmdError as err:
err.print()
ec_path = os.path.basename(self.cfg.path)
error_msg = f"shell command '{err.cmd_name} ...' failed in {step_name} step for {ec_path}"
raise EasyBuildError(error_msg)
finally:
if not self.dry_run:
step_duration = datetime.now() - start_time
Expand Down Expand Up @@ -4225,7 +4230,7 @@ def build_and_install_one(ecdict, init_env):
app.cfg['skip'] = skip

# build easyconfig
errormsg = '(no error)'
error_msg = '(no error)'
# timing info
start_time = time.time()
try:
Expand Down Expand Up @@ -4263,9 +4268,7 @@ def build_and_install_one(ecdict, init_env):
adjust_permissions(app.installdir, stat.S_IWUSR, add=False, recursive=True)

except EasyBuildError as err:
first_n = 300
errormsg = "build failed (first %d chars): %s" % (first_n, err.msg[:first_n])
_log.warning(errormsg)
error_msg = err.msg
result = False

ended = 'ended'
Expand Down Expand Up @@ -4387,11 +4390,7 @@ def ensure_writable_log_dir(log_dir):
# build failed
success = False
summary = 'FAILED'

build_dir = ''
if app.builddir:
build_dir = " (build directory: %s)" % (app.builddir)
succ = "unsuccessfully%s: %s" % (build_dir, errormsg)
succ = "unsuccessfully: " + error_msg

# cleanup logs
app.close_log()
Expand Down Expand Up @@ -4424,7 +4423,7 @@ def ensure_writable_log_dir(log_dir):

del app

return (success, application_log, errormsg)
return (success, application_log, error_msg)


def copy_easyblocks_for_reprod(easyblock_instances, reprod_dir):
Expand Down
28 changes: 18 additions & 10 deletions easybuild/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
from easybuild.tools.github import add_pr_labels, install_github_token, list_prs, merge_pr, new_branch_github, new_pr
from easybuild.tools.github import new_pr_from_branch
from easybuild.tools.github import sync_branch_with_develop, sync_pr_with_develop, update_branch, update_pr
from easybuild.tools.hooks import BUILD_AND_INSTALL_LOOP, PRE_PREF, POST_PREF, START, END, CANCEL, FAIL
from easybuild.tools.hooks import BUILD_AND_INSTALL_LOOP, PRE_PREF, POST_PREF, START, END, CANCEL, CRASH, FAIL
from easybuild.tools.hooks import load_hooks, run_hook
from easybuild.tools.modules import modules_tool
from easybuild.tools.options import opts_dict_to_eb_opts, set_up_configuration, use_color
Expand Down Expand Up @@ -149,11 +149,11 @@ def build_and_install_software(ecs, init_session_state, exit_on_failure=True):

# keep track of success/total count
if ec_res['success']:
test_msg = "Successfully built %s" % ec['spec']
test_msg = "Successfully installed %s" % ec['spec']
else:
test_msg = "Build of %s failed" % ec['spec']
test_msg = "Installation of %s failed" % os.path.basename(ec['spec'])
if 'err' in ec_res:
test_msg += " (err: %s)" % ec_res['err']
test_msg += ": %s" % ec_res['err']

# dump test report next to log file
test_report_txt = create_test_report(test_msg, [(ec, ec_res)], init_session_state)
Expand All @@ -169,8 +169,8 @@ def build_and_install_software(ecs, init_session_state, exit_on_failure=True):
adjust_permissions(parent_dir, stat.S_IWUSR, add=False, recursive=False)

if not ec_res['success'] and exit_on_failure:
if 'traceback' in ec_res:
raise EasyBuildError(ec_res['traceback'])
if not isinstance(ec_res['err'], EasyBuildError):
raise ec_res['err']
else:
raise EasyBuildError(test_msg)

Expand Down Expand Up @@ -747,14 +747,22 @@ def prepare_main(args=None, logfile=None, testing=None):
return init_session_state, eb_go, cfg_settings


if __name__ == "__main__":
init_session_state, eb_go, cfg_settings = prepare_main()
def main_with_hooks(args=None):
init_session_state, eb_go, cfg_settings = prepare_main(args=args)
hooks = load_hooks(eb_go.options.hooks)
try:
main(prepared_cfg_data=(init_session_state, eb_go, cfg_settings))
main(args=args, prepared_cfg_data=(init_session_state, eb_go, cfg_settings))
except EasyBuildError as err:
run_hook(FAIL, hooks, args=[err])
print_error(err.msg)
sys.exit(1)
except KeyboardInterrupt as err:
run_hook(CANCEL, hooks, args=[err])
print_error("Cancelled by user: %s" % err)
except Exception as err:
run_hook(CRASH, hooks, args=[err])
sys.stderr.write("EasyBuild crashed! Please consider reporting a bug, this should not happen...\n\n")
raise


if __name__ == "__main__":
main_with_hooks()
2 changes: 1 addition & 1 deletion easybuild/tools/build_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def nosupport(self, msg, ver):

def error(self, msg, *args, **kwargs):
"""Print error message and raise an EasyBuildError."""
ebmsg = "EasyBuild crashed with an error %s: " % self.caller_info()
ebmsg = "EasyBuild encountered an error %s: " % self.caller_info()
fancylogger.FancyLogger.error(self, ebmsg + msg, *args, **kwargs)

def devel(self, msg, *args, **kwargs):
Expand Down
2 changes: 2 additions & 0 deletions easybuild/tools/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
END = 'end'

CANCEL = 'cancel'
CRASH = 'crash'
FAIL = 'fail'

RUN_SHELL_CMD = 'run_shell_cmd'
Expand Down Expand Up @@ -107,6 +108,7 @@
POST_PREF + BUILD_AND_INSTALL_LOOP,
END,
CANCEL,
CRASH,
FAIL,
PRE_PREF + RUN_SHELL_CMD,
POST_PREF + RUN_SHELL_CMD,
Expand Down
107 changes: 95 additions & 12 deletions easybuild/tools/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"""
import contextlib
import functools
import inspect
import os
import re
import signal
Expand Down Expand Up @@ -75,7 +76,81 @@
]


RunResult = namedtuple('RunResult', ('cmd', 'exit_code', 'output', 'stderr', 'work_dir'))
RunShellCmdResult = namedtuple('RunShellCmdResult', ('cmd', 'exit_code', 'output', 'stderr', 'work_dir'))


class RunShellCmdError(BaseException):

def __init__(self, cmd, exit_code, work_dir, output, stderr, caller_info, *args, **kwargs):
"""Constructor for RunShellCmdError."""
self.cmd = cmd
self.cmd_name = cmd.split(' ')[0]
self.exit_code = exit_code
self.work_dir = work_dir
self.output = output
self.stderr = stderr
self.caller_info = caller_info

msg = f"Shell command '{self.cmd_name}' failed!"
super(RunShellCmdError, self).__init__(msg, *args, **kwargs)

def print(self):
"""
Report failed shell command for this RunShellCmdError instance
"""

def pad_4_spaces(msg):
return ' ' * 4 + msg

error_info = [
'',
"ERROR: Shell command failed!",
pad_4_spaces(f"full command -> {self.cmd}"),
pad_4_spaces(f"exit code -> {self.exit_code}"),
pad_4_spaces(f"working directory -> {self.work_dir}"),
]

tmpdir = tempfile.mkdtemp(prefix='shell-cmd-error-')
output_fp = os.path.join(tmpdir, f"{self.cmd_name}.out")
with open(output_fp, 'w') as fp:
fp.write(self.output or '')

if self.stderr is None:
error_info.append(pad_4_spaces(f"output (stdout + stderr) -> {output_fp}"))
else:
stderr_fp = os.path.join(tmpdir, f"{self.cmd_name}.err")
with open(stderr_fp, 'w') as fp:
fp.write(self.stderr)
error_info.extend([
pad_4_spaces(f"output (stdout) -> {output_fp}"),
pad_4_spaces(f"error/warnings (stderr) -> {stderr_fp}"),
])

caller_file_name, caller_line_nr, caller_function_name = self.caller_info
called_from_info = f"'{caller_function_name}' function in {caller_file_name} (line {caller_line_nr})"
error_info.extend([
pad_4_spaces(f"called from -> {called_from_info}"),
'',
])

sys.stderr.write('\n'.join(error_info) + '\n')


def raise_run_shell_cmd_error(cmd, exit_code, work_dir, output, stderr):
"""
Raise RunShellCmdError for failing shell command, after collecting additional caller info
"""

# figure out where failing command was run
# need to go 3 levels down:
# 1) this function
# 2) run_shell_cmd function
# 3) run_cmd_cache decorator
# 4) actual caller site
frameinfo = inspect.getouterframes(inspect.currentframe())[3]
caller_info = (frameinfo.filename, frameinfo.lineno, frameinfo.function)

raise RunShellCmdError(cmd, exit_code, work_dir, output, stderr, caller_info)


def run_cmd_cache(func):
Expand Down Expand Up @@ -178,7 +253,7 @@ def to_cmd_str(cmd):
msg += f" (in {work_dir})"
dry_run_msg(msg, silent=silent)

return RunResult(cmd=cmd_str, exit_code=0, output='', stderr=None, work_dir=work_dir)
return RunShellCmdResult(cmd=cmd_str, exit_code=0, output='', stderr=None, work_dir=work_dir)

start_time = datetime.now()
if not hidden:
Expand All @@ -204,14 +279,29 @@ def to_cmd_str(cmd):
_log.info("Command to run was changed by pre-%s hook: '%s' (was: '%s')", RUN_SHELL_CMD, cmd, old_cmd)

_log.info(f"Running command '{cmd_str}' in {work_dir}")
proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=stderr, check=fail_on_error,
proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=stderr, check=False,
cwd=work_dir, env=env, input=stdin, shell=shell, executable=executable)

# return output as a regular string rather than a byte sequence (and non-UTF-8 characters get stripped out)
output = proc.stdout.decode('utf-8', 'ignore')
stderr_output = proc.stderr.decode('utf-8', 'ignore') if split_stderr else None
stderr = proc.stderr.decode('utf-8', 'ignore') if split_stderr else None

res = RunShellCmdResult(cmd=cmd_str, exit_code=proc.returncode, output=output, stderr=stderr, work_dir=work_dir)

res = RunResult(cmd=cmd_str, exit_code=proc.returncode, output=output, stderr=stderr_output, work_dir=work_dir)
# always log command output
cmd_name = cmd_str.split(' ')[0]
if split_stderr:
_log.info(f"Output of '{cmd_name} ...' shell command (stdout only):\n{res.output}")
_log.info(f"Warnings and errors of '{cmd_name} ...' shell command (stderr only):\n{res.stderr}")
else:
_log.info(f"Output of '{cmd_name} ...' shell command (stdout + stderr):\n{res.output}")

if res.exit_code == 0:
_log.info(f"Shell command completed successfully (see output above): {cmd_str}")
else:
_log.warning(f"Shell command FAILED (exit code {res.exit_code}, see output above): {cmd_str}")
if fail_on_error:
raise_run_shell_cmd_error(res.cmd, res.exit_code, res.work_dir, output=res.output, stderr=res.stderr)

if with_hooks:
run_hook_kwargs = {
Expand All @@ -222,13 +312,6 @@ def to_cmd_str(cmd):
}
run_hook(RUN_SHELL_CMD, hooks, post_step_hook=True, args=[cmd], kwargs=run_hook_kwargs)

if split_stderr:
log_msg = f"Command '{cmd_str}' exited with exit code {res.exit_code}, "
log_msg += f"with stdout:\n{res.output}\nstderr:\n{res.stderr}"
else:
log_msg = f"Command '{cmd_str}' exited with exit code {res.exit_code} and output:\n{res.output}"
_log.info(log_msg)

if not hidden:
time_since_start = time_str_since(start_time)
trace_msg(f"command completed: exit {res.exit_code}, ran in {time_since_start}")
Expand Down
8 changes: 4 additions & 4 deletions test/framework/build_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,8 @@ def test_easybuildlog(self):
r"fancyroot.test_easybuildlog \[WARNING\] :: Deprecated functionality.*onemorewarning.*",
r"fancyroot.test_easybuildlog \[WARNING\] :: Deprecated functionality.*lastwarning.*",
r"fancyroot.test_easybuildlog \[WARNING\] :: Deprecated functionality.*thisisnotprinted.*",
r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild crashed with an error \(at .* in .*\): kaput",
r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild crashed with an error \(at .* in .*\): err: msg: %s",
r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild encountered an error \(at .* in .*\): kaput",
r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild encountered an error \(at .* in .*\): err: msg: %s",
r"fancyroot.test_easybuildlog \[ERROR\] :: .*EasyBuild encountered an exception \(at .* in .*\): oops",
'',
])
Expand Down Expand Up @@ -168,7 +168,7 @@ def test_easybuildlog(self):
r"fancyroot.test_easybuildlog \[WARNING\] :: bleh",
r"fancyroot.test_easybuildlog \[INFO\] :: 4\+2 = 42",
r"fancyroot.test_easybuildlog \[DEBUG\] :: this is just a test",
r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild crashed with an error \(at .* in .*\): foo baz baz",
r"fancyroot.test_easybuildlog \[ERROR\] :: EasyBuild encountered an error \(at .* in .*\): foo baz baz",
'',
])
logtxt_regex = re.compile(r'^%s' % expected_logtxt, re.M)
Expand Down Expand Up @@ -223,7 +223,7 @@ def test_log_levels(self):
info_msg = r"%s \[INFO\] :: fyi" % prefix
warning_msg = r"%s \[WARNING\] :: this is a warning" % prefix
deprecated_msg = r"%s \[WARNING\] :: Deprecated functionality, .*: almost kaput; see .*" % prefix
error_msg = r"%s \[ERROR\] :: EasyBuild crashed with an error \(at .* in .*\): kaput" % prefix
error_msg = r"%s \[ERROR\] :: EasyBuild encountered an error \(at .* in .*\): kaput" % prefix

expected_logtxt = '\n'.join([
error_msg,
Expand Down
14 changes: 13 additions & 1 deletion test/framework/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ def setUp(self):
'',
'def fail_hook(err):',
' print("EasyBuild FAIL: %s" % err)',
'',
'def crash_hook(err):',
' print("EasyBuild CRASHED, oh no! => %s" % err)',
])
write_file(self.test_hooks_pymod, test_hooks_pymod_txt)

Expand All @@ -97,8 +100,9 @@ def test_load_hooks(self):

hooks = load_hooks(self.test_hooks_pymod)

self.assertEqual(len(hooks), 8)
self.assertEqual(len(hooks), 9)
expected = [
'crash_hook',
'fail_hook',
'parse_hook',
'post_configure_hook',
Expand Down Expand Up @@ -140,6 +144,7 @@ def test_find_hook(self):
pre_single_extension_hook = [hooks[k] for k in hooks if k == 'pre_single_extension_hook'][0]
start_hook = [hooks[k] for k in hooks if k == 'start_hook'][0]
pre_run_shell_cmd_hook = [hooks[k] for k in hooks if k == 'pre_run_shell_cmd_hook'][0]
crash_hook = [hooks[k] for k in hooks if k == 'crash_hook'][0]
fail_hook = [hooks[k] for k in hooks if k == 'fail_hook'][0]
pre_build_and_install_loop_hook = [hooks[k] for k in hooks if k == 'pre_build_and_install_loop_hook'][0]

Expand Down Expand Up @@ -175,6 +180,10 @@ def test_find_hook(self):
self.assertEqual(find_hook('fail', hooks, pre_step_hook=True), None)
self.assertEqual(find_hook('fail', hooks, post_step_hook=True), None)

self.assertEqual(find_hook('crash', hooks), crash_hook)
self.assertEqual(find_hook('crash', hooks, pre_step_hook=True), None)
self.assertEqual(find_hook('crash', hooks, post_step_hook=True), None)

hook_name = 'build_and_install_loop'
self.assertEqual(find_hook(hook_name, hooks), None)
self.assertEqual(find_hook(hook_name, hooks, pre_step_hook=True), pre_build_and_install_loop_hook)
Expand Down Expand Up @@ -209,6 +218,7 @@ def run_hooks():
run_hook('single_extension', hooks, post_step_hook=True, args=[None])
run_hook('extensions', hooks, post_step_hook=True, args=[None])
run_hook('fail', hooks, args=[EasyBuildError('oops')])
run_hook('crash', hooks, args=[RuntimeError('boom!')])
stdout = self.get_stdout()
stderr = self.get_stderr()
self.mock_stdout(False)
Expand Down Expand Up @@ -244,6 +254,8 @@ def run_hooks():
"this is run before installing an extension",
"== Running fail hook...",
"EasyBuild FAIL: 'oops'",
"== Running crash hook...",
"EasyBuild CRASHED, oh no! => boom!",
]
expected_stdout = '\n'.join(expected_stdout_lines)

Expand Down
1 change: 1 addition & 0 deletions test/framework/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,7 @@ def test_avail_hooks(self):
" post_build_and_install_loop_hook",
" end_hook",
" cancel_hook",
" crash_hook",
" fail_hook",
" pre_run_shell_cmd_hook",
" post_run_shell_cmd_hook",
Expand Down
Loading

0 comments on commit 5ae74c9

Please sign in to comment.