diff --git a/client/src/components/JobInformation/JobInformation.test.js b/client/src/components/JobInformation/JobInformation.test.js index f90dd1385492..df8448c4d1dd 100644 --- a/client/src/components/JobInformation/JobInformation.test.js +++ b/client/src/components/JobInformation/JobInformation.test.js @@ -69,7 +69,7 @@ describe("JobInformation/JobInformation.vue", () => { }); it("job messages", async () => { - const rendered_link = jobInfoTable.findAll(`#job-messages li`); + const rendered_link = jobInfoTable.findAll(`#job-messages .job-message`); expect(rendered_link.length).toBe(jobResponse.job_messages.length); for (let i = 0; i < rendered_link.length; i++) { const msg = rendered_link.at(i).text(); diff --git a/client/src/components/JobInformation/JobInformation.vue b/client/src/components/JobInformation/JobInformation.vue index 95fe2d8cb7bf..759e59b97418 100644 --- a/client/src/components/JobInformation/JobInformation.vue +++ b/client/src/components/JobInformation/JobInformation.vue @@ -1,6 +1,75 @@ + + - + diff --git a/lib/galaxy/jobs/__init__.py b/lib/galaxy/jobs/__init__.py index ae9a2dddca06..86c43b45ed7c 100644 --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -75,6 +75,7 @@ check_output, DETECTED_JOB_STATE, ) +from galaxy.tool_util.parser.stdio import StdioErrorLevel from galaxy.tools.evaluation import ( PartialToolEvaluator, ToolEvaluator, @@ -1939,7 +1940,13 @@ def fail(message=job.info, exception=None): self.discover_outputs(job, inp_data, out_data, out_collections, final_job_state=final_job_state) except MaxDiscoveredFilesExceededError as e: final_job_state = job.states.ERROR - job.job_messages = [str(e)] + job.job_messages = [ + { + "type": "internal", + "desc": str(e), + "error_level": StdioErrorLevel.FATAL, + } + ] for dataset_assoc in output_dataset_associations: if getattr(dataset_assoc.dataset, "discovered", False): @@ -2088,12 +2095,8 @@ def discover_outputs(self, job, inp_data, out_data, out_collections, final_job_s ) 
def check_tool_output(self, tool_stdout, tool_stderr, tool_exit_code, job, job_stdout=None, job_stderr=None): - job_id_tag = "" - if job is not None: - job_id_tag = job.get_id_tag() - state, tool_stdout, tool_stderr, job_messages = check_output( - self.tool.stdio_regexes, self.tool.stdio_exit_codes, tool_stdout, tool_stderr, tool_exit_code, job_id_tag + self.tool.stdio_regexes, self.tool.stdio_exit_codes, tool_stdout, tool_stderr, tool_exit_code ) # Store the modified stdout and stderr in the job: diff --git a/lib/galaxy/metadata/set_metadata.py b/lib/galaxy/metadata/set_metadata.py index 8c36bf70a1d9..f1c8906c39fd 100644 --- a/lib/galaxy/metadata/set_metadata.py +++ b/lib/galaxy/metadata/set_metadata.py @@ -19,7 +19,12 @@ import traceback from functools import partial from pathlib import Path -from typing import Optional +from typing import ( + Any, + Dict, + List, + Optional, +) try: from pulsar.client.staging import COMMAND_VERSION_FILENAME @@ -61,6 +66,7 @@ DETECTED_JOB_STATE, ) from galaxy.tool_util.parser.stdio import ( + StdioErrorLevel, ToolStdioExitCode, ToolStdioRegex, ) @@ -68,6 +74,7 @@ from galaxy.util import ( safe_contains, stringify_dictionary_keys, + unicodify, ) from galaxy.util.expressions import ExpressionContext @@ -216,7 +223,7 @@ def set_meta(new_dataset_instance, file_dict): export_store = None final_job_state = Job.states.OK - job_messages = [] + job_messages: List[Dict[str, Any]] = [] if extended_metadata_collection: tool_dict = metadata_params["tool"] stdio_exit_code_dicts, stdio_regex_dicts = tool_dict["stdio_exit_codes"], tool_dict["stdio_regexes"] @@ -237,25 +244,25 @@ def set_meta(new_dataset_instance, file_dict): for directory, prefix in locations: if directory and os.path.exists(os.path.join(directory, f"{prefix}stdout")): with open(os.path.join(directory, f"{prefix}stdout"), "rb") as f: - tool_stdout = f.read(MAX_STDIO_READ_BYTES) + tool_stdout = unicodify(f.read(MAX_STDIO_READ_BYTES), strip_null=True) with 
open(os.path.join(directory, f"{prefix}stderr"), "rb") as f: - tool_stderr = f.read(MAX_STDIO_READ_BYTES) + tool_stderr = unicodify(f.read(MAX_STDIO_READ_BYTES), strip_null=True) break else: if os.path.exists(os.path.join(tool_job_working_directory, "task_0")): # We have a task splitting job - tool_stdout = b"" - tool_stderr = b"" + tool_stdout = "" + tool_stderr = "" paths = tool_job_working_directory.glob("task_*") for path in paths: with open(path / "outputs" / "tool_stdout", "rb") as f: - task_stdout = f.read(MAX_STDIO_READ_BYTES) + task_stdout = unicodify(f.read(MAX_STDIO_READ_BYTES), strip_null=True) if task_stdout: - tool_stdout = b"%s[%s stdout]\n%s\n" % (tool_stdout, path.name.encode(), task_stdout) + tool_stdout = f"{tool_stdout}[{path.name} stdout]\n{task_stdout}\n" with open(path / "outputs" / "tool_stderr", "rb") as f: - task_stderr = f.read(MAX_STDIO_READ_BYTES) + task_stderr = unicodify(f.read(MAX_STDIO_READ_BYTES), strip_null=True) if task_stderr: - tool_stderr = b"%s[%s stdout]\n%s\n" % (tool_stderr, path.name.encode(), task_stderr) + tool_stderr = f"{tool_stderr}[{path.name} stderr]\n{task_stderr}\n" else: wdc = os.listdir(tool_job_working_directory) odc = os.listdir(outputs_directory) @@ -265,7 +272,7 @@ def set_meta(new_dataset_instance, file_dict): log.warning(f"{error_desc}. 
{error_extra}") raise Exception(error_desc) else: - tool_stdout = tool_stderr = b"" + tool_stdout = tool_stderr = "" job_id_tag = metadata_params["job_id_tag"] @@ -273,7 +280,7 @@ def set_meta(new_dataset_instance, file_dict): tool_exit_code = read_exit_code_from(exit_code_file, job_id_tag) check_output_detected_state, tool_stdout, tool_stderr, job_messages = check_output( - stdio_regexes, stdio_exit_codes, tool_stdout, tool_stderr, tool_exit_code, job_id_tag + stdio_regexes, stdio_exit_codes, tool_stdout, tool_stderr, tool_exit_code ) if check_output_detected_state == DETECTED_JOB_STATE.OK and not tool_provided_metadata.has_failed_outputs(): final_job_state = Job.states.OK @@ -340,7 +347,15 @@ def set_meta(new_dataset_instance, file_dict): collect_dynamic_outputs(job_context, output_collections) except MaxDiscoveredFilesExceededError as e: final_job_state = Job.states.ERROR - job_messages.append(str(e)) + job_messages.append( + { + "type": "max_discovered_files", + "desc": str(e), + "code_desc": None, + "error_level": StdioErrorLevel.FATAL, + } + ) + if job: job.set_streams(tool_stdout=tool_stdout, tool_stderr=tool_stderr, job_messages=job_messages) job.state = final_job_state diff --git a/lib/galaxy/tool_util/output_checker.py b/lib/galaxy/tool_util/output_checker.py index 8612737da7c9..ca6dc11be40f 100644 --- a/lib/galaxy/tool_util/output_checker.py +++ b/lib/galaxy/tool_util/output_checker.py @@ -1,9 +1,21 @@ import re from enum import Enum from logging import getLogger +from typing import ( + Any, + Dict, + List, + Tuple, + TYPE_CHECKING, +) from galaxy.tool_util.parser.stdio import StdioErrorLevel -from galaxy.util import unicodify + +if TYPE_CHECKING: + from galaxy.tool_util.parser.stdio import ( + ToolStdioExitCode, + ToolStdioRegex, + ) log = getLogger(__name__) @@ -17,7 +29,9 @@ class DETECTED_JOB_STATE(str, Enum): ERROR_PEEK_SIZE = 2000 -def check_output_regex(job_id_tag, regex, stream, stream_name, job_messages, max_error_level): +def 
check_output_regex( + regex: "ToolStdioRegex", stream: str, stream_name: str, job_messages: List[Dict[str, Any]], max_error_level: int +) -> int: """ check a single regex against a stream @@ -35,7 +49,13 @@ def check_output_regex(job_id_tag, regex, stream, stream_name, job_messages, max return max_error_level -def check_output(stdio_regexes, stdio_exit_codes, stdout, stderr, tool_exit_code, job_id_tag): +def check_output( + stdio_regexes: List["ToolStdioRegex"], + stdio_exit_codes: List["ToolStdioExitCode"], + stdout: str, + stderr: str, + tool_exit_code: int, +) -> Tuple[str, str, str, List[Dict[str, Any]]]: """ Check the output of a tool - given the stdout, stderr, and the tool's exit code, return DETECTED_JOB_STATE.OK if the tool exited succesfully or @@ -51,9 +71,6 @@ def check_output(stdio_regexes, stdio_exit_codes, stdout, stderr, tool_exit_code # has a bug but the tool was ok, and it lets a workflow continue. state = DETECTED_JOB_STATE.OK - stdout = unicodify(stdout, strip_null=True) - stderr = unicodify(stderr, strip_null=True) - # messages (descriptions of the detected exit_code and regexes) # to be prepended to the stdout/stderr after all exit code and regex tests # are done (otherwise added messages are searched again). @@ -118,16 +135,12 @@ def check_output(stdio_regexes, stdio_exit_codes, stdout, stderr, tool_exit_code # - If it matched, then determine the error level. # o If it was fatal, then we're done - break. 
if regex.stderr_match: - max_error_level = check_output_regex( - job_id_tag, regex, stderr, "stderr", job_messages, max_error_level - ) + max_error_level = check_output_regex(regex, stderr, "stderr", job_messages, max_error_level) if max_error_level >= StdioErrorLevel.MAX: break if regex.stdout_match: - max_error_level = check_output_regex( - job_id_tag, regex, stdout, "stdout", job_messages, max_error_level - ) + max_error_level = check_output_regex(regex, stdout, "stdout", job_messages, max_error_level) if max_error_level >= StdioErrorLevel.MAX: break @@ -136,10 +149,10 @@ def check_output(stdio_regexes, stdio_exit_codes, stdout, stderr, tool_exit_code if max_error_level == StdioErrorLevel.FATAL_OOM: state = DETECTED_JOB_STATE.OUT_OF_MEMORY_ERROR elif max_error_level >= StdioErrorLevel.FATAL: - reason = "" + error_reason = "" if job_messages: - reason = f" Reasons are {job_messages}" - log.info(f"Job error detected, failing job.{reason}") + error_reason = f" Reasons are {job_messages}" + log.info(f"Job error detected, failing job.{error_reason}") state = DETECTED_JOB_STATE.GENERIC_ERROR # When there are no regular expressions and no exit codes to check, @@ -159,7 +172,7 @@ def check_output(stdio_regexes, stdio_exit_codes, stdout, stderr, tool_exit_code return state, stdout, stderr, job_messages -def __regex_err_msg(match, stream, regex): +def __regex_err_msg(match: re.Match, stream: str, regex: "ToolStdioRegex"): """ Return a message about the match on tool output using the given ToolStdioRegex regex object. The regex_match is a MatchObject @@ -177,7 +190,7 @@ def __regex_err_msg(match, stream, regex): # If there's a description for the regular expression, then use it. # Otherwise, we'll take the first 256 characters of the match. 
if regex.desc is not None: - desc += regex.desc + desc += match.expand(regex.desc) else: desc += f"Matched on {match_str}" return { diff --git a/lib/galaxy/tool_util/xsd/galaxy.xsd b/lib/galaxy/tool_util/xsd/galaxy.xsd index cefc9fc8d8de..c4e6f1a8eea3 100644 --- a/lib/galaxy/tool_util/xsd/galaxy.xsd +++ b/lib/galaxy/tool_util/xsd/galaxy.xsd @@ -7022,6 +7022,27 @@ stderr are checked. If ``Branch A`` is at the beginning of stdout or stderr, the a warning will be registered and the source that contained ``Branch A`` will be prepended with the warning ``Warning: Branch A was taken in execution``. +Since Galaxy 24.0, groups defined in the regular expression are expanded in the +description (using the syntax of the [``expand`` function](https://docs.python.org/3/library/re.html#re.Match.expand)). +For the first ``regex`` in the following example the ``\1`` will be replaced +by the content of the text matching ``.*`` that follows ``INFO: ``, +i.e. the content of the first group. +The second regular expression defines a named group ``error_message`` +which then replaces the corresponding placeholder ``\g<error_message>`` in the +description. Note the quoting of the ``<`` and ``>`` characters in XML. + +```xml + + + + +``` ]]> diff --git a/lib/galaxy_test/api/test_tools.py b/lib/galaxy_test/api/test_tools.py index 0dda061d14dd..7befe7f221b1 100644 --- a/lib/galaxy_test/api/test_tools.py +++ b/lib/galaxy_test/api/test_tools.py @@ -1396,11 +1396,17 @@ def test_qc_messages(self, history_id): job_id = create["jobs"][0]["id"] details = self.dataset_populator.get_job_details(job_id, full=True).json() assert "job_messages" in details, details + # test autogenerated message (if regex defines no description attribute) qc_message = details["job_messages"][0] # assert qc_message["code_desc"] == "QC Metrics for Tool", qc_message assert qc_message["desc"] == "QC: Matched on Quality of sample is 30%." assert qc_message["match"] == "Quality of sample is 30%." 
assert qc_message["error_level"] == 1.1 + # test message generated from the description containing a reference to group defined in the regex + qc_message = details["job_messages"][1] + assert qc_message["desc"] == "QC: Sample quality 30" + assert qc_message["match"] == "Quality of sample is 30%." + assert qc_message["error_level"] == 1.1 @skip_without_tool("cat1") def test_multirun_cat1(self, history_id): diff --git a/test/functional/tools/qc_stdout.xml b/test/functional/tools/qc_stdout.xml index 634f48a6396b..5f4e50bdfa8f 100644 --- a/test/functional/tools/qc_stdout.xml +++ b/test/functional/tools/qc_stdout.xml @@ -1,6 +1,7 @@ - + + .{3})bar" + regex.desc = r"\g" + self.__add_regex(regex) + self.stderr = "ERROR: foobar" + _, _, _, job_messages = self.__check_output() + assert job_messages[0]["desc"] == "Fatal error: foo" + self.__assertNotSuccessful() + def test_stdout_ignored_for_stderr_regexes(self): regex = ToolStdioRegex() regex.stderr_match = True @@ -99,5 +121,5 @@ def __assertNotSuccessful(self): def __check_output(self): return check_output( - self.tool.stdio_regexes, self.tool.stdio_exit_codes, self.stdout, self.stderr, self.tool_exit_code, "job_id" + self.tool.stdio_regexes, self.tool.stdio_exit_codes, self.stdout, self.stderr, self.tool_exit_code ) diff --git a/test/unit/tool_util/test_parsing.py b/test/unit/tool_util/test_parsing.py index 4b5a2202d96d..7550bea601ab 100644 --- a/test/unit/tool_util/test_parsing.py +++ b/test/unit/tool_util/test_parsing.py @@ -882,6 +882,6 @@ class TestQcStdio(BaseLoaderTestCase): def test_tests(self): exit, regexes = self._tool_source.parse_stdio() assert len(exit) == 2 - assert len(regexes) == 1 + assert len(regexes) == 2 regex = regexes[0] assert regex.error_level == 1.1