From 4fdbee66e938bee5e3f9e3f20339b4965cce029a Mon Sep 17 00:00:00 2001 From: John Chilton Date: Thu, 22 Aug 2024 12:20:42 -0400 Subject: [PATCH] Implement tool upgrade assistant script. Inspect a tool's XML file and provide advice on upgrading to new tool versions. It is implemented as a library in tool_util for integration with Planemo in the future but I've added a script here to run it on the command-line directly. It can also output in JSON for integration with external tools such as the galaxy language server. --- lib/galaxy/tool_util/parser/xml.py | 6 +- lib/galaxy/tool_util/upgrade/__init__.py | 189 ++++++++++++++++++ lib/galaxy/tool_util/upgrade/script.py | 92 +++++++++ .../tool_util/upgrade/upgrade_codes.json | 53 +++++ lib/galaxy/tool_util/xsd/galaxy.xsd | 6 +- packages/tool_util/setup.cfg | 1 + test/functional/tools/legacy_interpreter.xml | 19 ++ .../tools/legacy_interpreter_write_output.py | 2 + test/functional/tools/output_format_input.xml | 2 +- test/functional/tools/sample_tool_conf.xml | 1 + test/unit/tool_util/upgrade/__init__.py | 0 .../tool_util/upgrade/test_upgrade_advice.py | 110 ++++++++++ 12 files changed, 478 insertions(+), 3 deletions(-) create mode 100644 lib/galaxy/tool_util/upgrade/__init__.py create mode 100755 lib/galaxy/tool_util/upgrade/script.py create mode 100644 lib/galaxy/tool_util/upgrade/upgrade_codes.json create mode 100644 test/functional/tools/legacy_interpreter.xml create mode 100644 test/functional/tools/legacy_interpreter_write_output.py create mode 100644 test/unit/tool_util/upgrade/__init__.py create mode 100644 test/unit/tool_util/upgrade/test_upgrade_advice.py diff --git a/lib/galaxy/tool_util/parser/xml.py b/lib/galaxy/tool_util/parser/xml.py index 9ab4a30f65b6..8e098236a110 100644 --- a/lib/galaxy/tool_util/parser/xml.py +++ b/lib/galaxy/tool_util/parser/xml.py @@ -376,6 +376,10 @@ def _get_option_value(self, key, default): def _command_el(self): return self.root.find("command") + @property + def _outputs_el(self): 
+ return self.root.find("outputs") + def _get_attribute_as_bool(self, attribute, default, elem=None): if elem is None: elem = self.root @@ -411,7 +415,7 @@ def parse_input_pages(self) -> "XmlPagesSource": def parse_provided_metadata_style(self): style = None - out_elem = self.root.find("outputs") + out_elem = self._outputs_el if out_elem is not None and "provided_metadata_style" in out_elem.attrib: style = out_elem.attrib["provided_metadata_style"] diff --git a/lib/galaxy/tool_util/upgrade/__init__.py b/lib/galaxy/tool_util/upgrade/__init__.py new file mode 100644 index 000000000000..58f4639beab5 --- /dev/null +++ b/lib/galaxy/tool_util/upgrade/__init__.py @@ -0,0 +1,189 @@ +# todo track down PR URLs or docs for and add to upgrade_codes.json: +# - 18_09_consider_python_environment +# - 20_09_consider_output_collection_order +# TODO: write migration advice for: +# - 18.09: References to other inputs need to be fully qualified by using `|`. +# - 18.09: Do not allow provided but illegal default values. 
+from json import loads +from typing import ( + cast, + Dict, + List, + Optional, +) + +from typing_extensions import ( + Literal, + NotRequired, + TypedDict, +) + +from galaxy.tool_util.parser.factory import get_tool_source +from galaxy.tool_util.parser.xml import XmlToolSource +from galaxy.util.resources import resource_string + + +class AdviceCode(TypedDict): + name: str + level: Literal["must_fix", "consider", "ready", "info"] + message: str + niche: NotRequired[bool] + url: NotRequired[str] + + +upgrade_codes_json = resource_string(__package__, "upgrade_codes.json") +upgrade_codes_by_name: Dict[str, AdviceCode] = {} + +for name, upgrade_object in loads(upgrade_codes_json).items(): + upgrade_object["name"] = name + upgrade_codes_by_name[name] = cast(AdviceCode, upgrade_object) + + +class AdviceCollection: + _advice: List[AdviceCode] + + def __init__(self): + self._advice = [] + + def add(self, code: str): + self._advice.append(upgrade_codes_by_name[code]) + + def to_list(self) -> List[AdviceCode]: + return self._advice + + +class ProfileMigration: + """A class offering advice on upgrading a Galaxy tool between two profile versions.""" + + from_version: str + to_version: str + + +class ProfileMigration16_04(ProfileMigration): + from_version = "16.01" + to_version = "16.04" + + def advise(advice_collection: AdviceCollection, xml_file: str): + tool_source = _xml_tool_source(xml_file) + interpreter = tool_source.parse_interpreter() + if interpreter: + advice_collection.add("16_04_fix_interpreter") + else: + advice_collection.add("16_04_ready_interpreter") + advice_collection.add("16_04_consider_implicit_extra_file_collection") + + if has_matching_xpath(tool_source, "//data[@format = 'input']"): + advice_collection.add("16_04_fix_output_format") + + +class ProfileMigration17_09(ProfileMigration): + from_version = "16.04" + to_version = "17.09" + + def advise(advice_collection: AdviceCollection, xml_file: str): + tool_source = _xml_tool_source(xml_file) + + outputs_el = 
tool_source._outputs_el + if outputs_el is not None and outputs_el.get("provided_metadata_style", None) is None: + advice_collection.add("17_09_consider_provided_metadata_style") + + +class ProfileMigration18_01(ProfileMigration): + from_version = "17.09" + to_version = "18.01" + + def advise(advice_collection: AdviceCollection, xml_file: str): + tool_source = _xml_tool_source(xml_file) + command_el = tool_source._command_el + if command_el is not None and command_el.get("use_shared_home", None) is None: + advice_collection.add("18_01_consider_home_directory") + + +class ProfileMigration18_09(ProfileMigration): + from_version = "18.01" + to_version = "18.09" + + def advise(advice_collection: AdviceCollection, xml_file: str): + tool_source = _xml_tool_source(xml_file) + tool_type = tool_source.parse_tool_type() + if tool_type == "manage_data": + advice_collection.add("18_09_consider_python_environment") + + +class ProfileMigration20_05(ProfileMigration): + from_version = "18.09" + to_version = "20.05" + + def advise(advice_collection: AdviceCollection, xml_file: str): + tool_source = _xml_tool_source(xml_file) + + if has_matching_xpath(tool_source, "//configfiles/inputs"): + advice_collection.add("20_05_consider_inputs_as_json_changes") + + +class ProfileMigration20_09(ProfileMigration): + from_version = "20.05" + to_version = "20.09" + + def advise(advice_collection: AdviceCollection, xml_file: str): + tool_source = _xml_tool_source(xml_file) + + tests = tool_source.parse_tests_to_dict() + for test in tests["tests"]: + output_collections = test.get("output_collections") + if not output_collections: + continue + + for output_collection in output_collections: + if output_collection.get("element_tests"): + advice_collection.add("20_09_consider_output_collection_order") + + command_el = tool_source._command_el + if command_el is not None: + strict = command_el.get("strict", None) + if strict is None: + advice_collection.add("20_09_consider_set_e") + + +# For 
tools with profile >= 20.05 a select with ``multiple="true"`` is rendered as an array which is empty if nothing is selected. For older profile versions select lists are rendered as comma separated strings or a literal ``null`` in case nothing is selected. + + +profile_migrations: List[ProfileMigration] = [ + ProfileMigration16_04, + ProfileMigration17_09, + ProfileMigration18_01, + ProfileMigration18_09, + ProfileMigration20_05, + ProfileMigration20_09, +] + +latest_supported_version = "21.1" + + +def advise_on_upgrade(xml_file: str, to_version: Optional[str] = None) -> List[AdviceCode]: + to_version = to_version or latest_supported_version + tool_source = _xml_tool_source(xml_file) + initial_version = tool_source.parse_profile() + advice_collection = AdviceCollection() + + for migration in profile_migrations: + if migration.to_version < initial_version: + # tool started past here... just skip this advice + continue + if migration.to_version > to_version: + # we're not advising on upgrading past this point + break + migration.advise(advice_collection, xml_file) + + return advice_collection.to_list() + + +def _xml_tool_source(xml_file: str) -> XmlToolSource: + tool_source = get_tool_source(xml_file) + if not isinstance(tool_source, XmlToolSource): + raise Exception("Can only provide upgrade advice for XML tools.") + return cast(XmlToolSource, tool_source) + + +def has_matching_xpath(tool_source: XmlToolSource, xpath: str) -> bool: + return tool_source.xml_tree.find(xpath) is not None diff --git a/lib/galaxy/tool_util/upgrade/script.py b/lib/galaxy/tool_util/upgrade/script.py new file mode 100755 index 000000000000..f4679e74488f --- /dev/null +++ b/lib/galaxy/tool_util/upgrade/script.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python + +import argparse +import sys +from json import dumps +from textwrap import wrap +from typing import ( + List, + Optional, +) + +from galaxy.tool_util.upgrade import ( + AdviceCode, + advise_on_upgrade, + latest_supported_version, +) + 
+LEVEL_TO_STRING = { + "must_fix": "❌", + "ready": "✅", + "consider": "🤔", + "info": "ℹ️", +} +DESCRIPTION = f""" +A small utility to check for potential problems and provide advice when upgrading a tool's +profile version. This version of the script can provide advice for upgrading tools through +{latest_supported_version}. +""" + + +def arg_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description=DESCRIPTION) + parser.add_argument("xml_file") + parser.add_argument( + "-p", + "--profile-version", + dest="profile_version", + default=latest_supported_version, + help="Provide upgrade advice up to this Galaxy tool profile version.", + ) + parser.add_argument( + "-j", + "--json", + default=False, + action="store_true", + help="Output advice as JSON.", + ) + parser.add_argument( + "-n", + "--niche", + default=False, + action="store_true", + help="Include advice about niche features that may not be relevant for most tools - including the use of 'galaxy.json' and writing global state in the $HOME directory.", + ) + return parser + + +def _print_advice(advice: AdviceCode): + message = "\n".join(wrap(advice["message"], initial_indent="", subsequent_indent=" ")) + level = advice["level"] + level_str = LEVEL_TO_STRING[level] + url = advice.get("url") + print(f"- {level_str}{message}\n") + if url: + print(f" More information at {url}") + + +def _print_advice_list(advice_list: List[AdviceCode]): + for advice in advice_list: + _print_advice(advice) + + +def advise(xml_file: str, version: str, json: bool, niche: bool): + advice_list = advise_on_upgrade(xml_file, version) + if not niche: + advice_list = [a for a in advice_list if not a.get("niche", False)] + if json: + print(dumps(advice_list)) + else: + _print_advice_list(advice_list) + + +def main(argv=None) -> None: + if argv is None: + argv = sys.argv[1:] + + args = arg_parser().parse_args(argv) + advise(args.xml_file, args.profile_version, args.json, args.niche) + + +if __name__ == "__main__": + 
+ main() diff --git a/lib/galaxy/tool_util/upgrade/upgrade_codes.json b/lib/galaxy/tool_util/upgrade/upgrade_codes.json new file mode 100644 index 000000000000..671dc3760424 --- /dev/null +++ b/lib/galaxy/tool_util/upgrade/upgrade_codes.json @@ -0,0 +1,53 @@ +{ + "16_04_fix_interpreter": { + "level": "must_fix", + "message": "This tool uses an interpreter on the command block; this was disabled with 16.04. The command line needs to be rewritten to call the language runtime with a full path to the target script using `$tool_directory` to refer to the path to the tool and its script.", + "url": "https://github.com/galaxyproject/galaxy/pull/1688" + }, + "16_04_ready_interpreter": { + "level": "ready", + "message": "This tool follows best practice and does not specify an interpreter on the command block.", + "url": "https://github.com/galaxyproject/galaxy/pull/1688" + }, + "16_04_consider_implicit_extra_file_collection": { + "level": "consider", + "message": "Starting with profile 16.04 tools, Galaxy no longer attempts to just find tool outputs keyed on the output ID in the working directory. Tool outputs need to be explicitly declared and dynamic outputs need to be specified in a 'galaxy.json' file or with a 'discover_datasets' block.", + "url": "https://github.com/galaxyproject/galaxy/pull/1688", + "niche": true + }, + "16_04_fix_output_format": { + "level": "must_fix", + "message": "Starting with 16.04 tools, having format='input' on a tool output is disabled. The behavior was not well defined for these outputs. Please add 'format_source=\"a_specific_input_name\"' for a specific input to inherit the format from.", + "url": "https://github.com/galaxyproject/galaxy/pull/1688" + }, + "20_05_consider_inputs_as_json_changes": { + "level": "consider", + "message": "Starting with 20.05, the format of data in 'inputs' config files changed slightly. 
Unselected optional `select` and `data_column` parameters get json null values instead of the string 'None' and multiple `select` and `data_column` parameters are lists (instead of comma separated strings).", + "url": "https://github.com/galaxyproject/galaxy/pull/9776/files" + }, + "20_09_consider_output_collection_order": { + "level": "consider", + "message": "Starting in profile 20.09 tools, the order of elements defined in tool tests became relevant in order to verify collections are properly sorted. This may cause tool tests to fail after the upgrade; rearrange the elements defined in output collections if this occurs." + }, + "20_09_consider_set_e": { + "level": "consider", + "message": "Starting with profile 20.09 tools, tool scripts are executed with the 'set -e' instruction. The 'set -e' option instructs the shell to immediately exit if any command has a non-zero exit status. If your command uses multiple sub-commands and you'd like to allow them to execute with non-zero exit codes add 'strict=\"false\"' to the command tag to restore the tool's legacy behavior.", + "url": "https://github.com/galaxyproject/galaxy/pull/9962" + }, + "18_01_consider_home_directory": { + "level": "consider", + "message": "Starting with profile 18.01 tools, each job is given its own home directory. Most tools should not depend on global state in a home directory; if this is required, though, set 'use_shared_home=\"true\"' on the command tag of the tool.", + "url": "https://github.com/galaxyproject/galaxy/pull/5193", + "niche": true + }, + "18_09_consider_python_environment": { + "level": "consider", + "message": "Starting with profile 18.09 tools, data managers run without Galaxy's virtual environment. Be sure your requirements reflect all the data manager's dependencies." 
+ }, + "17_09_consider_provided_metadata_style": { + "level": "consider", + "message": "Starting with 17.09 tools, the format of 'galaxy.json' (a rarely used file that can be used to dynamically collect datasets or metadata about datasets produced by the tool) changed - the original behavior can be restored by adding 'provided_metadata_style=\"legacy\"' to the tool's outputs tag.", + "url": "https://github.com/galaxyproject/galaxy/pull/4437", + "niche": true + } +} diff --git a/lib/galaxy/tool_util/xsd/galaxy.xsd b/lib/galaxy/tool_util/xsd/galaxy.xsd index 7c17312c4f0b..de34e758331b 100644 --- a/lib/galaxy/tool_util/xsd/galaxy.xsd +++ b/lib/galaxy/tool_util/xsd/galaxy.xsd @@ -29,10 +29,14 @@ List of behavior changes associated with profile versions: - Disable default tool version of 1.0.0. - Use non zero exit code as default stdio error condition (before non-empty stderr). +#### 17.09 + +- Introduce `provided_metadata_style` with default `"default"`. Restore legacy behavior by setting + this to `"legacy"`. + #### 18.01 - Use a separate home directory for each job. -- Introduce `provided_metadata_style` with default `"default"` before `"legacy"`. 
#### 18.09 diff --git a/packages/tool_util/setup.cfg b/packages/tool_util/setup.cfg index 7c8fd75feec1..a83feee3e048 100644 --- a/packages/tool_util/setup.cfg +++ b/packages/tool_util/setup.cfg @@ -49,6 +49,7 @@ python_requires = >=3.7 [options.entry_points] console_scripts = galaxy-tool-test = galaxy.tool_util.verify.script:main + galaxy-tool-upgrade-advisor = galaxy.tool_util.upgrade.script:main mulled-build = galaxy.tool_util.deps.mulled.mulled_build:main mulled-build-channel = galaxy.tool_util.deps.mulled.mulled_build_channel:main mulled-build-files = galaxy.tool_util.deps.mulled.mulled_build_files:main diff --git a/test/functional/tools/legacy_interpreter.xml b/test/functional/tools/legacy_interpreter.xml new file mode 100644 index 000000000000..7ca65584d388 --- /dev/null +++ b/test/functional/tools/legacy_interpreter.xml @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + diff --git a/test/functional/tools/legacy_interpreter_write_output.py b/test/functional/tools/legacy_interpreter_write_output.py new file mode 100644 index 000000000000..25791f7b3d86 --- /dev/null +++ b/test/functional/tools/legacy_interpreter_write_output.py @@ -0,0 +1,2 @@ +with open("output1", "w") as f: + f.write("hello world") diff --git a/test/functional/tools/output_format_input.xml b/test/functional/tools/output_format_input.xml index c346b10dedf3..6080f0c21ca2 100644 --- a/test/functional/tools/output_format_input.xml +++ b/test/functional/tools/output_format_input.xml @@ -1,6 +1,6 @@ + (for legacy tools, i.e. 
profile <16.04, the format of a random input is used) --> cat '$input' > '$output' diff --git a/test/functional/tools/sample_tool_conf.xml b/test/functional/tools/sample_tool_conf.xml index 80c032c93601..4afe5cf253f8 100644 --- a/test/functional/tools/sample_tool_conf.xml +++ b/test/functional/tools/sample_tool_conf.xml @@ -176,6 +176,7 @@ + diff --git a/test/unit/tool_util/upgrade/__init__.py b/test/unit/tool_util/upgrade/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/test/unit/tool_util/upgrade/test_upgrade_advice.py b/test/unit/tool_util/upgrade/test_upgrade_advice.py new file mode 100644 index 000000000000..c8fc085bd0a6 --- /dev/null +++ b/test/unit/tool_util/upgrade/test_upgrade_advice.py @@ -0,0 +1,110 @@ +import os +from typing import List + +import pytest + +from galaxy.tool_util.unittest_utils import functional_test_tool_path +from galaxy.tool_util.upgrade import ( + AdviceCode, + advise_on_upgrade, +) + + +def test_does_not_work_on_non_xml_tools(): + simple_constructs = _tool_path("simple_constructs.yml") + with pytest.raises(Exception): + advise_on_upgrade(simple_constructs, "16.04") + + +def test_old_advice_does_not_appear_when_upgrading_past_it(): + a_16_01_tool = _tool_path("tool_provided_metadata_1.xml") + advice = advise_on_upgrade(a_16_01_tool) + assert_has_advice(advice, "16_04_consider_implicit_extra_file_collection") + + a_17_09_tool = _tool_path("tool_provided_metadata_6.xml") + advice = advise_on_upgrade(a_17_09_tool) + assert_not_has_advice(advice, "16_04_consider_implicit_extra_file_collection") + + +def test_interpreter_advice_positive(): + legacy_interpreter = _tool_path("legacy_interpreter.xml") + advice = advise_on_upgrade(legacy_interpreter, "16.04") + assert_has_advice(advice, "16_04_fix_interpreter") + assert_not_has_advice(advice, "16_04_ready_interpreter") + + +def test_interpreter_advice_negative(): + simple_constructs = _tool_path("simple_constructs.xml") + advice = 
advise_on_upgrade(simple_constructs, "16.04") + assert_not_has_advice(advice, "16_04_fix_interpreter") + assert_has_advice(advice, "16_04_ready_interpreter") + + +def test_output_format_advice_positive(): + output_format = _tool_path("output_format.xml") + advice = advise_on_upgrade(output_format, "16.04") + assert_has_advice(advice, "16_04_fix_output_format") + + +def test_20_05_inputs_changes(): + inputs_as_json = _tool_path("inputs_as_json.xml") + advice = advise_on_upgrade(inputs_as_json) + assert_has_advice(advice, "20_05_consider_inputs_as_json_changes") + + simple_constructs = _tool_path("simple_constructs.xml") + advice = advise_on_upgrade(simple_constructs) + assert_not_has_advice(advice, "20_05_consider_inputs_as_json_changes") + + +def test_output_format_advice_negative(): + simple_constructs = _tool_path("simple_constructs.xml") + advice = advise_on_upgrade(simple_constructs, "16.04") + assert_not_has_advice(advice, "16_04_fix_output_format") + + +def test_consider_implicit_extra_file_collection(): + simple_constructs = _tool_path("simple_constructs.xml") + advice = advise_on_upgrade(simple_constructs, "16.04") + assert_has_advice(advice, "16_04_consider_implicit_extra_file_collection") + + +def test_2009_output_collection_advice_positive(): + collection_creates_list = _tool_path("collection_creates_list.xml") + advice = advise_on_upgrade(collection_creates_list, "24.1") + assert_has_advice(advice, "20_09_consider_output_collection_order") + + +def test_2009_output_collection_advice_negative(): + simple_constructs = _tool_path("simple_constructs.xml") + advice = advise_on_upgrade(simple_constructs, "24.1") + assert_not_has_advice(advice, "20_09_consider_output_collection_order") + + +def test_2009_consider_strict_shell_positive(): + simple_constructs = _tool_path("simple_constructs.xml") + advice = advise_on_upgrade(simple_constructs, "24.1") + assert_has_advice(advice, "20_09_consider_set_e") + + +def test_1801_consider_home_directory(): + 
simple_constructs = _tool_path("simple_constructs.xml") + advice = advise_on_upgrade(simple_constructs, "24.1") + assert_has_advice(advice, "18_01_consider_home_directory") + + +def _tool_path(tool_name: str): + return os.path.join(functional_test_tool_path(tool_name)) + + +def assert_has_advice(advice_list: List[AdviceCode], advice_code: str): + for advice in advice_list: + if advice["name"] == advice_code: + return + + raise AssertionError(f"Was expecting advice {advice_code} in list of upgrade advice {advice_list}") + + +def assert_not_has_advice(advice_list: List[AdviceCode], advice_code: str): + for advice in advice_list: + if advice["name"] == advice_code: + raise AssertionError(f"Was not expecting advice {advice_code} in list of upgrade advice {advice_list}")