From b3ce7b84b95a46594b8d8e8343a0b23a0fd62875 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Fri, 3 May 2024 13:10:11 +0200 Subject: [PATCH] Generalize finding tool sheds from runnable Should now work for workflow_edit, test, run etc if targeting a local or docker instance. --- planemo/autoupdate.py | 40 +----------------- planemo/commands/cmd_autoupdate.py | 8 ---- planemo/galaxy/config.py | 65 +++++++++++++++++++++++++----- planemo/galaxy/workflows.py | 37 +++++++++++------ tests/test_autoupdate.py | 6 +-- 5 files changed, 85 insertions(+), 71 deletions(-) diff --git a/planemo/autoupdate.py b/planemo/autoupdate.py index 2d139ab98..1a9324d6b 100644 --- a/planemo/autoupdate.py +++ b/planemo/autoupdate.py @@ -4,7 +4,6 @@ import itertools import re import xml.etree.ElementTree as ET -from string import Template from typing import ( Any, DefaultDict, @@ -26,7 +25,8 @@ import planemo.conda from planemo.galaxy.workflows import ( - guess_tool_shed_url, + get_tool_ids_for_workflow, + get_toolshed_url_for_tool_id, MAIN_TOOLSHED_URL, ) from planemo.io import ( @@ -294,14 +294,6 @@ def get_newest_tool_id(tool_ids: List[str]) -> str: )[-1] -def get_toolshed_url_for_tool_id(tool_id: str) -> Optional[str]: - components = tool_id.split("/repos") - if len(components) > 1: - tool_shed_fqdn = components[0] - return guess_tool_shed_url(tool_shed_fqdn=tool_shed_fqdn) - return None - - def outdated_tools( # noqa: C901 ctx: "PlanemoCliContext", wf_dict: Dict[str, Any], tools_to_skip: List[str] ) -> Dict[str, Dict[str, str]]: @@ -355,22 +347,6 @@ def outdated_tools_rec(wf_dict: Dict[str, Any]) -> None: return outdated_tool_dict -def get_tool_ids_for_workflow(wf_dict: Dict[str, Any], tool_ids: Optional[List[str]] = None) -> List[str]: - tool_ids = [] if tool_ids is None else tool_ids - steps = wf_dict["steps"].values() if isinstance(wf_dict["steps"], dict) else wf_dict["steps"] - for step in steps: - if step.get("type", "tool") == "tool" and not step.get("run", {}).get("class") == "GalaxyWorkflow": - tool_id = step["tool_id"] - tool_ids.append(tool_id) - elif step.get("type") == "subworkflow": # GA SWF - get_tool_ids_for_workflow(step["subworkflow"], tool_ids=tool_ids) - elif step.get("run", {}).get("class") == "GalaxyWorkflow": # gxformat2 SWF - get_tool_ids_for_workflow(step["run"], tool_ids=tool_ids) - else: - continue - return list(dict.fromkeys(tool_ids)) - - def get_tools_to_update( ctx: "PlanemoCliContext", workflow: "Runnable", tools_to_skip: List[str] ) -> Dict[str, Dict[str, str]]: @@ -383,18 +359,6 @@ def get_tools_to_update( return outdated_tools(ctx, wf_dict, tools_to_skip) -def get_shed_tools_conf_string_for_tool_ids(tool_ids: List[str]) -> str: - tool_shed_urls = set(get_toolshed_url_for_tool_id(tool_id) for tool_id in tool_ids if tool_id) - cleaned_tool_shed_urls = set(_ for _ in tool_shed_urls if _ is not None) - TOOL_SHEDS_CONF_TEMPLATE = Template("""${tool_shed_lines}""") - tool_sheds: List[str] = [] - # sort tool_shed_urls from shortest to longest, as https://github.com/galaxyproject/galaxy/blob/c7cb47a1b18ccd5b39075a705bbd2f34572755fe/lib/galaxy/util/tool_shed/tool_shed_registry.py#L106-L118 - # has a bug where a toolshed that is an exact substring of another registered toolshed would wrongly be selected. - for tool_shed_url in sorted(cleaned_tool_shed_urls, key=lambda url: len(url)): - tool_sheds.append(f'') - return TOOL_SHEDS_CONF_TEMPLATE.substitute(tool_shed_lines="".join(tool_sheds)) - - def autoupdate_wf(ctx: "PlanemoCliContext", config: "LocalGalaxyConfig", wf: "Runnable") -> Dict[str, Any]: workflow_id = config.workflow_id_for_runnable(wf) _update_wf(config, workflow_id) diff --git a/planemo/commands/cmd_autoupdate.py b/planemo/commands/cmd_autoupdate.py index 464702b70..40c988810 100644 --- a/planemo/commands/cmd_autoupdate.py +++ b/planemo/commands/cmd_autoupdate.py @@ -135,14 +135,6 @@ def cli(ctx, paths, **kwds): # noqa C901 kwds["install_repository_dependencies"] = False kwds["shed_install"] = True - tool_ids = [] - for workflow in modified_workflows: - with open(workflow.path) as fh: - wf_dict = yaml.safe_load(fh) - tool_ids.extend(autoupdate.get_tool_ids_for_workflow(wf_dict=wf_dict)) - tool_ids = list(dict.fromkeys(tool_ids)) - kwds["tool_sheds_config_content"] = autoupdate.get_shed_tools_conf_string_for_tool_ids(tool_ids) - with engine_context(ctx, **kwds) as galaxy_engine: with galaxy_engine.ensure_runnables_served(modified_workflows) as config: for workflow in modified_workflows: diff --git a/planemo/galaxy/config.py b/planemo/galaxy/config.py index ec7c13409..c23ec429e 100644 --- a/planemo/galaxy/config.py +++ b/planemo/galaxy/config.py @@ -35,7 +35,10 @@ from planemo.config import OptionSource from planemo.deps import ensure_dependency_resolvers_conf_configured from planemo.docker import docker_host_args -from planemo.galaxy.workflows import remote_runnable_to_workflow_id +from planemo.galaxy.workflows import ( + get_toolshed_url_for_tool_id, + remote_runnable_to_workflow_id, +) from planemo.io import ( communicate, kill_pid_file, @@ -48,6 +51,10 @@ write_file, ) from planemo.mulled import build_involucro_context +from planemo.runnable import ( + Runnable, + RunnableType, +) from planemo.shed import tool_shed_url from .api import ( DEFAULT_ADMIN_API_KEY, @@ -258,6 +265,10 @@ def config_join(*args): shed_tool_path = kwds.get("shed_tool_path") or config_join("shed_tools") _ensure_directory(shed_tool_path) + # Find tool sheds to add to config + tool_sheds_config_content = get_tool_sheds_conf_for_runnables(runnables) + if tool_sheds_config_content: + kwds["tool_sheds_config_content"] = tool_sheds_config_content sheds_config_path = _configure_sheds_config_file(ctx, config_directory, **kwds) port = _get_port(kwds) properties = _shared_galaxy_properties(config_directory, kwds, for_tests=for_tests) @@ -326,6 +337,7 @@ def local_galaxy_config(ctx, runnables, for_tests=False, **kwds): test_data_dir = _find_test_data(runnables, **kwds) tool_data_tables = _find_tool_data_table(runnables, test_data_dir=test_data_dir, **kwds) data_manager_config_paths = [r.data_manager_conf_path for r in runnables if r.data_manager_conf_path] + galaxy_root = _find_galaxy_root(ctx, **kwds) install_galaxy = kwds.get("install_galaxy", False) if galaxy_root is not None: @@ -389,6 +401,10 @@ def config_join(*args): shed_tool_path = kwds.get("shed_tool_path") or config_join("shed_tools") _ensure_directory(shed_tool_path) + # Find tool sheds to add to config + tool_sheds_config_content = get_tool_sheds_conf_for_runnables(runnables) + if tool_sheds_config_content: + kwds["tool_sheds_config_content"] = tool_sheds_config_content sheds_config_path = _configure_sheds_config_file(ctx, config_directory, **kwds) database_location = config_join("galaxy.sqlite") @@ -568,19 +584,27 @@ def _all_tool_paths( all_tool_paths = {r.path for r in runnables if r.has_tools and not r.data_manager_conf_path} extra_tools = _expand_paths(galaxy_root, extra_tools=extra_tools) all_tool_paths.update(extra_tools) - for runnable in runnables: - if runnable.type.name == "galaxy_workflow": - tool_ids = find_tool_ids(runnable.path) - for tool_id in tool_ids: - tool_paths = DISTRO_TOOLS_ID_TO_PATH.get(tool_id) - if tool_paths: - if isinstance(tool_paths, str): - tool_paths = [tool_paths] - all_tool_paths.update(tool_paths) + for tool_id in get_tool_ids_for_runnables(runnables): + tool_paths = DISTRO_TOOLS_ID_TO_PATH.get(tool_id) + if tool_paths: + if isinstance(tool_paths, str): + tool_paths = [tool_paths] + all_tool_paths.update(tool_paths) return all_tool_paths +def get_workflow_runnables(runnables: List[Runnable]) -> List[Runnable]: + return [r for r in runnables if r.type == RunnableType.galaxy_workflow and r.has_path] + + +def get_tool_ids_for_runnables(runnables) -> List[str]: + tool_ids = [] + for r in get_workflow_runnables(runnables): + tool_ids.extend(find_tool_ids(r.path)) + return list(dict.fromkeys(tool_ids)) + + def _shared_galaxy_properties(config_directory, kwds, for_tests): """Setup properties useful for local and Docker Galaxy instances. @@ -1201,6 +1225,27 @@ def _search_tool_path_for(path, target, extra_paths=None): return None +def get_tool_sheds_conf_for_runnables(runnables: Optional[List[Runnable]]) -> Optional[str]: + if runnables: + tool_ids = get_tool_ids_for_runnables(runnables) + return get_shed_tools_conf_string_for_tool_ids(tool_ids) + return None + + +def get_shed_tools_conf_string_for_tool_ids(tool_ids: List[str]) -> str: + tool_shed_urls = set(get_toolshed_url_for_tool_id(tool_id) for tool_id in tool_ids if tool_id) + # always add main toolshed + tool_shed_urls.add("https://toolshed.g2.bx.psu.edu/") + cleaned_tool_shed_urls = set(_ for _ in tool_shed_urls if _ is not None) + TOOL_SHEDS_CONF_TEMPLATE = Template("""${tool_shed_lines}""") + tool_sheds: List[str] = [] + # sort tool_shed_urls from shortest to longest, as https://github.com/galaxyproject/galaxy/blob/c7cb47a1b18ccd5b39075a705bbd2f34572755fe/lib/galaxy/util/tool_shed/tool_shed_registry.py#L106-L118 + # has a bug where a toolshed that is an exact substring of another registered toolshed would wrongly be selected. + for shed_url in sorted(cleaned_tool_shed_urls, key=lambda url: len(url)): + tool_sheds.append(f'') + return TOOL_SHEDS_CONF_TEMPLATE.substitute(tool_shed_lines="".join(tool_sheds)) + + def _configure_sheds_config_file(ctx, config_directory, **kwds): contents = kwds.get("tool_sheds_config_content") if not contents: diff --git a/planemo/galaxy/workflows.py b/planemo/galaxy/workflows.py index 9ad6e9abb..48ee9d135 100644 --- a/planemo/galaxy/workflows.py +++ b/planemo/galaxy/workflows.py @@ -61,6 +61,14 @@ def guess_tool_shed_url(tool_shed_fqdn: str) -> Optional[str]: return None +def get_toolshed_url_for_tool_id(tool_id: str) -> Optional[str]: + components = tool_id.split("/repos") + if len(components) > 1: + tool_shed_fqdn = components[0] + return guess_tool_shed_url(tool_shed_fqdn=tool_shed_fqdn) + return None + + def load_shed_repos(runnable): if runnable.type.name != "galaxy_workflow": return [] @@ -164,20 +172,25 @@ def _raw_dict(path, importer=None): return workflow -def find_tool_ids(path): - tool_ids = set() - workflow = _raw_dict(path) - - def register_tool_ids(tool_ids, workflow): - for step in workflow["steps"].values(): - if step.get("subworkflow"): - register_tool_ids(tool_ids, step["subworkflow"]) - elif step.get("tool_id"): - tool_ids.add(step["tool_id"]) +def get_tool_ids_for_workflow(wf_dict: Dict[str, Any], tool_ids: Optional[List[str]] = None) -> List[str]: + tool_ids = [] if tool_ids is None else tool_ids + steps = wf_dict["steps"].values() if isinstance(wf_dict["steps"], dict) else wf_dict["steps"] + for step in steps: + if step.get("type", "tool") == "tool" and not step.get("run", {}).get("class") == "GalaxyWorkflow": + tool_id = step["tool_id"] + tool_ids.append(tool_id) + elif step.get("type") == "subworkflow": # GA SWF + get_tool_ids_for_workflow(step["subworkflow"], tool_ids=tool_ids) + elif step.get("run", {}).get("class") == "GalaxyWorkflow": # gxformat2 SWF + get_tool_ids_for_workflow(step["run"], tool_ids=tool_ids) + else: + continue + return list(dict.fromkeys(tool_ids)) - register_tool_ids(tool_ids, workflow) - return list(tool_ids) +def find_tool_ids(path): + workflow = _raw_dict(path) + return get_tool_ids_for_workflow(workflow) WorkflowOutput = namedtuple("WorkflowOutput", ["order_index", "output_name", "label", "optional"]) diff --git a/tests/test_autoupdate.py b/tests/test_autoupdate.py index 005244ed6..b4a108f90 100644 --- a/tests/test_autoupdate.py +++ b/tests/test_autoupdate.py @@ -3,9 +3,9 @@ from galaxy.util import parse_xml_string -from planemo.autoupdate import ( - get_newest_tool_id, - get_shed_tools_conf_string_for_tool_ids, +from planemo.autoupdate import get_newest_tool_id +from planemo.galaxy.config import get_shed_tools_conf_string_for_tool_ids +from planemo.galaxy.workflows import ( get_tool_ids_for_workflow, get_toolshed_url_for_tool_id, )