From a6ff9cb07a3ec5a44f9e78c5891876017f6b61ba Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Sat, 27 Jul 2024 19:01:45 +0200 Subject: [PATCH 1/5] rewrite tool linters defined in planemo --- planemo/commands/cmd_lint.py | 21 +---- planemo/commands/cmd_shed_lint.py | 8 +- planemo/lint.py | 90 -------------------- planemo/linters/biocontainer_registered.py | 39 ++++++--- planemo/linters/conda_requirements.py | 79 +++++++++++------ planemo/linters/doi.py | 99 +++++++++++++++++++++- planemo/linters/urls.py | 83 +++++++++++++++++- planemo/options.py | 36 ++++++++ planemo/tool_lint.py | 2 + 9 files changed, 294 insertions(+), 163 deletions(-) diff --git a/planemo/commands/cmd_lint.py b/planemo/commands/cmd_lint.py index f6ca5a454..05d55bc3c 100644 --- a/planemo/commands/cmd_lint.py +++ b/planemo/commands/cmd_lint.py @@ -20,25 +20,7 @@ @options.fail_level_option() @options.skip_options() @options.recursive_option() -@click.option( - "--urls", - is_flag=True, - default=False, - help="Check validity of URLs in XML files", -) -@click.option( - "--doi", - is_flag=True, - default=False, - help="Check validity of DOIs in XML files", -) -@click.option( - "--conda_requirements", - is_flag=True, - default=False, - help="Check tool requirements for availability in best practice Conda channels.", -) -@options.lint_biocontainers_option() +@options.lint_planemo_defined_tool_linters_options() # @click.option( # "--verify", # is_flag=True, @@ -48,6 +30,7 @@ @command_function def cli(ctx: PlanemoCliContext, uris, **kwds): """Check for common errors and best practices.""" + print("LINT") lint_args = build_tool_lint_args(ctx, **kwds) exit_code = lint_tools_on_path(ctx, uris, lint_args, recursive=kwds["recursive"]) diff --git a/planemo/commands/cmd_shed_lint.py b/planemo/commands/cmd_shed_lint.py index 70bb5c9d4..b696bb260 100644 --- a/planemo/commands/cmd_shed_lint.py +++ b/planemo/commands/cmd_shed_lint.py @@ -30,13 +30,7 @@ "to allow automated creation and/or updates." ), ) -@click.option( - "--urls", - is_flag=True, - default=False, - help="Check validity of URLs in XML files", -) -@options.lint_biocontainers_option() +@options.lint_planemo_defined_tool_linters_options() # @click.option( # "--verify", # is_flag=True, diff --git a/planemo/lint.py b/planemo/lint.py index 87bfd5aaa..6526d6a8f 100644 --- a/planemo/lint.py +++ b/planemo/lint.py @@ -6,16 +6,13 @@ Dict, TYPE_CHECKING, ) -from urllib.request import urlopen -import requests from galaxy.tool_util.lint import ( LintContext, Linter, ) from planemo.io import error -from planemo.shed import find_urls_for_xml from planemo.xml import validation if TYPE_CHECKING: @@ -71,46 +68,6 @@ def handle_lint_complete(lint_ctx, lint_args, failed=False): return 1 if failed else 0 -def lint_dois(tool_xml, lint_ctx): - """Find referenced DOIs and check they have valid with https://doi.org.""" - dois = find_dois_for_xml(tool_xml) - for publication in dois: - is_doi(publication, lint_ctx) - - -def find_dois_for_xml(tool_xml): - dois = [] - for element in tool_xml.getroot().findall("citations"): - for citation in list(element): - if citation.tag == "citation" and citation.attrib.get("type", "") == "doi": - dois.append(citation.text) - return dois - - -def is_doi(publication_id, lint_ctx): - """Check if dx.doi knows about the ``publication_id``.""" - base_url = "https://doi.org" - if publication_id is None: - lint_ctx.error("Empty DOI citation") - return - publication_id = publication_id.strip() - doiless_publication_id = publication_id.split("doi:", 1)[-1] - if not doiless_publication_id: - lint_ctx.error("Empty DOI citation") - return - url = f"{base_url}/{doiless_publication_id}" - r = requests.get(url) - if r.status_code == 200: - if publication_id != doiless_publication_id: - lint_ctx.error("%s is valid, but Galaxy expects DOI without 'doi:' prefix" % publication_id) - else: - lint_ctx.info("%s is a valid DOI" % publication_id) - elif r.status_code == 404: - lint_ctx.error("%s is not a valid DOI" % publication_id) - else: - lint_ctx.warn("dx.doi returned unexpected status code %d" % r.status_code) - - def lint_xsd(lint_ctx, schema_path, path): """Lint XML at specified path with supplied schema.""" name = lint_ctx.object_name or os.path.basename(path) @@ -124,55 +81,8 @@ def lint_xsd(lint_ctx, schema_path, path): lint_ctx.info("File validates against XML schema.") -def lint_urls(root, lint_ctx): - """Find referenced URLs and verify they are valid.""" - urls, docs = find_urls_for_xml(root) - - # This is from Google Chome on macOS, current at time of writing: - BROWSER_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36" - - def validate_url(url, lint_ctx, user_agent=None): - is_valid = True - if url.startswith("http://") or url.startswith("https://"): - if user_agent: - headers = {"User-Agent": user_agent, "Accept": "*/*"} - else: - headers = None - r = None - try: - r = requests.get(url, headers=headers, stream=True) - r.raise_for_status() - next(r.iter_content(1000)) - except Exception as e: - if r is not None and r.status_code == 429: - # too many requests - pass - if r is not None and r.status_code in [403, 503] and "cloudflare" in r.text: - # CloudFlare protection block - pass - else: - is_valid = False - lint_ctx.error(f"Error '{e}' accessing {url}") - else: - try: - with urlopen(url) as handle: - handle.read(100) - except Exception as e: - is_valid = False - lint_ctx.error(f"Error '{e}' accessing {url}") - if is_valid: - lint_ctx.info("URL OK %s" % url) - - for url in urls: - validate_url(url, lint_ctx) - for url in docs: - validate_url(url, lint_ctx, BROWSER_USER_AGENT) - - __all__ = ( "build_lint_args", "handle_lint_complete", - "lint_dois", - "lint_urls", "lint_xsd", ) diff --git a/planemo/linters/biocontainer_registered.py b/planemo/linters/biocontainer_registered.py index 96bc1b937..3931c81c2 100644 --- a/planemo/linters/biocontainer_registered.py +++ b/planemo/linters/biocontainer_registered.py @@ -1,9 +1,14 @@ """Ensure best-practice biocontainer registered for this tool.""" +from typing import TYPE_CHECKING + from galaxy.tool_util.deps.container_resolvers.mulled import targets_to_mulled_name -from galaxy.tool_util.deps.mulled.util import build_target +from galaxy.tool_util.deps.mulled.mulled_build_tool import requirements_to_mulled_targets +from galaxy.tool_util.lint import Linter -from planemo.conda import tool_source_conda_targets +if TYPE_CHECKING: + from galaxy.tool_util.lint import LintContext + from galaxy.tool_util.parser.interface import ToolSource MESSAGE_WARN_NO_REQUIREMENTS = "No valid package requirement tags found to infer BioContainer from." MESSAGE_WARN_NO_CONTAINER = "Failed to find a BioContainer registered for these requirements." @@ -12,18 +17,24 @@ lint_tool_types = ["*"] -def lint_biocontainer_registered(tool_source, lint_ctx): - conda_targets = tool_source_conda_targets(tool_source) - if not conda_targets: - lint_ctx.warn(MESSAGE_WARN_NO_REQUIREMENTS) - return - - mulled_targets = [build_target(c.package, c.version) for c in conda_targets] - name = mulled_container_name("biocontainers", mulled_targets) - if name: - lint_ctx.info(MESSAGE_INFO_FOUND_BIOCONTAINER % name) - else: - lint_ctx.warn(MESSAGE_WARN_NO_CONTAINER) +class BiocontainerValid(Linter): + @classmethod + def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): + requirements, *_ = tool_source.parse_requirements_and_containers() + targets = requirements_to_mulled_targets(requirements) + name = mulled_container_name("biocontainers", targets) + if name: + lint_ctx.info(MESSAGE_INFO_FOUND_BIOCONTAINER % name, linter=cls.name(), node=requirements) + + +class BiocontainerMissing(Linter): + @classmethod + def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): + requirements, *_ = tool_source.parse_requirements_and_containers() + targets = requirements_to_mulled_targets(requirements) + name = mulled_container_name("biocontainers", targets) + if not name: + lint_ctx.warn(MESSAGE_WARN_NO_CONTAINER, linter=cls.name(), node=requirements) def mulled_container_name(namespace, targets): diff --git a/planemo/linters/conda_requirements.py b/planemo/linters/conda_requirements.py index 16a97e4f3..3764becb9 100644 --- a/planemo/linters/conda_requirements.py +++ b/planemo/linters/conda_requirements.py @@ -1,37 +1,62 @@ """Ensure requirements are matched in best practice conda channels.""" +from typing import TYPE_CHECKING + +from galaxy.tool_util.deps.conda_util import requirement_to_conda_targets +from galaxy.tool_util.lint import Linter + from planemo.conda import ( BEST_PRACTICE_CHANNELS, best_practice_search, - tool_source_conda_targets, ) +if TYPE_CHECKING: + from galaxy.tool_util.lint import LintContext + from galaxy.tool_util.parser.interface import ToolSource + lint_tool_types = ["*"] -def lint_requirements_in_conda(tool_source, lint_ctx): - """Check requirements of tool source against best practice Conda channels.""" - conda_targets = tool_source_conda_targets(tool_source) - if not conda_targets: - lint_ctx.warn("No valid package requirement tags found to check against Conda.") - return - - for conda_target in conda_targets: - (best_hit, exact) = best_practice_search(conda_target) - conda_target_str = conda_target.package - if conda_target.version: - conda_target_str += "@%s" % (conda_target.version) - if best_hit and exact: - template = "Requirement [%s] matches target in best practice Conda channel [%s]." - message = template % (conda_target_str, best_hit.get("channel")) - lint_ctx.info(message) - elif best_hit: - template = ( - "Requirement [%s] doesn't exactly match available version [%s] in best practice Conda channel [%s]." - ) - message = template % (conda_target_str, best_hit["version"], best_hit.get("channel")) - lint_ctx.warn(message) - else: - template = "Requirement [%s] doesn't match any recipe in a best practice conda channel [%s]." - message = template % (conda_target_str, BEST_PRACTICE_CHANNELS) - lint_ctx.warn(message) +class CondaRequirementValid(Linter): + @classmethod + def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): + for conda_target, requirement in _requirements_conda_targets(tool_source): + (best_hit, exact) = best_practice_search(conda_target) + conda_target_str = conda_target.package + if conda_target.version: + conda_target_str += "@%s" % (conda_target.version) + if best_hit and exact: + message = f"Requirement [{conda_target_str}] matches target in best practice Conda channel [{best_hit.get('channel')}]." + lint_ctx.info(message, linter=cls.name(), node=requirement) + + +class CondaRequirementInexact(Linter): + @classmethod + def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): + for conda_target, requirement in _requirements_conda_targets(tool_source): + (best_hit, exact) = best_practice_search(conda_target) + conda_target_str = conda_target.package + if conda_target.version: + conda_target_str += "@%s" % (conda_target.version) + if best_hit and not exact: + message = f"Requirement [{conda_target_str}] doesn't exactly match available version [{best_hit['version']}] in best practice Conda channel [{best_hit.get('channel')}]." + lint_ctx.warn(message, linter=cls.name(), node=requirement) + + +class CondaRequirementMissing(Linter): + @classmethod + def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): + for conda_target, requirement in _requirements_conda_targets(tool_source): + (best_hit, exact) = best_practice_search(conda_target) + conda_target_str = conda_target.package + if conda_target.version: + conda_target_str += "@%s" % (conda_target.version) + if best_hit and not exact: + message = f"Requirement [{conda_target_str}] doesn't match any recipe in a best practice conda channel ['{BEST_PRACTICE_CHANNELS}']." + lint_ctx.warn(message, linter=cls.name(), node=requirement) + + +def _requirements_conda_targets(tool_source): + requirements, *_ = tool_source.parse_requirements_and_containers() + for requirement in requirements: + yield requirement_to_conda_targets(requirement), requirement diff --git a/planemo/linters/doi.py b/planemo/linters/doi.py index 544eb590b..34ac80d4c 100644 --- a/planemo/linters/doi.py +++ b/planemo/linters/doi.py @@ -1,8 +1,101 @@ """ Tool linting module that lints Galaxy tools for their DOIs (if a DOI type citation is present) """ -import planemo.lint +from typing import TYPE_CHECKING +import requests +from galaxy.tool_util.lint import Linter -def lint_tool_dois(tool_xml, lint_ctx): - planemo.lint.lint_dois(tool_xml, lint_ctx) +if TYPE_CHECKING: + from galaxy.tool_util.lint import LintContext + from galaxy.tool_util.parser.interface import ToolSource + + +class DoiEmptyNone(Linter): + @classmethod + def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): + tool_xml = getattr(tool_source, "xml_tree", None) + for citation, *_ in _doi_citations(tool_xml): + if citation.text is None: + lint_ctx.error("Empty DOI citation", linter=cls.name(), node=citation) + + +class DoiEmpty(Linter): + @classmethod + def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): + tool_xml = getattr(tool_source, "xml_tree", None) + for citation, publication_id, doiless_publication_id in _doi_citations(tool_xml): + if citation.text is None: + continue + if not doiless_publication_id: + lint_ctx.error("Empty DOI citation", linter=cls.name(), node=citation) + + +class DoiValid(Linter): + @classmethod + def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): + tool_xml = getattr(tool_source, "xml_tree", None) + for citation, publication_id, doiless_publication_id in _doi_citations(tool_xml): + if citation.text is None or not doiless_publication_id: + continue + url = f"https://doi.org/{doiless_publication_id}" + r = requests.get(url) + if r.status_code == 200 and publication_id == doiless_publication_id: + lint_ctx.info("%s is a valid DOI" % publication_id, linter=cls.name(), node=citation) + + +class DoiValidWithDoi(Linter): + @classmethod + def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): + tool_xml = getattr(tool_source, "xml_tree", None) + for citation, publication_id, doiless_publication_id in _doi_citations(tool_xml): + if citation.text is None or not doiless_publication_id: + continue + url = f"https://doi.org/{doiless_publication_id}" + r = requests.get(url) + if r.status_code == 200 and publication_id != doiless_publication_id: + lint_ctx.error( + "%s is valid, but Galaxy expects DOI without 'doi:' prefix" % publication_id, + linter=cls.name(), + node=citation, + ) + + +class DoiInvalid(Linter): + @classmethod + def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): + tool_xml = getattr(tool_source, "xml_tree", None) + for citation, publication_id, doiless_publication_id in _doi_citations(tool_xml): + if citation.text is None or not doiless_publication_id: + continue + url = f"https://doi.org/{doiless_publication_id}" + r = requests.get(url) + if r.status_code == 404: + lint_ctx.error("%s is not a valid DOI" % publication_id, linter=cls.name(), node=citation) + + +class DoiUnexpectedResponse(Linter): + @classmethod + def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): + tool_xml = getattr(tool_source, "xml_tree", None) + for citation, publication_id, doiless_publication_id in _doi_citations(tool_xml): + if citation.text is None or not doiless_publication_id: + continue + url = f"https://doi.org/{doiless_publication_id}" + r = requests.get(url) + if r.status_code not in [200, 400]: + lint_ctx.warn( + "dx.doi returned unexpected status code %d" % r.status_code, linter=cls.name(), node=citation + ) + + +def _doi_citations(tool_xml): + for element in tool_xml.getroot().findall("citations"): + for citation in list(element): + if citation.tag == "citation" and citation.attrib.get("type", "") == "doi": + if citation.text is None: + publication_id = doiless_publication_id = None + else: + publication_id = citation.text.strip() + doiless_publication_id = publication_id.split("doi:", 1)[-1] + yield citation, publication_id, doiless_publication_id diff --git a/planemo/linters/urls.py b/planemo/linters/urls.py index ae41a2567..ee0cbf0ba 100644 --- a/planemo/linters/urls.py +++ b/planemo/linters/urls.py @@ -1,8 +1,85 @@ """ Tool linting module that lints Galaxy tools for their URLs """ +from typing import TYPE_CHECKING -import planemo.lint +import requests +from galaxy.tool_util.lint import Linter +from urllib.request import urlopen +from planemo.shed import _find_urls_in_text -def lint_tool_urls(tool_source, lint_ctx): - planemo.lint.lint_urls(tool_source.root, lint_ctx) +if TYPE_CHECKING: + from galaxy.tool_util.lint import LintContext + from galaxy.tool_util.parser.interface import ToolSource + +BROWSER_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36" + + +def find_urls_in_help(root): + for help in root.findall("help"): + for url in _find_urls_in_text(help.text): + yield url[0], help + + +class URLInaccessibleHttp(Linter): + @classmethod + def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): + tool_xml = getattr(tool_source, "xml_tree", None) + for url, help in find_urls_in_help(tool_xml): + if url.startswith("http://") or url.startswith("https://"): + headers = {"User-Agent": BROWSER_USER_AGENT, "Accept": "*/*"} + r = None + try: + r = requests.get(url, headers=headers, stream=True) + r.raise_for_status() + next(r.iter_content(1000)) + except Exception as e: + if r is not None and r.status_code == 429: + # too many requests + pass + if r is not None and r.status_code in [403, 503] and "cloudflare" in r.text: + # CloudFlare protection block + pass + else: + lint_ctx.error(f"Error '{e}' accessing {url}", linter=cls.name(), node=help) + + +class URLInaccessible(Linter): + @classmethod + def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): + tool_xml = getattr(tool_source, "xml_tree", None) + for url, help in find_urls_in_help(tool_xml): + if not url.startswith("http://") and not url.startswith("https://"): + try: + with urlopen(url) as handle: + handle.read(100) + except Exception as e: + lint_ctx.error(f"Error '{e}' accessing {url}", linter=cls.name(), node=help) + + +class URLValid(Linter): + @classmethod + def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): + tool_xml = getattr(tool_source, "xml_tree", None) + for url, help in find_urls_in_help(tool_xml): + is_valid = True + if url.startswith("http://") or url.startswith("https://"): + headers = {"User-Agent": BROWSER_USER_AGENT, "Accept": "*/*"} + r = None + try: + r = requests.get(url, headers=headers, stream=True) + r.raise_for_status() + next(r.iter_content(1000)) + except Exception: + if r is None or ( + r.status_code != 429 and not (r.status_code in [403, 503] and "cloudflare" in r.text) + ): + is_valid = False + else: + try: + with urlopen(url) as handle: + handle.read(100) + except Exception: + is_valid = False + if is_valid: + lint_ctx.info("URL OK %s" % url, linter=cls.name(), node=help) diff --git a/planemo/options.py b/planemo/options.py index 867d637c2..90e3652d0 100644 --- a/planemo/options.py +++ b/planemo/options.py @@ -1394,6 +1394,42 @@ def lint_biocontainers_option(): ) +def lint_urls(): + return planemo_option( + "--urls", + is_flag=True, + default=False, + help="Check validity of URLs in XML files", + ) + + +def lint_doi(): + return planemo_option( + "--doi", + is_flag=True, + default=False, + help="Check validity of DOIs in XML files", + ) + + +def lint_conda_requirements(): + return planemo_option( + "--conda_requirements", + is_flag=True, + default=False, + help="Check tool requirements for availability in best practice Conda channels.", + ) + + +def lint_planemo_defined_tool_linters_options(): + return _compose( + lint_urls(), + lint_doi(), + lint_conda_requirements, + lint_biocontainers_option(), + ) + + def report_level_option(): return planemo_option( "--report_level", diff --git a/planemo/tool_lint.py b/planemo/tool_lint.py index eea2ad7ff..91e9809b7 100644 --- a/planemo/tool_lint.py +++ b/planemo/tool_lint.py @@ -37,10 +37,12 @@ def build_tool_lint_args(ctx: "PlanemoCliContext", **kwds) -> Dict[str, Any]: lint_args = build_lint_args(ctx, **kwds) extra_modules = _lint_extra_modules(**kwds) lint_args["extra_modules"] = extra_modules + print(f"{extra_modules=}") return lint_args def lint_tools_on_path(ctx, paths, lint_args, **kwds): + print(f"{lint_args}") assert_tools = kwds.get("assert_tools", True) recursive = kwds.get("recursive", False) exit_codes = [] From 7aca51a90f6a7590dd52464d6f24d96bd6fffc23 Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Mon, 29 Jul 2024 11:26:12 +0200 Subject: [PATCH 2/5] add typing --- planemo/linters/biocontainer_registered.py | 14 ++++++++--- planemo/linters/conda_requirements.py | 27 ++++++++++++++-------- planemo/linters/doi.py | 15 +++++++++++- planemo/linters/urls.py | 10 +++++++- planemo/options.py | 2 +- planemo/shed_lint.py | 3 ++- 6 files changed, 55 insertions(+), 16 deletions(-) diff --git a/planemo/linters/biocontainer_registered.py b/planemo/linters/biocontainer_registered.py index 3931c81c2..01ac56349 100644 --- a/planemo/linters/biocontainer_registered.py +++ b/planemo/linters/biocontainer_registered.py @@ -1,12 +1,18 @@ """Ensure best-practice biocontainer registered for this tool.""" -from typing import TYPE_CHECKING +from typing import ( + Optional, + TYPE_CHECKING, +) from galaxy.tool_util.deps.container_resolvers.mulled import targets_to_mulled_name from galaxy.tool_util.deps.mulled.mulled_build_tool import requirements_to_mulled_targets from galaxy.tool_util.lint import Linter +from .util import xml_node_from_toolsource + if TYPE_CHECKING: + from galaxy.tool_util.deps.conda_util import CondaTarget from galaxy.tool_util.lint import LintContext from galaxy.tool_util.parser.interface import ToolSource @@ -24,7 +30,8 @@ def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): targets = requirements_to_mulled_targets(requirements) name = mulled_container_name("biocontainers", targets) if name: - lint_ctx.info(MESSAGE_INFO_FOUND_BIOCONTAINER % name, linter=cls.name(), node=requirements) + requirements_node = xml_node_from_toolsource(tool_source, "requirements") + lint_ctx.info(MESSAGE_INFO_FOUND_BIOCONTAINER % name, linter=cls.name(), node=requirements_node) class BiocontainerMissing(Linter): @@ -34,10 +41,11 @@ def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): targets = requirements_to_mulled_targets(requirements) name = mulled_container_name("biocontainers", targets) if not name: + requirements_node = xml_node_from_toolsource(tool_source, "requirements") lint_ctx.warn(MESSAGE_WARN_NO_CONTAINER, linter=cls.name(), node=requirements) -def mulled_container_name(namespace, targets): +def mulled_container_name(namespace: str, targets: List[CondaTarget]) -> Optional[str]: name = targets_to_mulled_name(targets=targets, hash_func="v2", namespace=namespace) if name: return f"quay.io/{namespace}/{name}" diff --git a/planemo/linters/conda_requirements.py b/planemo/linters/conda_requirements.py index 3764becb9..3f13d7bd8 100644 --- a/planemo/linters/conda_requirements.py +++ b/planemo/linters/conda_requirements.py @@ -1,10 +1,14 @@ """Ensure requirements are matched in best practice conda channels.""" -from typing import TYPE_CHECKING +from typing import ( + Generator, + TYPE_CHECKING, +) from galaxy.tool_util.deps.conda_util import requirement_to_conda_targets from galaxy.tool_util.lint import Linter +from .util import xml_node_from_toolsource from planemo.conda import ( BEST_PRACTICE_CHANNELS, best_practice_search, @@ -20,43 +24,48 @@ class CondaRequirementValid(Linter): @classmethod def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): - for conda_target, requirement in _requirements_conda_targets(tool_source): + for conda_target in _requirements_conda_targets(tool_source): (best_hit, exact) = best_practice_search(conda_target) conda_target_str = conda_target.package if conda_target.version: conda_target_str += "@%s" % (conda_target.version) if best_hit and exact: message = f"Requirement [{conda_target_str}] matches target in best practice Conda channel [{best_hit.get('channel')}]." - lint_ctx.info(message, linter=cls.name(), node=requirement) + requirements_node = xml_node_from_toolsource(tool_source, "requirements") + lint_ctx.info(message, linter=cls.name(), node=requirements_nodes) class CondaRequirementInexact(Linter): @classmethod def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): - for conda_target, requirement in _requirements_conda_targets(tool_source): + for conda_target in _requirements_conda_targets(tool_source): (best_hit, exact) = best_practice_search(conda_target) conda_target_str = conda_target.package if conda_target.version: conda_target_str += "@%s" % (conda_target.version) if best_hit and not exact: message = f"Requirement [{conda_target_str}] doesn't exactly match available version [{best_hit['version']}] in best practice Conda channel [{best_hit.get('channel')}]." - lint_ctx.warn(message, linter=cls.name(), node=requirement) + requirements_node = xml_node_from_toolsource(tool_source, "requirements") + lint_ctx.warn(message, linter=cls.name(), node=requirements_node) class CondaRequirementMissing(Linter): @classmethod def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): - for conda_target, requirement in _requirements_conda_targets(tool_source): + for conda_target in _requirements_conda_targets(tool_source): (best_hit, exact) = best_practice_search(conda_target) conda_target_str = conda_target.package if conda_target.version: conda_target_str += "@%s" % (conda_target.version) if best_hit and not exact: message = f"Requirement [{conda_target_str}] doesn't match any recipe in a best practice conda channel ['{BEST_PRACTICE_CHANNELS}']." - lint_ctx.warn(message, linter=cls.name(), node=requirement) + requirements_node = xml_node_from_toolsource(tool_source, "requirements") + lint_ctx.warn(message, linter=cls.name(), node=requirements_node) -def _requirements_conda_targets(tool_source): +def _requirements_conda_targets(tool_source: "ToolSource") -> Generator[CondaTarget]: requirements, *_ = tool_source.parse_requirements_and_containers() for requirement in requirements: - yield requirement_to_conda_targets(requirement), requirement + conda_target = requirement_to_conda_targets(requirement) + if conda_target: + yield conda_target diff --git a/planemo/linters/doi.py b/planemo/linters/doi.py index 34ac80d4c..04f0fa413 100644 --- a/planemo/linters/doi.py +++ b/planemo/linters/doi.py @@ -9,12 +9,15 @@ if TYPE_CHECKING: from galaxy.tool_util.lint import LintContext from galaxy.tool_util.parser.interface import ToolSource + from galaxy.util import ElementTree class DoiEmptyNone(Linter): @classmethod def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): tool_xml = getattr(tool_source, "xml_tree", None) + if not tool_xml: + return for citation, *_ in _doi_citations(tool_xml): if citation.text is None: lint_ctx.error("Empty DOI citation", linter=cls.name(), node=citation) @@ -24,6 +27,8 @@ class DoiEmpty(Linter): @classmethod def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): tool_xml = getattr(tool_source, "xml_tree", None) + if not tool_xml: + return for citation, publication_id, doiless_publication_id in _doi_citations(tool_xml): if citation.text is None: continue @@ -35,6 +40,8 @@ class DoiValid(Linter): @classmethod def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): tool_xml = getattr(tool_source, "xml_tree", None) + if not tool_xml: + return for citation, publication_id, doiless_publication_id in _doi_citations(tool_xml): if citation.text is None or not doiless_publication_id: continue @@ -48,6 +55,8 @@ class DoiValidWithDoi(Linter): @classmethod def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): tool_xml = getattr(tool_source, "xml_tree", None) + if not tool_xml: + return for citation, publication_id, doiless_publication_id in _doi_citations(tool_xml): if citation.text is None or not doiless_publication_id: continue @@ -65,6 +74,8 @@ class DoiInvalid(Linter): @classmethod def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): tool_xml = getattr(tool_source, "xml_tree", None) + if not tool_xml: + return for citation, publication_id, doiless_publication_id in _doi_citations(tool_xml): if citation.text is None or not doiless_publication_id: continue @@ -78,6 +89,8 @@ class DoiUnexpectedResponse(Linter): @classmethod def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): tool_xml = getattr(tool_source, "xml_tree", None) + if not tool_xml: + return for citation, publication_id, doiless_publication_id in _doi_citations(tool_xml): if citation.text is None or not doiless_publication_id: continue @@ -89,7 +102,7 @@ def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): ) -def _doi_citations(tool_xml): +def _doi_citations(tool_xml: "ElementTree"): for element in tool_xml.getroot().findall("citations"): for citation in list(element): if citation.tag == "citation" and citation.attrib.get("type", "") == "doi": diff --git a/planemo/linters/urls.py b/planemo/linters/urls.py index ee0cbf0ba..bf374a463 100644 --- a/planemo/linters/urls.py +++ b/planemo/linters/urls.py @@ -1,5 +1,6 @@ """ Tool linting module that lints Galaxy tools for their URLs """ + from typing import TYPE_CHECKING import requests @@ -11,11 +12,12 @@ if TYPE_CHECKING: from galaxy.tool_util.lint import LintContext from galaxy.tool_util.parser.interface import ToolSource + from galaxy.util import ElementTree BROWSER_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36" -def find_urls_in_help(root): +def find_urls_in_help(root: "ElementTree"): for help in root.findall("help"): for url in _find_urls_in_text(help.text): yield url[0], help @@ -25,6 +27,8 @@ class URLInaccessibleHttp(Linter): @classmethod def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): tool_xml = getattr(tool_source, "xml_tree", None) + if not tool_xml: + return for url, help in find_urls_in_help(tool_xml): if url.startswith("http://") or url.startswith("https://"): headers = {"User-Agent": BROWSER_USER_AGENT, "Accept": "*/*"} @@ -48,6 +52,8 @@ class URLInaccessible(Linter): @classmethod def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): tool_xml = getattr(tool_source, "xml_tree", None) + if not tool_xml: + return for url, help in find_urls_in_help(tool_xml): if not url.startswith("http://") and not url.startswith("https://"): try: @@ -61,6 +67,8 @@ class URLValid(Linter): @classmethod def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): tool_xml = getattr(tool_source, "xml_tree", None) + if not tool_xml: + return for url, help in find_urls_in_help(tool_xml): is_valid = True if url.startswith("http://") or url.startswith("https://"): diff --git a/planemo/options.py b/planemo/options.py index 90e3652d0..005a22b66 100644 --- a/planemo/options.py +++ b/planemo/options.py @@ -1425,7 +1425,7 @@ def lint_planemo_defined_tool_linters_options(): return _compose( lint_urls(), lint_doi(), - lint_conda_requirements, + lint_conda_requirements(), lint_biocontainers_option(), ) diff --git a/planemo/shed_lint.py b/planemo/shed_lint.py index b3d03760b..62d2ecdcc 100644 --- a/planemo/shed_lint.py +++ b/planemo/shed_lint.py @@ -12,7 +12,6 @@ from planemo.io import info from planemo.lint import ( handle_lint_complete, - lint_urls, lint_xsd, setup_lint, ) @@ -189,6 +188,8 @@ def lint_readme(realized_repository, lint_ctx): def lint_tool_dependencies_urls(realized_repository, lint_ctx): + + path = realized_repository.real_path tool_dependencies = os.path.join(path, "tool_dependencies.xml") if not os.path.exists(tool_dependencies): From a924bdd56689752ea5f79430c0548101e5c1a76b Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Mon, 29 Jul 2024 11:47:22 +0200 Subject: [PATCH 3/5] fix linter errors and restore lint_urls for lint_tool_dependencies_urls --- planemo/linters/biocontainer_registered.py | 7 ++-- planemo/linters/conda_requirements.py | 7 ++-- planemo/linters/urls.py | 2 +- planemo/shed_lint.py | 42 ++++++++++++++++++++-- 4 files changed, 50 insertions(+), 8 deletions(-) diff --git a/planemo/linters/biocontainer_registered.py b/planemo/linters/biocontainer_registered.py index 01ac56349..ee14f86d8 100644 --- a/planemo/linters/biocontainer_registered.py +++ b/planemo/linters/biocontainer_registered.py @@ -1,6 +1,7 @@ """Ensure best-practice biocontainer registered for this tool.""" from typing import ( + List, Optional, TYPE_CHECKING, ) @@ -42,10 +43,12 @@ def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): name = mulled_container_name("biocontainers", targets) if not name: requirements_node = xml_node_from_toolsource(tool_source, "requirements") - lint_ctx.warn(MESSAGE_WARN_NO_CONTAINER, linter=cls.name(), node=requirements) + lint_ctx.warn(MESSAGE_WARN_NO_CONTAINER, linter=cls.name(), node=requirements_node) -def mulled_container_name(namespace: str, targets: List[CondaTarget]) -> Optional[str]: +def mulled_container_name(namespace: str, targets: List["CondaTarget"]) -> Optional[str]: name = targets_to_mulled_name(targets=targets, hash_func="v2", namespace=namespace) if name: return f"quay.io/{namespace}/{name}" + else: + return None diff --git a/planemo/linters/conda_requirements.py b/planemo/linters/conda_requirements.py index 3f13d7bd8..9fd99402a 100644 --- a/planemo/linters/conda_requirements.py +++ b/planemo/linters/conda_requirements.py @@ -8,13 +8,14 @@ from galaxy.tool_util.deps.conda_util import requirement_to_conda_targets from galaxy.tool_util.lint import Linter -from .util import xml_node_from_toolsource from planemo.conda import ( BEST_PRACTICE_CHANNELS, best_practice_search, ) +from .util import xml_node_from_toolsource if TYPE_CHECKING: + from galaxy.tool_util.deps.conda_util import CondaTarget from galaxy.tool_util.lint import LintContext from galaxy.tool_util.parser.interface import ToolSource @@ -32,7 +33,7 @@ def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): if best_hit and exact: message = f"Requirement [{conda_target_str}] matches target in best practice Conda channel [{best_hit.get('channel')}]." requirements_node = xml_node_from_toolsource(tool_source, "requirements") - lint_ctx.info(message, linter=cls.name(), node=requirements_nodes) + lint_ctx.info(message, linter=cls.name(), node=requirements_node) class CondaRequirementInexact(Linter): @@ -63,7 +64,7 @@ def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): lint_ctx.warn(message, linter=cls.name(), node=requirements_node) -def _requirements_conda_targets(tool_source: "ToolSource") -> Generator[CondaTarget]: +def _requirements_conda_targets(tool_source: "ToolSource") -> Generator["CondaTarget"]: requirements, *_ = tool_source.parse_requirements_and_containers() for requirement in requirements: conda_target = requirement_to_conda_targets(requirement) diff --git a/planemo/linters/urls.py b/planemo/linters/urls.py index bf374a463..4d3cca5a7 100644 --- a/planemo/linters/urls.py +++ b/planemo/linters/urls.py @@ -2,10 +2,10 @@ """ from typing import TYPE_CHECKING +from urllib.request import urlopen import requests from galaxy.tool_util.lint import Linter -from urllib.request import urlopen from planemo.shed import _find_urls_in_text diff --git a/planemo/shed_lint.py b/planemo/shed_lint.py index 62d2ecdcc..b68d981f2 100644 --- a/planemo/shed_lint.py +++ b/planemo/shed_lint.py @@ -3,7 +3,9 @@ import os import xml.etree.ElementTree as ET from typing import TYPE_CHECKING +from urllib.request import urlopen +import requests import yaml from galaxy.tool_util.lint import lint_tool_source_with from galaxy.tool_util.linters.help import rst_invalid @@ -17,6 +19,7 @@ ) from planemo.shed import ( CURRENT_CATEGORIES, + find_urls_for_xml, REPO_TYPE_SUITE, REPO_TYPE_TOOL_DEP, REPO_TYPE_UNRESTRICTED, @@ -188,8 +191,43 @@ def lint_readme(realized_repository, lint_ctx): def lint_tool_dependencies_urls(realized_repository, lint_ctx): - - + + def lint_urls(root, lint_ctx): + """Find referenced URLs and verify they are valid. + + note this function was used previously for tools (URLs in help) and tool dependency files + the former has been rewritten and therefore the function has been moved here + """ + urls, _ = find_urls_for_xml(root) + for url in urls: + is_valid = True + if url.startswith("http://") or url.startswith("https://"): + headers = None + r = None + try: + r = requests.get(url, headers=headers, stream=True) + r.raise_for_status() + next(r.iter_content(1000)) + except Exception as e: + if r is not None and r.status_code == 429: + # too many requests + pass + if r is not None and r.status_code in [403, 503] and "cloudflare" in r.text: + # CloudFlare protection block + pass + else: + is_valid = False + lint_ctx.error(f"Error '{e}' accessing {url}") + else: + try: + with urlopen(url) as handle: + handle.read(100) + except Exception as e: + is_valid = False + lint_ctx.error(f"Error '{e}' accessing {url}") + if is_valid: + lint_ctx.info("URL OK %s" % url) + path = realized_repository.real_path tool_dependencies = os.path.join(path, "tool_dependencies.xml") if not os.path.exists(tool_dependencies): From 595598d2f9f7a3f3cf27583b1ce07b42aaa816db Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Mon, 29 Jul 2024 12:57:56 +0200 Subject: [PATCH 4/5] add missing file --- planemo/linters/util.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 planemo/linters/util.py diff --git a/planemo/linters/util.py b/planemo/linters/util.py new file mode 100644 index 000000000..9eb09c58b --- /dev/null +++ b/planemo/linters/util.py @@ -0,0 +1,16 @@ +from typing import ( + Optional, + TYPE_CHECKING, +) + +if TYPE_CHECKING: + from galaxy.tool_util.parser.interface import ToolSource + from galaxy.util import Element + + +def xml_node_from_toolsource(tool_source: "ToolSource", tag: "str") -> Optional["Element"]: + node = None + xml_tree = getattr(tool_source, "xml_tree", None) + if xml_tree: + node = xml_tree.find(tag) + return node From daa25ec4cf3f8bb486f898214a42f5dccf3d8f7b Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Tue, 30 Jul 2024 15:25:47 +0200 Subject: [PATCH 5/5] typing fix --- planemo/linters/conda_requirements.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/planemo/linters/conda_requirements.py b/planemo/linters/conda_requirements.py index 9fd99402a..6c1c4742e 100644 --- a/planemo/linters/conda_requirements.py +++ b/planemo/linters/conda_requirements.py @@ -64,7 +64,7 @@ def lint(cls, tool_source: "ToolSource", lint_ctx: "LintContext"): lint_ctx.warn(message, linter=cls.name(), node=requirements_node) -def _requirements_conda_targets(tool_source: "ToolSource") -> Generator["CondaTarget"]: +def _requirements_conda_targets(tool_source: "ToolSource") -> Generator["CondaTarget", None, None]: requirements, *_ = tool_source.parse_requirements_and_containers() for requirement in requirements: conda_target = requirement_to_conda_targets(requirement)