Skip to content

Commit

Permalink
More tool parsing abstractions, for greater reuse outside main app.
Browse files Browse the repository at this point in the history
  • Loading branch information
jmchilton committed Jul 9, 2024
1 parent 9214962 commit ecb15b3
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 52 deletions.
32 changes: 20 additions & 12 deletions lib/galaxy/managers/citations.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,25 @@
import functools
import logging
from typing import (
Optional,
Union,
)

from beaker.cache import CacheManager
from beaker.util import parse_cache_config_options

from galaxy.structured_app import BasicSharedApp
from galaxy.tool_util.parser.interface import Citation
from galaxy.util import (
DEFAULT_SOCKET_TIMEOUT,
requests,
)

log = logging.getLogger(__name__)

CitationT = Union["BibtexCitation", "DoiCitation"]
OptionalCitationT = Optional[CitationT]


class CitationsManager:
def __init__(self, app: BasicSharedApp) -> None:
Expand All @@ -29,8 +37,8 @@ def citations_for_tool_ids(self, tool_ids):
citation_collection.add(citation)
return citation_collection.citations

def parse_citation(self, citation_elem):
return parse_citation(citation_elem, self)
def parse_citation(self, citation_model: Citation) -> OptionalCitationT:
return parse_citation(citation_model, self)

def _get_tool(self, tool_id):
tool = self.app.toolbox.get_tool(tool_id)
Expand Down Expand Up @@ -67,19 +75,19 @@ def get_bibtex(self, doi):
return self._cache.get(key=doi, createfunc=createfunc)


def parse_citation(elem, citation_manager):
def parse_citation(citation_model: Citation, citation_manager) -> OptionalCitationT:
"""
Parse an abstract citation entry from the specified XML element.
"""
citation_type = elem.attrib.get("type", None)
citation_type = citation_model.type
citation_class = CITATION_CLASSES.get(citation_type, None)
if not citation_class:
log.warning(f"Unknown or unspecified citation type: {citation_type}")
return None
try:
citation = citation_class(elem, citation_manager)
citation = citation_class(citation_model, citation_manager)
except Exception as e:
raise Exception(f"Invalid citation of type '{citation_type}' with content '{elem.text}': {e}")
raise Exception(f"Invalid citation of type '{citation_type}' with content '{citation_model.content}': {e}")
return citation


Expand Down Expand Up @@ -126,18 +134,18 @@ def has_doi(self):


class BibtexCitation(BaseCitation):
def __init__(self, elem, citation_manager):
self.raw_bibtex = elem.text.strip()
def __init__(self, citation_model: Citation, citation_manager: CitationsManager):
self.raw_bibtex = citation_model.content

def to_bibtex(self):
def to_bibtex(self) -> str:
return self.raw_bibtex


class DoiCitation(BaseCitation):
BIBTEX_UNSET = object()

def __init__(self, elem, citation_manager):
self.__doi = elem.text.strip()
def __init__(self, citation_model: Citation, citation_manager: CitationsManager):
self.__doi = citation_model.content
self.doi_cache = citation_manager.doi_cache
self.raw_bibtex = DoiCitation.BIBTEX_UNSET

Expand All @@ -147,7 +155,7 @@ def has_doi(self):
def doi(self):
return self.__doi

def to_bibtex(self):
def to_bibtex(self) -> str:
if self.raw_bibtex is DoiCitation.BIBTEX_UNSET:
try:
self.raw_bibtex = self.doi_cache.get_bibtex(self.__doi)
Expand Down
32 changes: 25 additions & 7 deletions lib/galaxy/tool_util/parser/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
)

import packaging.version
from pydantic import BaseModel
from typing_extensions import TypedDict

from galaxy.util.path import safe_walk
Expand Down Expand Up @@ -61,6 +62,16 @@ class ToolSourceTests(TypedDict):
tests: List[ToolSourceTest]


# Typed shape of one external cross-reference entry returned by ``parse_xrefs``.
class XrefDict(TypedDict):
# Reference value: the stripped text of an ``<xref>`` element in the XML
# parser, or the ``value`` key of an xref entry in the YAML parser.
value: str
# Reference type: the ``type`` attribute (XML) or ``type`` key (YAML) of the xref.
reftype: str

# Plain data model for one tool citation, as returned by ``ToolSource.parse_citations``.
class Citation(BaseModel):
# Citation kind; the citations manager uses it as the lookup key into
# ``CITATION_CLASSES`` to pick the concrete citation class.
type: str
# Citation payload with surrounding whitespace already stripped by the XML
# parser; the citation classes store it as raw BibTeX or as a DOI.
content: str

class ToolSource(metaclass=ABCMeta):
"""This interface represents an abstract source to parse tool
information from.
Expand All @@ -76,7 +87,7 @@ def parse_id(self) -> Optional[str]:
"""

@abstractmethod
def parse_version(self):
def parse_version(self) -> Optional[str]:
"""Parse a version describing the abstract tool."""

def parse_tool_module(self):
Expand All @@ -98,12 +109,15 @@ def parse_tool_type(self):
return None

@abstractmethod
def parse_name(self):
def parse_name(self) -> str:
"""Parse a short name for tool (required)."""

@abstractmethod
def parse_description(self):
"""Parse a description for tool. Longer than name, shorted than help."""
def parse_description(self) -> str:
"""Parse a description for tool. Longer than name, shorter than help.
We parse this out as "" if it isn't explicitly declared.
"""

def parse_edam_operations(self) -> List[str]:
"""Parse list of edam operation codes."""
Expand All @@ -114,7 +128,7 @@ def parse_edam_topics(self) -> List[str]:
return []

@abstractmethod
def parse_xrefs(self) -> List[Dict[str, str]]:
def parse_xrefs(self) -> List[XrefDict]:
"""Parse list of external resource URIs and types."""

def parse_display_interface(self, default):
Expand Down Expand Up @@ -276,13 +290,17 @@ def parse_help(self) -> Optional[str]:
"""

@abstractmethod
def parse_profile(self):
def parse_profile(self) -> Optional[str]:
"""Return tool profile version as Galaxy major e.g. 16.01 or 16.04."""

@abstractmethod
def parse_license(self):
def parse_license(self) -> Optional[str]:
"""Return license corresponding to tool wrapper."""

def parse_citations(self) -> List[Citation]:
"""Return a list of citations.

This method is not abstract: the default implementation reports no
citations, and tool sources that can supply them override it.
"""
return []

@abstractmethod
def parse_python_template_version(self) -> Optional[packaging.version.Version]:
"""
Expand Down
40 changes: 34 additions & 6 deletions lib/galaxy/tool_util/parser/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
)
from .interface import (
AssertionList,
Citation,
InputSource,
PageSource,
PagesSource,
Expand All @@ -41,6 +42,7 @@
ToolSource,
ToolSourceTest,
ToolSourceTests,
XrefDict,
)
from .output_actions import ToolOutputActionGroup
from .output_collection_def import dataset_collector_descriptions_from_elem
Expand Down Expand Up @@ -142,7 +144,7 @@ def mem_optimize(self):
self.root = None
self._xml_tree = None

def parse_version(self):
def parse_version(self) -> Optional[str]:
return self.root.get("version", None)

def parse_id(self):
Expand Down Expand Up @@ -188,17 +190,17 @@ def parse_edam_topics(self):
return []
return [edam_topic.text for edam_topic in edam_topics.findall("edam_topic")]

def parse_xrefs(self):
def parse_xrefs(self) -> List[XrefDict]:
xrefs = self.root.find("xrefs")
if xrefs is None:
return []
return [
dict(value=xref.text.strip(), reftype=xref.attrib["type"])
XrefDict(value=xref.text.strip(), reftype=str(xref.attrib["type"]))
for xref in xrefs.findall("xref")
if xref.get("type")
if xref.get("type") and xref.text
]

def parse_description(self):
def parse_description(self) -> str:
return xml_text(self.root, "description")

def parse_display_interface(self, default):
Expand Down Expand Up @@ -659,9 +661,35 @@ def parse_profile(self) -> str:
# - Enable buggy interpreter attribute.
return self.root.get("profile", "16.01")

def parse_license(self):
def parse_license(self) -> Optional[str]:
return self.root.get("license")

def parse_citations(self) -> List[Citation]:
    """Return the citations declared in the tool's ``<citations>`` element.

    Each ``<citation>`` child is converted into a ``Citation`` model whose
    ``type`` is the element's ``type`` attribute and whose ``content`` is
    the element's text with surrounding whitespace stripped. Children with
    any other tag are skipped. An empty list is returned when the tool
    declares no ``<citations>`` element.
    """
    citations: List[Citation] = []
    root = self.root
    citations_elem = root.find("citations")
    if citations_elem is None:
        return citations

    for citation_elem in citations_elem:
        if citation_elem.tag != "citation":
            # Only <citation> children describe citations. The previous
            # ``pass`` here was a no-op, so unrelated children fell through
            # and were parsed as if they were citations.
            continue

        # NOTE(review): a <citation> without a ``type`` attribute yields
        # None here while ``Citation.type`` is declared as ``str`` --
        # confirm whether such entries should be tolerated or rejected.
        citation_type = citation_elem.attrib.get("type", None)
        citation_raw_text = citation_elem.text
        # A citation element must carry text (the BibTeX entry or the DOI).
        assert citation_raw_text
        content = citation_raw_text.strip()

        citations.append(
            Citation(
                type=citation_type,
                content=content,
            )
        )

    return citations

def parse_python_template_version(self):
python_template_version = self.root.get("python_template_version")
if python_template_version is not None:
Expand Down
26 changes: 15 additions & 11 deletions lib/galaxy/tool_util/parser/yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
ToolSource,
ToolSourceTest,
ToolSourceTests,
XrefDict,
)
from .output_collection_def import dataset_collector_descriptions_from_output_dict
from .output_objects import (
Expand Down Expand Up @@ -51,24 +52,27 @@ def parse_tool_type(self):
def parse_id(self):
return self.root_dict.get("id")

def parse_version(self):
return str(self.root_dict.get("version"))
def parse_version(self) -> Optional[str]:
# Return the tool's "version" entry as a string, or None when the key is absent.
version_raw = self.root_dict.get("version")
# Guard the conversion so that a missing version stays None rather than
# being stringified into the literal text "None".
return str(version_raw) if version_raw is not None else None

def parse_name(self):
return self.root_dict.get("name")
def parse_name(self) -> str:
# Return the tool's "name" entry as a string.
rval = self.root_dict.get("name")
# The name is required: a missing or empty value fails this assertion.
assert rval
return str(rval)

def parse_description(self):
def parse_description(self) -> str:
return self.root_dict.get("description", "")

def parse_edam_operations(self):
def parse_edam_operations(self) -> List[str]:
return self.root_dict.get("edam_operations", [])

def parse_edam_topics(self):
def parse_edam_topics(self) -> List[str]:
return self.root_dict.get("edam_topics", [])

def parse_xrefs(self):
def parse_xrefs(self) -> List[XrefDict]:
xrefs = self.root_dict.get("xrefs", [])
return [dict(value=xref["value"], reftype=xref["type"]) for xref in xrefs if xref["type"]]
return [XrefDict(value=xref["value"], reftype=xref["type"]) for xref in xrefs if xref["type"]]

def parse_sanitize(self):
return self.root_dict.get("sanitize", True)
Expand Down Expand Up @@ -191,10 +195,10 @@ def parse_tests_to_dict(self) -> ToolSourceTests:

return rval

def parse_profile(self):
def parse_profile(self) -> str:
return self.root_dict.get("profile", "16.04")

def parse_license(self):
def parse_license(self) -> Optional[str]:
return self.root_dict.get("license")

def parse_interactivetool(self):
Expand Down
22 changes: 6 additions & 16 deletions lib/galaxy/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1476,22 +1476,12 @@ def parse_stdio(self, tool_source: ToolSource):
self.stdio_regexes = regexes

def _parse_citations(self, tool_source):
# TODO: Move following logic into ToolSource abstraction.
if not hasattr(tool_source, "root"):
return []

root = tool_source.root
citations: List[str] = []
citations_elem = root.find("citations")
if citations_elem is None:
return citations

for citation_elem in citations_elem:
if citation_elem.tag != "citation":
pass
citations_manager = getattr(self.app, "citations_manager", None)
if citations_manager is not None:
citation = citations_manager.parse_citation(citation_elem)
citation_models = tool_source.parse_citations()
citations_manager = getattr(self.app, "citations_manager", None)
citations = []
if citations_manager is not None:
for citation_model in citation_models:
citation = citations_manager.parse_citation(citation_model)
if citation:
citations.append(citation)
return citations
Expand Down

0 comments on commit ecb15b3

Please sign in to comment.