Skip to content

Commit

Permalink
Implemented "trs" scheme, based on ga4gh/tool-registry-service-schemas#164
Browse files Browse the repository at this point in the history

Tested with Dockstore and WorkflowHub (the latter returns an HTTP 500 error code)
  • Loading branch information
jmfernandez committed Dec 7, 2022
1 parent 830af2d commit 4574603
Showing 1 changed file with 112 additions and 23 deletions.
135 changes: 112 additions & 23 deletions wfexs_backend/fetchers/trs_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@

from typing import (
cast,
Mapping,
Optional,
TYPE_CHECKING,
)

from urllib import parse
Expand All @@ -33,27 +32,39 @@
from . import fetchClassicURL, FetcherException

from ..common import (
AbsPath,
ContentKind,
ProtocolFetcher,
ProtocolFetcherReturn,
SecurityContextConfig,
URIType,
URIWithMetadata,
)

if TYPE_CHECKING:
from typing import (
List,
Mapping,
MutableSequence,
Optional,
Sequence,
)

from ..common import (
AbsPath,
ProtocolFetcher,
ProtocolFetcherReturn,
SecurityContextConfig,
URIType,
)

INTERNAL_TRS_SCHEME_PREFIX = "wfexs.trs.files"
TRS_SCHEME_PREFIX = "trs"

TRS_FILES_SUFFIX = "/files"
TRS_DESCRIPTOR_INFIX = "/descriptor/"


def fetchTRSFiles(
remote_file: URIType,
cachedFilename: AbsPath,
secContext: Optional[SecurityContextConfig] = None,
) -> ProtocolFetcherReturn:
remote_file: "URIType",
cachedFilename: "AbsPath",
secContext: "Optional[SecurityContextConfig]" = None,
) -> "ProtocolFetcherReturn":
"""
Method to download contents from TRS files related to a tool
Expand All @@ -63,14 +74,94 @@ def fetchTRSFiles(
"""

parsedInputURL = parse.urlparse(remote_file)
path_steps: "List[str]" = parsedInputURL.path.split("/")
embedded_remote_file = parsedInputURL.path

if not embedded_remote_file.endswith(TRS_FILES_SUFFIX):
metadata_url = cast(URIType, embedded_remote_file + TRS_FILES_SUFFIX)
else:
metadata_url = cast(URIType, embedded_remote_file)
descriptor_base_url = (
embedded_remote_file[0 : -len(TRS_FILES_SUFFIX)] + TRS_DESCRIPTOR_INFIX
metadata_array: "MutableSequence[URIWithMetadata]" = []
if parsedInputURL.scheme == INTERNAL_TRS_SCHEME_PREFIX:
# TODO: Improve this code
if not embedded_remote_file.endswith(TRS_FILES_SUFFIX):
metadata_url = cast("URIType", embedded_remote_file + TRS_FILES_SUFFIX)
descriptor_base_url = embedded_remote_file + TRS_DESCRIPTOR_INFIX
else:
metadata_url = cast("URIType", embedded_remote_file)
descriptor_base_url = (
embedded_remote_file[0 : -len(TRS_FILES_SUFFIX)] + TRS_DESCRIPTOR_INFIX
)
elif parsedInputURL.scheme == TRS_SCHEME_PREFIX:
# TRS official scheme
if len(path_steps) < 3 or path_steps[0] != "":
raise FetcherException(
f"Ill-formed TRS CURIE {remote_file}. It should be in the format of {TRS_SCHEME_PREFIX}://id/version or {TRS_SCHEME_PREFIX}://prefix-with-slashes/id/version"
)

version_steps = path_steps[0:-2]
version_steps.extend(
["ga4gh", "trs", "v2", "tools", path_steps[-2], "versions", path_steps[-1]]
)
version_metadata_url = cast(
"URIType",
parse.urlunparse(
parse.ParseResult(
scheme="https",
netloc=parsedInputURL.netloc,
path="/".join(version_steps),
params="",
query="",
fragment="",
)
),
)
version_meta = {
"fetched": version_metadata_url,
"payload": None,
}
metadata_array.append(URIWithMetadata(remote_file, version_meta))
try:
metaio = io.BytesIO()
_, metametaio, _ = fetchClassicURL(version_metadata_url, metaio)
version_metadata = json.loads(metaio.getvalue().decode("utf-8"))
version_meta["payload"] = version_metadata
metadata_array.extend(metametaio)

except urllib.error.HTTPError as he:
raise FetcherException(
f"Error fetching or processing TRS version metadata for {remote_file} : {he.code} {he.reason}"
) from he

# At last, we can finish building the URL
new_path_steps = version_steps + [
version_metadata["descriptor_type"][0],
"files",
]

metadata_url = cast(
"URIType",
parse.urlunparse(
parse.ParseResult(
scheme="https",
netloc=parsedInputURL.netloc,
path="/".join(new_path_steps),
params="",
query="",
fragment="",
)
),
)

descriptor_steps = version_steps + [
version_metadata["descriptor_type"][0],
"descriptor",
]
descriptor_base_url = parse.urlunparse(
parse.ParseResult(
scheme="https",
netloc=parsedInputURL.netloc,
path="/".join(descriptor_steps) + "/",
params="",
query="",
fragment="",
)
)

topMeta = {
Expand All @@ -80,15 +171,12 @@ def fetchTRSFiles(
"remote_workflow_entrypoint": None,
}
metadata_array = [URIWithMetadata(remote_file, topMeta)]
metaio = None
try:
metaio = io.BytesIO()
_, metametaio, _ = fetchClassicURL(metadata_url, metaio)
metadata = json.loads(metaio.getvalue().decode("utf-8"))
topMeta["payload"] = metadata
metadata_array.extend(metametaio)

metaio = None
except urllib.error.HTTPError as he:
raise FetcherException(
"Error fetching or processing TRS files metadata for {} : {} {}".format(
Expand All @@ -104,8 +192,8 @@ def fetchTRSFiles(
if file_rel_path is not None:
emptyWorkflow = False

file_url = cast(URIType, descriptor_base_url + file_rel_path)
absfile = cast(AbsPath, os.path.join(cachedFilename, file_rel_path))
file_url = cast("URIType", descriptor_base_url + file_rel_path)
absfile = cast("AbsPath", os.path.join(cachedFilename, file_rel_path))

# Intermediate path creation
reldir = os.path.dirname(file_rel_path)
Expand Down Expand Up @@ -146,6 +234,7 @@ def fetchTRSFiles(


# These are schemes from identifiers.org
SCHEME_HANDLERS: Mapping[str, ProtocolFetcher] = {
SCHEME_HANDLERS: "Mapping[str, ProtocolFetcher]" = {
INTERNAL_TRS_SCHEME_PREFIX: fetchTRSFiles,
TRS_SCHEME_PREFIX: fetchTRSFiles,
}

0 comments on commit 4574603

Please sign in to comment.