Skip to content

Commit

Permalink
Merge pull request #16964 from davelopez/invenio_external_storage_wor…
Browse files Browse the repository at this point in the history
…karound

Enhance Invenio RDM integration
  • Loading branch information
jmchilton authored Nov 2, 2023
2 parents 8dad464 + 85dbb92 commit 2baee6a
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 10 deletions.
8 changes: 7 additions & 1 deletion lib/galaxy/files/sources/_rdm.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def _serialization_props(self, user_context: OptionalUserContext = None) -> RDMF
for key, val in self._props.items():
effective_props[key] = self._evaluate_prop(val, user_context=user_context)
effective_props["url"] = self._repository_url
effective_props["token"] = self.get_authorization_token(user_context)
effective_props["token"] = self.safe_get_authorization_token(user_context)
return cast(RDMFilesSourceProperties, effective_props)

def get_authorization_token(self, user_context: OptionalUserContext) -> str:
Expand All @@ -206,3 +206,9 @@ def get_authorization_token(self, user_context: OptionalUserContext) -> str:
if token is None:
raise AuthenticationRequired(f"No authorization token provided in user's settings for '{self.label}'")
return token

def safe_get_authorization_token(self, user_context: OptionalUserContext) -> Optional[str]:
try:
return self.get_authorization_token(user_context)
except AuthenticationRequired:
return None
47 changes: 38 additions & 9 deletions lib/galaxy/files/sources/invenio.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,25 +260,54 @@ def download_file_from_record(
user_context: OptionalUserContext = None,
):
download_file_content_url = self._get_download_file_url(record_id, filename, user_context)
headers = self._get_request_headers(user_context)
req = urllib.request.Request(download_file_content_url, headers=headers)
with urllib.request.urlopen(req, timeout=DEFAULT_SOCKET_TIMEOUT) as page:
f = open(file_path, "wb")
return stream_to_open_named_file(
page, f.fileno(), file_path, source_encoding=get_charset_from_http_headers(page.headers)
)
headers = {}
if self._is_api_url(download_file_content_url):
# pass the token as a header only when using the API
headers = self._get_request_headers(user_context)
try:
req = urllib.request.Request(download_file_content_url, headers=headers)
with urllib.request.urlopen(req, timeout=DEFAULT_SOCKET_TIMEOUT) as page:
f = open(file_path, "wb")
return stream_to_open_named_file(
page, f.fileno(), file_path, source_encoding=get_charset_from_http_headers(page.headers)
)
except urllib.error.HTTPError as e:
# TODO: We can only download files from published records for now
if e.code in [401, 403, 404]:
raise Exception(
f"Cannot download file '{filename}' from record '{record_id}'. Please make sure the record exists and it is public."
)

def _get_download_file_url(self, record_id: str, filename: str, user_context: OptionalUserContext = None):
"""Get the URL to download a file from a record.
This method is used to download files from both published and draft records that are accessible by the user.
"""
is_draft_record = self._is_draft_record(record_id, user_context)
download_file_content_url = f"{self.records_url}/{record_id}/files/{quote(filename)}/content"
file_details_url = f"{self.records_url}/{record_id}/files/{quote(filename)}"
download_file_content_url = f"{file_details_url}/content"
if is_draft_record:
download_file_content_url = download_file_content_url.replace("/files/", "/draft/files/")
file_details_url = self._to_draft_url(file_details_url)
download_file_content_url = self._to_draft_url(download_file_content_url)
file_details = self._get_response(user_context, file_details_url)
if not self._can_download_from_api(file_details):
# TODO: This is a temporary workaround for the fact that the "content" API
# does not support downloading files from S3 or other remote storage classes.
# More info: https://inveniordm.docs.cern.ch/reference/file_storage/#remote-files-r
download_file_content_url = f"{file_details_url.replace('/api','')}?download=1"
return download_file_content_url

def _is_api_url(self, url: str) -> bool:
return "/api/" in url

def _to_draft_url(self, url: str) -> str:
return url.replace("/files/", "/draft/files/")

def _can_download_from_api(self, file_details: dict) -> bool:
# Only files stored locally seems to be fully supported by the API for now
# More info: https://inveniordm.docs.cern.ch/reference/file_storage/
return file_details["storage_class"] == "L"

def _is_draft_record(self, record_id: str, user_context: OptionalUserContext = None):
request_url = self._get_draft_record_url(record_id)
headers = self._get_request_headers(user_context)
Expand Down

0 comments on commit 2baee6a

Please sign in to comment.