From 20face60a1b8788e73b6df79a231596ac227b2fd Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Fri, 7 Jul 2023 11:47:42 +0200
Subject: [PATCH 01/41] Add basic Invenio file source plugin
Currently, it only provides basic support for listing records and downloading files from public records.
---
lib/galaxy/files/sources/invenio.py | 143 ++++++++++++++++++++++++++++
1 file changed, 143 insertions(+)
create mode 100644 lib/galaxy/files/sources/invenio.py
diff --git a/lib/galaxy/files/sources/invenio.py b/lib/galaxy/files/sources/invenio.py
new file mode 100644
index 000000000000..64e4ca597756
--- /dev/null
+++ b/lib/galaxy/files/sources/invenio.py
@@ -0,0 +1,143 @@
+import ssl
+import urllib.request
+from typing import (
+ cast,
+ Optional,
+)
+from urllib.parse import urljoin
+
+import requests
+from typing_extensions import Unpack
+
+from galaxy.files.sources import (
+ BaseFilesSource,
+ FilesSourceOptions,
+ FilesSourceProperties,
+)
+from galaxy.util import (
+ DEFAULT_SOCKET_TIMEOUT,
+ get_charset_from_http_headers,
+ stream_to_open_named_file,
+)
+
+# TODO: Remove this block. Ignoring SSL errors for testing purposes.
+VERIFY = False
+SSL_CONTEXT = ssl.create_default_context()
+SSL_CONTEXT.check_hostname = False
+SSL_CONTEXT.verify_mode = ssl.CERT_NONE
+
+
+class InvenioFilesSourceProperties(FilesSourceProperties):
+ url: str
+
+
+class InvenioFilesSource(BaseFilesSource):
+ """A files source for Invenio turn-key research data management repository."""
+
+ plugin_type = "inveniordm"
+
+ def __init__(self, **kwd: Unpack[InvenioFilesSourceProperties]):
+ props = self._parse_common_config_opts(kwd)
+ base_url = props.get("url", None)
+ if not base_url:
+ raise Exception("InvenioFilesSource requires a url")
+ self._invenio_url = base_url
+ self._props = props
+
+ def _list(self, path="/", recursive=True, user_context=None, opts: Optional[FilesSourceOptions] = None):
+ is_listing_records = path == "/"
+ if is_listing_records:
+ # TODO: This is limited to 25 records by default. We should add pagination support.
+ request_url = urljoin(self._invenio_url, "api/records")
+ else:
+ # listing a record's files
+ request_url = urljoin(self._invenio_url, f"{path}/files")
+
+ rval = []
+ headers = self._get_request_headers(user_context)
+ response = requests.get(request_url, headers=headers, verify=VERIFY)
+ if response.status_code == 200:
+ response_json = response.json()
+ if is_listing_records:
+ rval = self._get_records_from_response(path, response_json)
+ else:
+ rval = self._get_record_files_from_response(path, response_json)
+ else:
+ raise Exception(f"Request to {request_url} failed with status code {response.status_code}")
+ return rval
+
+ def _get_request_headers(self, user_context):
+ preferences = user_context.preferences if user_context else None
+ token = preferences.get(f"{self.id}|token", None) if preferences else None
+ headers = {"Authorization": f"Bearer {token}"} if token else {}
+ return headers
+
+ def _get_records_from_response(self, path: str, response: dict):
+ records = response["hits"]["hits"]
+ rval = []
+ for record in records:
+ uri = self._to_plugin_uri(record["links"]["self"])
+ rval.append(
+ {
+ "class": "Directory",
+ "name": record["metadata"]["title"],
+ "ctime": record["created"],
+ "uri": uri,
+ "path": path,
+ }
+ )
+
+ return rval
+
+ def _get_record_files_from_response(self, path: str, response: dict):
+ files_enabled = response.get("enabled", False)
+ if not files_enabled:
+ return []
+ entries = response["entries"]
+ rval = []
+ for entry in entries:
+ if entry.get("status") == "completed":
+ uri = self._to_plugin_uri(entry["links"]["content"])
+ rval.append(
+ {
+ "class": "File",
+ "name": entry["key"],
+ "size": entry["size"],
+ "ctime": entry["created"],
+ "uri": uri,
+ "path": path,
+ }
+ )
+ return rval
+
+ def _to_plugin_uri(self, uri: str) -> str:
+ return uri.replace(self._invenio_url, self.get_uri_root())
+
+ def _realize_to(
+ self, source_path: str, native_path: str, user_context=None, opts: Optional[FilesSourceOptions] = None
+ ):
+ remote_path = urljoin(self._invenio_url, source_path)
+ # TODO: user_context is always None here when called from a data fetch.
+ # This prevents downloading files that require authentication even if the user provided a token.
+ headers = self._get_request_headers(user_context)
+ req = urllib.request.Request(remote_path, headers=headers)
+ with urllib.request.urlopen(req, timeout=DEFAULT_SOCKET_TIMEOUT, context=SSL_CONTEXT) as page:
+ f = open(native_path, "wb")
+ return stream_to_open_named_file(
+ page, f.fileno(), native_path, source_encoding=get_charset_from_http_headers(page.headers)
+ )
+
+ def _write_from(
+ self, target_path: str, native_path: str, user_context=None, opts: Optional[FilesSourceOptions] = None
+ ):
+ raise NotImplementedError()
+
+ def _serialization_props(self, user_context=None) -> InvenioFilesSourceProperties:
+ effective_props = {}
+ for key, val in self._props.items():
+ effective_props[key] = self._evaluate_prop(val, user_context=user_context)
+ effective_props["url"] = self._invenio_url
+ return cast(InvenioFilesSourceProperties, effective_props)
+
+
+__all__ = ("InvenioFilesSource",)
From e06e295f56ecf0b1c9e23e391c3d3e11b523601b Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Fri, 7 Jul 2023 13:31:14 +0200
Subject: [PATCH 02/41] Add Invenio RDM plugin configuration sample
---
lib/galaxy/config/sample/file_sources_conf.yml.sample | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/lib/galaxy/config/sample/file_sources_conf.yml.sample b/lib/galaxy/config/sample/file_sources_conf.yml.sample
index f7ab5e7b40e5..e8fdf524b5d9 100644
--- a/lib/galaxy/config/sample/file_sources_conf.yml.sample
+++ b/lib/galaxy/config/sample/file_sources_conf.yml.sample
@@ -191,3 +191,9 @@
label: Stock DRS filesource
id: drsstock
doc: Make sure to define this generic drs file source if you have defined any other drs file sources, or stock drs download capability will be disabled.
+
+- type: inveniordm
+ id: invenio
+ doc: Invenio RDM turn-key research data management repository
+ label: Invenio RDM Demo Repository
+ url: https://inveniordm.web.cern.ch/
From 6b4d9e783433ab127a7b237968dcf3d32af4360e Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Mon, 10 Jul 2023 16:05:12 +0200
Subject: [PATCH 03/41] Explore record publishing
This has a severe limitation.
We can export a record with files (without much metadata), but we cannot reference it back directly from an export URI.
The reason is how we construct the URI for regular file sources, which obviously are just file paths, while with DOI repositories, we need to differentiate between records and files only.
---
lib/galaxy/files/sources/invenio.py | 268 ++++++++++++++++++++++++----
1 file changed, 229 insertions(+), 39 deletions(-)
diff --git a/lib/galaxy/files/sources/invenio.py b/lib/galaxy/files/sources/invenio.py
index 64e4ca597756..42cd737a0c08 100644
--- a/lib/galaxy/files/sources/invenio.py
+++ b/lib/galaxy/files/sources/invenio.py
@@ -1,13 +1,21 @@
+import datetime
+import json
+import os
import ssl
import urllib.request
from typing import (
cast,
+ List,
Optional,
)
from urllib.parse import urljoin
import requests
-from typing_extensions import Unpack
+from typing_extensions import (
+ Literal,
+ TypedDict,
+ Unpack,
+)
from galaxy.files.sources import (
BaseFilesSource,
@@ -27,10 +35,82 @@
SSL_CONTEXT.verify_mode = ssl.CERT_NONE
+AccessStatus = Literal["public", "restricted"]
+
+
class InvenioFilesSourceProperties(FilesSourceProperties):
url: str
+class ResourceType(TypedDict):
+ id: str
+
+
+class RecordAccess(TypedDict):
+ record: AccessStatus
+ files: AccessStatus
+
+
+class RecordFiles(TypedDict):
+ enabled: bool
+
+
+class IdentifierEntry(TypedDict):
+ scheme: str
+ identifier: str
+
+
+class AffiliationEntry(TypedDict):
+ id: str
+ name: str
+
+
+class RecordPersonOrOrg(TypedDict):
+ family_name: str
+ given_name: str
+ type: Literal["personal", "organizational"]
+ name: str
+ identifiers: List[IdentifierEntry]
+
+
+class Creator(TypedDict):
+ person_or_org: RecordPersonOrOrg
+ affiliations: Optional[List[AffiliationEntry]]
+
+
+class RecordMetadata(TypedDict):
+ title: str
+ resource_type: ResourceType
+ publication_date: str
+ creators: List[Creator]
+
+
+class RecordLinks(TypedDict):
+ self: str
+ self_html: str
+ self_iiif_manifest: str
+ self_iiif_sequence: str
+ files: str
+ record: str
+ record_html: str
+ publish: str
+ review: str
+ versions: str
+ access_links: str
+ reserve_doi: str
+
+
+class InvenioRecord(TypedDict):
+ id: str
+ title: str
+ resource_type: ResourceType
+ publication_date: str
+ access: RecordAccess
+ files: RecordFiles
+ metadata: RecordMetadata
+ links: RecordLinks
+
+
class InvenioFilesSource(BaseFilesSource):
"""A files source for Invenio turn-key research data management repository."""
@@ -45,26 +125,56 @@ def __init__(self, **kwd: Unpack[InvenioFilesSourceProperties]):
self._props = props
def _list(self, path="/", recursive=True, user_context=None, opts: Optional[FilesSourceOptions] = None):
- is_listing_records = path == "/"
- if is_listing_records:
- # TODO: This is limited to 25 records by default. We should add pagination support.
- request_url = urljoin(self._invenio_url, "api/records")
- else:
- # listing a record's files
- request_url = urljoin(self._invenio_url, f"{path}/files")
+ is_root_path = path == "/"
+ if is_root_path:
+ return self._list_records(user_context)
+ return self._list_record_files(path, user_context)
- rval = []
+ def _realize_to(
+ self, source_path: str, native_path: str, user_context=None, opts: Optional[FilesSourceOptions] = None
+ ):
+ # TODO: source_path will be wrong when constructed from the UI as it assumes the target_uri is `get_root_uri() + filename`
+
+ remote_path = urljoin(self._invenio_url, source_path)
+ # TODO: user_context is always None here when called from a data fetch.
+ # This prevents downloading files that require authentication even if the user provided a token.
+ headers = self._get_request_headers(user_context)
+ req = urllib.request.Request(remote_path, headers=headers)
+ with urllib.request.urlopen(req, timeout=DEFAULT_SOCKET_TIMEOUT, context=SSL_CONTEXT) as page:
+ f = open(native_path, "wb")
+ return stream_to_open_named_file(
+ page, f.fileno(), native_path, source_encoding=get_charset_from_http_headers(page.headers)
+ )
+
+ def _write_from(
+ self, target_path: str, native_path: str, user_context=None, opts: Optional[FilesSourceOptions] = None
+ ):
+ filename = os.path.basename(target_path)
+ record_title = f"{filename} (exported by Galaxy)"
+ draft_record = self._create_draft_record(title=record_title, user_context=user_context)
+ try:
+ self._upload_file_to_draft_record(draft_record, filename, native_path, user_context=user_context)
+ self._publish_draft_record(draft_record, user_context=user_context)
+ except Exception:
+ self._delete_draft_record(draft_record, user_context)
+ raise
+
+ def _list_records(self, user_context=None):
+ # TODO: This is limited to 25 records by default. Add pagination support?
+ request_url = urljoin(self._invenio_url, "api/records")
+ response_data = self._get_response(user_context, request_url)
+ return self._get_records_from_response(response_data)
+
+ def _list_record_files(self, path, user_context=None):
+ request_url = urljoin(self._invenio_url, f"{path}/files")
+ response_data = self._get_response(user_context, request_url)
+ return self._get_record_files_from_response(path, response_data)
+
+ def _get_response(self, user_context, request_url: str) -> dict:
headers = self._get_request_headers(user_context)
response = requests.get(request_url, headers=headers, verify=VERIFY)
- if response.status_code == 200:
- response_json = response.json()
- if is_listing_records:
- rval = self._get_records_from_response(path, response_json)
- else:
- rval = self._get_record_files_from_response(path, response_json)
- else:
- raise Exception(f"Request to {request_url} failed with status code {response.status_code}")
- return rval
+ self._ensure_response_has_expected_status_code(response, 200)
+ return response.json()
def _get_request_headers(self, user_context):
preferences = user_context.preferences if user_context else None
@@ -72,7 +182,7 @@ def _get_request_headers(self, user_context):
headers = {"Authorization": f"Bearer {token}"} if token else {}
return headers
- def _get_records_from_response(self, path: str, response: dict):
+ def _get_records_from_response(self, response: dict):
records = response["hits"]["hits"]
rval = []
for record in records:
@@ -83,7 +193,7 @@ def _get_records_from_response(self, path: str, response: dict):
"name": record["metadata"]["title"],
"ctime": record["created"],
"uri": uri,
- "path": path,
+ "path": f"/{record['id']}",
}
)
@@ -98,6 +208,7 @@ def _get_record_files_from_response(self, path: str, response: dict):
for entry in entries:
if entry.get("status") == "completed":
uri = self._to_plugin_uri(entry["links"]["content"])
+ path = self._to_plugin_uri(entry["links"]["self"])
rval.append(
{
"class": "File",
@@ -113,25 +224,6 @@ def _get_record_files_from_response(self, path: str, response: dict):
def _to_plugin_uri(self, uri: str) -> str:
return uri.replace(self._invenio_url, self.get_uri_root())
- def _realize_to(
- self, source_path: str, native_path: str, user_context=None, opts: Optional[FilesSourceOptions] = None
- ):
- remote_path = urljoin(self._invenio_url, source_path)
- # TODO: user_context is always None here when called from a data fetch.
- # This prevents downloading files that require authentication even if the user provided a token.
- headers = self._get_request_headers(user_context)
- req = urllib.request.Request(remote_path, headers=headers)
- with urllib.request.urlopen(req, timeout=DEFAULT_SOCKET_TIMEOUT, context=SSL_CONTEXT) as page:
- f = open(native_path, "wb")
- return stream_to_open_named_file(
- page, f.fileno(), native_path, source_encoding=get_charset_from_http_headers(page.headers)
- )
-
- def _write_from(
- self, target_path: str, native_path: str, user_context=None, opts: Optional[FilesSourceOptions] = None
- ):
- raise NotImplementedError()
-
def _serialization_props(self, user_context=None) -> InvenioFilesSourceProperties:
effective_props = {}
for key, val in self._props.items():
@@ -139,5 +231,103 @@ def _serialization_props(self, user_context=None) -> InvenioFilesSourcePropertie
effective_props["url"] = self._invenio_url
return cast(InvenioFilesSourceProperties, effective_props)
+ def _create_draft_record(self, title: str, user_context=None) -> InvenioRecord:
+ today = datetime.date.today().isoformat()
+ creator = self._get_creator_from_user_context(user_context)
+ should_publish = self._get_public_records_user_setting_enabled_status(user_context)
+ access = "public" if should_publish else "restricted"
+ create_record_request = {
+ "access": {"record": access, "files": access},
+ "files": {"enabled": True},
+ "metadata": {
+ "title": title,
+ "publication_date": today,
+ "resource_type": {"id": "dataset"},
+ "creators": [
+ creator,
+ ],
+ },
+ }
+
+ headers = self._get_request_headers(user_context)
+ if "Authorization" not in headers:
+ raise Exception(
+ "Cannot create record without authentication token. Please set your personal access token in your Galaxy preferences."
+ )
+
+ create_record_url = urljoin(self._invenio_url, "api/records")
+ response = requests.post(create_record_url, json=create_record_request, headers=headers, verify=VERIFY)
+ self._ensure_response_has_expected_status_code(response, 201)
+ record = response.json()
+ return record
+
+ def _delete_draft_record(self, record: InvenioRecord, user_context=None):
+ delete_record_url = record["links"]["self"]
+ headers = self._get_request_headers(user_context)
+ response = requests.delete(delete_record_url, headers=headers, verify=VERIFY)
+ self._ensure_response_has_expected_status_code(response, 204)
+
+ def _upload_file_to_draft_record(self, record: InvenioRecord, filename: str, native_path: str, user_context=None):
+ upload_file_url = urljoin(self._invenio_url, f"api/records/{record['id']}/draft/files")
+ headers = self._get_request_headers(user_context)
+
+ # Add file metadata
+ response = requests.post(upload_file_url, json=[{"key": filename}], headers=headers, verify=VERIFY)
+ self._ensure_response_has_expected_status_code(response, 201)
+
+ # Upload file content
+ file_entry = response.json()["entries"][0]
+ upload_file_content_url = file_entry["links"]["content"]
+ commit_file_upload_url = file_entry["links"]["commit"]
+ with open(native_path, "rb") as file:
+ response = requests.put(upload_file_content_url, data=file, headers=headers, verify=VERIFY)
+ self._ensure_response_has_expected_status_code(response, 200)
+
+ # Commit file upload
+ response = requests.post(commit_file_upload_url, headers=headers, verify=VERIFY)
+ self._ensure_response_has_expected_status_code(response, 200)
+
+ def _publish_draft_record(self, record: InvenioRecord, user_context=None):
+ publish_record_url = urljoin(self._invenio_url, f"api/records/{record['id']}/draft/actions/publish")
+ headers = self._get_request_headers(user_context)
+ response = requests.post(publish_record_url, headers=headers, verify=VERIFY)
+ self._ensure_response_has_expected_status_code(response, 202)
+
+ def _get_creator_from_user_context(self, user_context):
+ preferences = user_context.preferences if user_context else None
+ public_name = preferences.get(f"{self.id}|public_name", None) if preferences else None
+ family_name = "Galaxy User"
+ given_name = "Anonymous"
+ if public_name:
+ tokens = public_name.split(", ")
+ if len(tokens) == 2:
+ family_name = tokens[0]
+ given_name = tokens[1]
+ else:
+ given_name = public_name
+ return {"person_or_org": {"family_name": family_name, "given_name": given_name, "type": "personal"}}
+
+ def _get_public_records_user_setting_enabled_status(self, user_context) -> bool:
+ preferences = user_context.preferences if user_context else None
+ public_records = preferences.get(f"{self.id}|public_records", None) if preferences else None
+ if public_records:
+ return True
+ return False
+
+ def _ensure_response_has_expected_status_code(self, response, expected_status_code: int):
+ if response.status_code != expected_status_code:
+ error_message = self._get_response_error_message(response)
+ raise Exception(
+ f"Request to {response.url} failed with status code {response.status_code}: {error_message}"
+ )
+
+ def _get_response_error_message(self, response):
+ response_json = response.json()
+ error_message = response_json.get("message") if response.status_code == 400 else response.text
+ errors = response_json.get("errors", [])
+ for error in errors:
+ error_message += f"\n{json.dumps(error)}"
+ return error_message
+
__all__ = ("InvenioFilesSource",)
From 333b251efe93f1a2353a873a4bd2459ec45d4d62 Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Tue, 11 Jul 2023 14:26:04 +0200
Subject: [PATCH 04/41] Use vault secret to store personal token
Also add example config to user_preferences_extra_conf.yml.sample
---
.../user_preferences_extra_conf.yml.sample | 17 +++++
lib/galaxy/files/sources/invenio.py | 63 ++++++++++++++-----
2 files changed, 64 insertions(+), 16 deletions(-)
diff --git a/lib/galaxy/config/sample/user_preferences_extra_conf.yml.sample b/lib/galaxy/config/sample/user_preferences_extra_conf.yml.sample
index d0280966e40e..17c7fbf41572 100644
--- a/lib/galaxy/config/sample/user_preferences_extra_conf.yml.sample
+++ b/lib/galaxy/config/sample/user_preferences_extra_conf.yml.sample
@@ -94,3 +94,20 @@ preferences:
label: Password
type: password
required: False
+
+ invenio:
+ description: Your Invenio RDM Account
+ inputs:
+ - name: token
+ label: Personal Token to publish records to Invenio RDM
+ type: secret
+ store: vault # Requires setting up vault_config_file in your galaxy.yml
+ required: False
+ - name: public_name
+ label: Public name to publish records (formatted as "Lastname, Firstname")
+ type: text
+ required: False
+ - name: public_records
+ label: Whether to publish records (file exports) or make them restricted. Only public records can be imported back.
+ type: boolean
+ required: False
diff --git a/lib/galaxy/files/sources/invenio.py b/lib/galaxy/files/sources/invenio.py
index 42cd737a0c08..1aa70ed0c6af 100644
--- a/lib/galaxy/files/sources/invenio.py
+++ b/lib/galaxy/files/sources/invenio.py
@@ -17,6 +17,7 @@
Unpack,
)
+from galaxy.files import ProvidesUserFileSourcesUserContext
from galaxy.files.sources import (
BaseFilesSource,
FilesSourceOptions,
@@ -124,14 +125,24 @@ def __init__(self, **kwd: Unpack[InvenioFilesSourceProperties]):
self._invenio_url = base_url
self._props = props
- def _list(self, path="/", recursive=True, user_context=None, opts: Optional[FilesSourceOptions] = None):
+ def _list(
+ self,
+ path="/",
+ recursive=True,
+ user_context: Optional[ProvidesUserFileSourcesUserContext] = None,
+ opts: Optional[FilesSourceOptions] = None,
+ ):
is_root_path = path == "/"
if is_root_path:
return self._list_records(user_context)
return self._list_record_files(path, user_context)
def _realize_to(
- self, source_path: str, native_path: str, user_context=None, opts: Optional[FilesSourceOptions] = None
+ self,
+ source_path: str,
+ native_path: str,
+ user_context: Optional[ProvidesUserFileSourcesUserContext] = None,
+ opts: Optional[FilesSourceOptions] = None,
):
# TODO: source_path will be wrong when constructed from the UI as it assumes the target_uri is `get_root_uri() + filename`
@@ -147,7 +158,11 @@ def _realize_to(
)
def _write_from(
- self, target_path: str, native_path: str, user_context=None, opts: Optional[FilesSourceOptions] = None
+ self,
+ target_path: str,
+ native_path: str,
+ user_context: Optional[ProvidesUserFileSourcesUserContext] = None,
+ opts: Optional[FilesSourceOptions] = None,
):
filename = os.path.basename(target_path)
record_title = f"{filename} (exported by Galaxy)"
@@ -159,26 +174,26 @@ def _write_from(
self._delete_draft_record(draft_record, user_context)
raise
- def _list_records(self, user_context=None):
+ def _list_records(self, user_context: Optional[ProvidesUserFileSourcesUserContext] = None):
# TODO: This is limited to 25 records by default. Add pagination support?
request_url = urljoin(self._invenio_url, "api/records")
response_data = self._get_response(user_context, request_url)
return self._get_records_from_response(response_data)
- def _list_record_files(self, path, user_context=None):
+ def _list_record_files(self, path, user_context: Optional[ProvidesUserFileSourcesUserContext] = None):
request_url = urljoin(self._invenio_url, f"{path}/files")
response_data = self._get_response(user_context, request_url)
return self._get_record_files_from_response(path, response_data)
- def _get_response(self, user_context, request_url: str) -> dict:
+ def _get_response(self, user_context: Optional[ProvidesUserFileSourcesUserContext], request_url: str) -> dict:
headers = self._get_request_headers(user_context)
response = requests.get(request_url, headers=headers, verify=VERIFY)
self._ensure_response_has_expected_status_code(response, 200)
return response.json()
- def _get_request_headers(self, user_context):
- preferences = user_context.preferences if user_context else None
- token = preferences.get(f"{self.id}|token", None) if preferences else None
+ def _get_request_headers(self, user_context: Optional[ProvidesUserFileSourcesUserContext]):
+ vault = user_context.user_vault if user_context else None
+ token = vault.read_secret(f"preferences/{self.id}/token") if vault else None
headers = {"Authorization": f"Bearer {token}"} if token else {}
return headers
@@ -224,14 +239,18 @@ def _get_record_files_from_response(self, path: str, response: dict):
def _to_plugin_uri(self, uri: str) -> str:
return uri.replace(self._invenio_url, self.get_uri_root())
- def _serialization_props(self, user_context=None) -> InvenioFilesSourceProperties:
+ def _serialization_props(
+ self, user_context: Optional[ProvidesUserFileSourcesUserContext] = None
+ ) -> InvenioFilesSourceProperties:
effective_props = {}
for key, val in self._props.items():
effective_props[key] = self._evaluate_prop(val, user_context=user_context)
effective_props["url"] = self._invenio_url
return cast(InvenioFilesSourceProperties, effective_props)
- def _create_draft_record(self, title: str, user_context=None) -> InvenioRecord:
+ def _create_draft_record(
+ self, title: str, user_context: Optional[ProvidesUserFileSourcesUserContext] = None
+ ) -> InvenioRecord:
today = datetime.date.today().isoformat()
creator = self._get_creator_from_user_context(user_context)
should_publish = self._get_public_records_user_setting_enabled_status(user_context)
@@ -261,13 +280,21 @@ def _create_draft_record(self, title: str, user_context=None) -> InvenioRecord:
record = response.json()
return record
- def _delete_draft_record(self, record: InvenioRecord, user_context=None):
+ def _delete_draft_record(
+ self, record: InvenioRecord, user_context: Optional[ProvidesUserFileSourcesUserContext] = None
+ ):
delete_record_url = record["links"]["self"]
headers = self._get_request_headers(user_context)
response = requests.delete(delete_record_url, headers=headers, verify=VERIFY)
self._ensure_response_has_expected_status_code(response, 204)
- def _upload_file_to_draft_record(self, record: InvenioRecord, filename: str, native_path: str, user_context=None):
+ def _upload_file_to_draft_record(
+ self,
+ record: InvenioRecord,
+ filename: str,
+ native_path: str,
+ user_context: Optional[ProvidesUserFileSourcesUserContext] = None,
+ ):
upload_file_url = urljoin(self._invenio_url, f"api/records/{record['id']}/draft/files")
headers = self._get_request_headers(user_context)
@@ -287,13 +314,15 @@ def _upload_file_to_draft_record(self, record: InvenioRecord, filename: str, nat
response = requests.post(commit_file_upload_url, headers=headers, verify=VERIFY)
self._ensure_response_has_expected_status_code(response, 200)
- def _publish_draft_record(self, record: InvenioRecord, user_context=None):
+ def _publish_draft_record(
+ self, record: InvenioRecord, user_context: Optional[ProvidesUserFileSourcesUserContext] = None
+ ):
publish_record_url = urljoin(self._invenio_url, f"api/records/{record['id']}/draft/actions/publish")
headers = self._get_request_headers(user_context)
response = requests.post(publish_record_url, headers=headers, verify=VERIFY)
self._ensure_response_has_expected_status_code(response, 202)
- def _get_creator_from_user_context(self, user_context):
+ def _get_creator_from_user_context(self, user_context: Optional[ProvidesUserFileSourcesUserContext]):
preferences = user_context.preferences if user_context else None
public_name = preferences.get(f"{self.id}|public_name", None) if preferences else None
family_name = "Galaxy User"
@@ -307,7 +336,9 @@ def _get_creator_from_user_context(self, user_context):
given_name = public_name
return {"person_or_org": {"family_name": family_name, "given_name": given_name, "type": "personal"}}
- def _get_public_records_user_setting_enabled_status(self, user_context) -> bool:
+ def _get_public_records_user_setting_enabled_status(
+ self, user_context: Optional[ProvidesUserFileSourcesUserContext]
+ ) -> bool:
preferences = user_context.preferences if user_context else None
public_records = preferences.get(f"{self.id}|public_records", None) if preferences else None
if public_records:
From 6fb9349541c36263883114ec2701e3bcbafa7c23 Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Thu, 20 Jul 2023 16:28:37 +0200
Subject: [PATCH 05/41] Add write intent option to File Sources
This option helps to identify when we are browsing a file source with the "intent to write" to it.
This is helpful to avoid listing those elements (directories/records) that might not be writable (protected, read-only, etc.) even if the file source itself is writable.
The plugin implementation should handle this option accordingly if necessary.
---
lib/galaxy/files/sources/__init__.py | 7 ++++++-
lib/galaxy/managers/remote_files.py | 12 +++++++++++-
lib/galaxy/webapps/galaxy/api/remote_files.py | 12 +++++++++++-
3 files changed, 28 insertions(+), 3 deletions(-)
diff --git a/lib/galaxy/files/sources/__init__.py b/lib/galaxy/files/sources/__init__.py
index d369fcfe4293..272547bc26f8 100644
--- a/lib/galaxy/files/sources/__init__.py
+++ b/lib/galaxy/files/sources/__init__.py
@@ -53,7 +53,12 @@ class FilesSourceProperties(TypedDict):
class FilesSourceOptions:
- """Options to control behaviour of filesource operations, such as realize_to and write_from"""
+ """Options to control behavior of file source operations, such as realize_to, write_from and list."""
+
+ # Indicates access to the FS operation with intent to write.
+ # A file source can be "writeable" but, for example, some directories (or elements) may be restricted or read-only
+ # so those should be skipped while browsing with write_intent=True.
+ write_intent: Optional[bool]
# Property overrides for values initially configured through the constructor. For example
# the HTTPFilesSource passes in additional http_headers through these properties, which
diff --git a/lib/galaxy/managers/remote_files.py b/lib/galaxy/managers/remote_files.py
index fd577e546b69..fb589880133f 100644
--- a/lib/galaxy/managers/remote_files.py
+++ b/lib/galaxy/managers/remote_files.py
@@ -8,6 +8,7 @@
ConfiguredFileSources,
ProvidesUserFileSourcesUserContext,
)
+from galaxy.files.sources import FilesSourceOptions
from galaxy.managers.context import ProvidesUserContext
from galaxy.schema.remote_files import (
AnyRemoteFilesListResponse,
@@ -41,6 +42,7 @@ def index(
format: Optional[RemoteFilesFormat],
recursive: Optional[bool],
disable: Optional[RemoteFilesDisableMode],
+ write_intent: Optional[bool] = False,
) -> AnyRemoteFilesListResponse:
"""Returns a list of remote files available to the user."""
@@ -75,8 +77,16 @@ def index(
file_source_path = self._file_sources.get_file_source_path(uri)
file_source = file_source_path.file_source
+
+ opts = FilesSourceOptions()
+ opts.write_intent = write_intent or False
try:
- index = file_source.list(file_source_path.path, recursive=recursive, user_context=user_file_source_context)
+ index = file_source.list(
+ file_source_path.path,
+ recursive=recursive,
+ user_context=user_file_source_context,
+ opts=opts,
+ )
except exceptions.MessageException:
log.warning(f"Problem listing file source path {file_source_path}", exc_info=True)
raise
diff --git a/lib/galaxy/webapps/galaxy/api/remote_files.py b/lib/galaxy/webapps/galaxy/api/remote_files.py
index 39f21c872768..84957a11018e 100644
--- a/lib/galaxy/webapps/galaxy/api/remote_files.py
+++ b/lib/galaxy/webapps/galaxy/api/remote_files.py
@@ -59,6 +59,15 @@
),
)
+WriteIntentQueryParam: Optional[bool] = Query(
+ default=None,
+ title="Write Intent",
+ description=(
+ "Whether the query is made with the intention of writing to the source."
+ " If set to True, only entries that can be written to will be accessible."
+ ),
+)
+
BrowsableQueryParam: Optional[bool] = Query(
default=True,
title="Browsable filesources only",
@@ -90,9 +99,10 @@ async def index(
format: Optional[RemoteFilesFormat] = FormatQueryParam,
recursive: Optional[bool] = RecursiveQueryParam,
disable: Optional[RemoteFilesDisableMode] = DisableModeQueryParam,
+ write_intent: Optional[bool] = WriteIntentQueryParam,
) -> AnyRemoteFilesListResponse:
"""Lists all remote files available to the user from different sources."""
- return self.manager.index(user_ctx, target, format, recursive, disable)
+ return self.manager.index(user_ctx, target, format, recursive, disable, write_intent=write_intent)
@router.get(
"/api/remote_files/plugins",
From bf15a1df556c22dd5afd21172a0f89d19f1b318e Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Thu, 20 Jul 2023 16:38:44 +0200
Subject: [PATCH 06/41] Add create_entry endpoint for File sources
This allows creating a new entry (directory/record) in those remote file sources
that support it. By default, the endpoint will raise NotImplementedError unless
the plugin implements the _create_entry method.
---
lib/galaxy/files/sources/__init__.py | 31 +++++++++++++++--
lib/galaxy/managers/remote_files.py | 21 ++++++++++++
lib/galaxy/schema/remote_files.py | 34 +++++++++++++++++++
lib/galaxy/webapps/galaxy/api/remote_files.py | 20 ++++++++++-
4 files changed, 103 insertions(+), 3 deletions(-)
diff --git a/lib/galaxy/files/sources/__init__.py b/lib/galaxy/files/sources/__init__.py
index 272547bc26f8..863dfb03e3d5 100644
--- a/lib/galaxy/files/sources/__init__.py
+++ b/lib/galaxy/files/sources/__init__.py
@@ -67,6 +67,18 @@ class FilesSourceOptions:
extra_props: Optional[FilesSourceProperties]
+class EntryData(TypedDict):
+ name: str
+ # May contain additional properties depending on the file source
+
+
+class Entry(TypedDict):
+ name: str
+ uri: str
+ # May contain additional properties depending on the file source
+ external_link: NotRequired[str]
+
+
class SingleFileSource(metaclass=abc.ABCMeta):
"""
Represents a protocol handler for a single remote file that can be read by or written to by Galaxy.
@@ -298,9 +310,20 @@ def list(self, path="/", recursive=False, user_context=None, opts: Optional[File
def _list(self, path="/", recursive=False, user_context=None, opts: Optional[FilesSourceOptions] = None):
pass
+ def create_entry(
+ self, entry_data: EntryData, user_context=None, opts: Optional[FilesSourceOptions] = None
+ ) -> Entry:
+ self._ensure_writeable()
+ self._check_user_access(user_context)
+ return self._create_entry(entry_data, user_context, opts)
+
+ def _create_entry(
+ self, entry_data: EntryData, user_context=None, opts: Optional[FilesSourceOptions] = None
+ ) -> Entry:
+ raise NotImplementedError()
+
def write_from(self, target_path, native_path, user_context=None, opts: Optional[FilesSourceOptions] = None):
- if not self.get_writable():
- raise Exception("Cannot write to a non-writable file source.")
+ self._ensure_writeable()
self._check_user_access(user_context)
self._write_from(target_path, native_path, user_context=user_context, opts=opts)
@@ -316,6 +339,10 @@ def realize_to(self, source_path, native_path, user_context=None, opts: Optional
def _realize_to(self, source_path, native_path, user_context=None, opts: Optional[FilesSourceOptions] = None):
pass
+ def _ensure_writeable(self):
+ if not self.get_writable():
+ raise Exception("Cannot write to a non-writable file source.")
+
def _check_user_access(self, user_context):
"""Raises an exception if the given user doesn't have the rights to access this file source.
diff --git a/lib/galaxy/managers/remote_files.py b/lib/galaxy/managers/remote_files.py
index fb589880133f..470ae5cb3307 100644
--- a/lib/galaxy/managers/remote_files.py
+++ b/lib/galaxy/managers/remote_files.py
@@ -12,6 +12,8 @@
from galaxy.managers.context import ProvidesUserContext
from galaxy.schema.remote_files import (
AnyRemoteFilesListResponse,
+ CreatedEntryResponse,
+ CreateEntryPayload,
FilesSourcePlugin,
FilesSourcePluginList,
RemoteFilesDisableMode,
@@ -139,3 +141,22 @@ def get_files_source_plugins(
@property
def _file_sources(self) -> ConfiguredFileSources:
return self._app.file_sources
+
+ def create_entry(self, user_ctx: ProvidesUserContext, entry_data: CreateEntryPayload) -> CreatedEntryResponse:
+ """Create an entry (directory or record) in a remote files location."""
+ target = entry_data.target
+ user_file_source_context = ProvidesUserFileSourcesUserContext(user_ctx)
+ self._file_sources.validate_uri_root(target, user_context=user_file_source_context)
+ file_source_path = self._file_sources.get_file_source_path(target)
+ file_source = file_source_path.file_source
+ try:
+ result = file_source.create_entry(entry_data.dict(), user_context=user_file_source_context)
+ except Exception:
+ message = f"Problem creating entry {entry_data.name} in file source {entry_data.target}"
+ log.warning(message, exc_info=True)
+ raise exceptions.InternalServerError(message)
+ return CreatedEntryResponse(
+ name=result["name"],
+ uri=result["uri"],
+ external_link=result.get("external_link", None),
+ )
diff --git a/lib/galaxy/schema/remote_files.py b/lib/galaxy/schema/remote_files.py
index 66302390ea92..a408f076a9c8 100644
--- a/lib/galaxy/schema/remote_files.py
+++ b/lib/galaxy/schema/remote_files.py
@@ -149,3 +149,37 @@ class ListUriResponse(Model):
AnyRemoteFilesListResponse = Union[ListUriResponse, ListJstreeResponse]
+
+
+class CreateEntryPayload(Model):
+ target: str = Field(
+ Required,
+ title="Target",
+ description="The target file source to create the entry in.",
+ )
+ name: str = Field(
+ Required,
+ title="Name",
+ description="The name of the entry to create.",
+ example="my_new_entry",
+ )
+
+
+class CreatedEntryResponse(Model):
+ name: str = Field(
+ Required,
+ title="Name",
+ description="The name of the created entry.",
+ example="my_new_entry",
+ )
+ uri: str = Field(
+ Required,
+ title="URI",
+ description="The URI of the created entry.",
+ example="gxfiles://my_new_entry",
+ )
+ external_link: Optional[str] = Field(
+ default=None,
+ title="External link",
+ description="An optional external link to the created entry if available.",
+ )
diff --git a/lib/galaxy/webapps/galaxy/api/remote_files.py b/lib/galaxy/webapps/galaxy/api/remote_files.py
index 84957a11018e..16284e265e3c 100644
--- a/lib/galaxy/webapps/galaxy/api/remote_files.py
+++ b/lib/galaxy/webapps/galaxy/api/remote_files.py
@@ -4,12 +4,15 @@
import logging
from typing import Optional
+from fastapi import Body
from fastapi.param_functions import Query
from galaxy.managers.context import ProvidesUserContext
from galaxy.managers.remote_files import RemoteFilesManager
from galaxy.schema.remote_files import (
AnyRemoteFilesListResponse,
+ CreatedEntryResponse,
+ CreateEntryPayload,
FilesSourcePluginList,
RemoteFilesDisableMode,
RemoteFilesFormat,
@@ -115,4 +118,19 @@ async def plugins(
browsable_only: Optional[bool] = BrowsableQueryParam,
) -> FilesSourcePluginList:
"""Display plugin information for each of the gxfiles:// URI targets available."""
- return self.manager.get_files_source_plugins(user_ctx, browsable_only)
+
+ @router.post(
+ "/api/remote_files",
+ summary="Creates a new entry (directory/record) on the remote files source.",
+ )
+ async def create_entry(
+ self,
+ user_ctx: ProvidesUserContext = DependsOnTrans,
+ payload: CreateEntryPayload = Body(
+ ...,
+ title="Entry Data",
+ description="Information about the entry to create. Depends on the target file source.",
+ ),
+ ) -> CreatedEntryResponse:
+ """Creates a new entry on the remote files source."""
+ return self.manager.create_entry(user_ctx, payload)
From 35dfa9c2aced221e185179bd4e11617ccdf7aed4 Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Thu, 20 Jul 2023 17:23:53 +0200
Subject: [PATCH 07/41] Add RDMFilesSource subclass
Implement the _create_entry method for InvenioRDMFilesSource and some additional refactors.
---
lib/galaxy/files/sources/_rdm.py | 61 +++++++++++++++
lib/galaxy/files/sources/invenio.py | 116 +++++++++++++++++-----------
2 files changed, 133 insertions(+), 44 deletions(-)
create mode 100644 lib/galaxy/files/sources/_rdm.py
diff --git a/lib/galaxy/files/sources/_rdm.py b/lib/galaxy/files/sources/_rdm.py
new file mode 100644
index 000000000000..9479982be50a
--- /dev/null
+++ b/lib/galaxy/files/sources/_rdm.py
@@ -0,0 +1,61 @@
+import logging
+from typing import (
+ cast,
+ Optional,
+)
+
+from typing_extensions import Unpack
+
+from galaxy.files import ProvidesUserFileSourcesUserContext
+from galaxy.files.sources import (
+ BaseFilesSource,
+ FilesSourceProperties,
+)
+
+log = logging.getLogger(__name__)
+
+
+class RDMFilesSourceProperties(FilesSourceProperties):
+ url: str
+
+
+class RDMFilesSource(BaseFilesSource):
+    """Base class for Research Data Management (RDM) file sources.
+
+    This class is not intended to be used directly, but rather to be subclassed
+    by file sources that interact with RDM repositories.
+
+    An RDM file source is similar to a regular file source, but instead of a tree of
+    files and directories, it provides a (one level) list of records (representing directories)
+    that can contain only files (no subdirectories).
+
+    In addition, RDM file sources might need to create a new record (directory) in the
+    repository in advance, and then upload a file to it. The record is created by calling
+    the `create_entry` method. The `url` configuration property is required.
+
+    """
+
+    # Marker attribute: lets callers tell RDM file sources apart from regular
+    # file sources (e.g. to list only the RDM-capable plugins).
+    supports_rdm = True
+
+    def __init__(self, **kwd: Unpack[RDMFilesSourceProperties]):
+        props = self._parse_common_config_opts(kwd)
+        base_url = props.get("url", None)
+        if not base_url:
+            raise Exception("URL for RDM repository must be provided in configuration")
+        self._repository_url = base_url
+        self._props = props
+
+    @property
+    def repository_url(self) -> str:
+        return self._repository_url
+
+    def _serialization_props(
+        self, user_context: Optional[ProvidesUserFileSourcesUserContext] = None
+    ) -> RDMFilesSourceProperties:
+        effective_props = {}
+        for key, val in self._props.items():
+            effective_props[key] = self._evaluate_prop(val, user_context=user_context)
+        effective_props["url"] = self.repository_url
+        return cast(RDMFilesSourceProperties, effective_props)
diff --git a/lib/galaxy/files/sources/invenio.py b/lib/galaxy/files/sources/invenio.py
index 1aa70ed0c6af..d1765f0eafc7 100644
--- a/lib/galaxy/files/sources/invenio.py
+++ b/lib/galaxy/files/sources/invenio.py
@@ -4,7 +4,6 @@
import ssl
import urllib.request
from typing import (
- cast,
List,
Optional,
)
@@ -14,15 +13,15 @@
from typing_extensions import (
Literal,
TypedDict,
- Unpack,
)
from galaxy.files import ProvidesUserFileSourcesUserContext
from galaxy.files.sources import (
- BaseFilesSource,
+ Entry,
+ EntryData,
FilesSourceOptions,
- FilesSourceProperties,
)
+from galaxy.files.sources._rdm import RDMFilesSource
from galaxy.util import (
DEFAULT_SOCKET_TIMEOUT,
get_charset_from_http_headers,
@@ -39,10 +38,6 @@
AccessStatus = Literal["public", "restricted"]
-class InvenioFilesSourceProperties(FilesSourceProperties):
- url: str
-
-
class ResourceType(TypedDict):
id: str
@@ -104,6 +99,8 @@ class RecordLinks(TypedDict):
class InvenioRecord(TypedDict):
id: str
title: str
+ created: str
+ updated: str
resource_type: ResourceType
publication_date: str
access: RecordAccess
@@ -112,19 +109,11 @@ class InvenioRecord(TypedDict):
links: RecordLinks
-class InvenioFilesSource(BaseFilesSource):
+class InvenioRDMFilesSource(RDMFilesSource):
"""A files source for Invenio turn-key research data management repository."""
plugin_type = "inveniordm"
- def __init__(self, **kwd: Unpack[InvenioFilesSourceProperties]):
- props = self._parse_common_config_opts(kwd)
- base_url = props.get("url", None)
- if not base_url:
- raise Exception("InvenioFilesSource requires a url")
- self._invenio_url = base_url
- self._props = props
-
def _list(
self,
path="/",
@@ -132,10 +121,24 @@ def _list(
user_context: Optional[ProvidesUserFileSourcesUserContext] = None,
opts: Optional[FilesSourceOptions] = None,
):
+ write_intent = opts and opts.write_intent or False
is_root_path = path == "/"
if is_root_path:
- return self._list_records(user_context)
- return self._list_record_files(path, user_context)
+ return self._list_records(write_intent, user_context)
+ return self._list_record_files(path, write_intent, user_context)
+
+ def _create_entry(
+ self,
+ entry_data: EntryData,
+ user_context: Optional[ProvidesUserFileSourcesUserContext] = None,
+ opts: Optional[FilesSourceOptions] = None,
+ ) -> Entry:
+ record = self._create_draft_record(entry_data["name"], user_context=user_context)
+ return {
+ "uri": self._to_plugin_uri(record["links"]["record"]),
+ "name": record["metadata"]["title"],
+ "external_link": record["links"]["self_html"],
+ }
def _realize_to(
self,
@@ -144,9 +147,8 @@ def _realize_to(
user_context: Optional[ProvidesUserFileSourcesUserContext] = None,
opts: Optional[FilesSourceOptions] = None,
):
- # TODO: source_path will be wrong when constructed from the UI as it assumes the target_uri is `get_root_uri() + filename`
-
- remote_path = urljoin(self._invenio_url, source_path)
+ # source_path = '/api/records/pxpnk-7c133/Tester.rocrate.zip'
+ remote_path = urljoin(self.repository_url, source_path)
# TODO: user_context is always None here when called from a data fetch.
# This prevents downloading files that require authentication even if the user provided a token.
headers = self._get_request_headers(user_context)
@@ -165,23 +167,48 @@ def _write_from(
opts: Optional[FilesSourceOptions] = None,
):
filename = os.path.basename(target_path)
- record_title = f"{filename} (exported by Galaxy)"
- draft_record = self._create_draft_record(title=record_title, user_context=user_context)
+ dirname = os.path.dirname(target_path)
+ record_id = dirname.replace("/api/records/", "")
+ use_existing_record = len(record_id) > 5
+
+ # TODO: if we create the record here, then the target_path of the export will not have the record id and it will not be possible to import it back.
+ # We need to create the record before the export and then use the record id in the target_path.
+
+ if use_existing_record:
+ draft_record = self._get_draft_record(record_id, user_context=user_context)
+ else:
+ record_title = f"{filename} (exported by Galaxy)"
+ draft_record = self._create_draft_record(title=record_title, user_context=user_context)
try:
self._upload_file_to_draft_record(draft_record, filename, native_path, user_context=user_context)
self._publish_draft_record(draft_record, user_context=user_context)
except Exception:
- self._delete_draft_record(draft_record, user_context)
+ if not use_existing_record:
+ self._delete_draft_record(draft_record, user_context)
raise
- def _list_records(self, user_context: Optional[ProvidesUserFileSourcesUserContext] = None):
+ def _list_records(self, write_intent: bool, user_context: Optional[ProvidesUserFileSourcesUserContext] = None):
+ if write_intent:
+ return self._list_writeable_records(user_context)
+ return self._list_all_records(user_context)
+
+ def _list_all_records(self, user_context: Optional[ProvidesUserFileSourcesUserContext] = None):
+ # TODO: This is limited to 25 records by default. Add pagination support?
+ request_url = urljoin(self.repository_url, "api/records")
+ response_data = self._get_response(user_context, request_url)
+ return self._get_records_from_response(response_data)
+
+ def _list_writeable_records(self, user_context: Optional[ProvidesUserFileSourcesUserContext] = None):
# TODO: This is limited to 25 records by default. Add pagination support?
- request_url = urljoin(self._invenio_url, "api/records")
+ # Only draft records can be written to.
+ request_url = urljoin(self.repository_url, "api/user/records?is_published=false")
response_data = self._get_response(user_context, request_url)
return self._get_records_from_response(response_data)
- def _list_record_files(self, path, user_context: Optional[ProvidesUserFileSourcesUserContext] = None):
- request_url = urljoin(self._invenio_url, f"{path}/files")
+ def _list_record_files(
+ self, path: str, write_intent: bool, user_context: Optional[ProvidesUserFileSourcesUserContext] = None
+ ):
+ request_url = urljoin(self.repository_url, f"{path}{'/draft' if write_intent else '' }/files")
response_data = self._get_response(user_context, request_url)
return self._get_record_files_from_response(path, response_data)
@@ -199,9 +226,13 @@ def _get_request_headers(self, user_context: Optional[ProvidesUserFileSourcesUse
def _get_records_from_response(self, response: dict):
records = response["hits"]["hits"]
+ return self._get_records_as_directories(records)
+
+ def _get_records_as_directories(self, records):
rval = []
for record in records:
uri = self._to_plugin_uri(record["links"]["self"])
+ # TODO: define model for Directory and File
rval.append(
{
"class": "Directory",
@@ -237,16 +268,12 @@ def _get_record_files_from_response(self, path: str, response: dict):
return rval
def _to_plugin_uri(self, uri: str) -> str:
- return uri.replace(self._invenio_url, self.get_uri_root())
+ return uri.replace(self.repository_url, self.get_uri_root())
- def _serialization_props(
- self, user_context: Optional[ProvidesUserFileSourcesUserContext] = None
- ) -> InvenioFilesSourceProperties:
- effective_props = {}
- for key, val in self._props.items():
- effective_props[key] = self._evaluate_prop(val, user_context=user_context)
- effective_props["url"] = self._invenio_url
- return cast(InvenioFilesSourceProperties, effective_props)
+ def _get_draft_record(self, record_id: str, user_context: Optional[ProvidesUserFileSourcesUserContext] = None):
+ request_url = urljoin(self.repository_url, f"api/records/{record_id}/draft")
+ draft_record = self._get_response(user_context, request_url)
+ return draft_record
def _create_draft_record(
self, title: str, user_context: Optional[ProvidesUserFileSourcesUserContext] = None
@@ -274,7 +301,7 @@ def _create_draft_record(
"Cannot create record without authentication token. Please set your personal access token in your Galaxy preferences."
)
- create_record_url = urljoin(self._invenio_url, "api/records")
+ create_record_url = urljoin(self.repository_url, "api/records")
response = requests.post(create_record_url, json=create_record_request, headers=headers, verify=VERIFY)
self._ensure_response_has_expected_status_code(response, 201)
record = response.json()
@@ -295,15 +322,16 @@ def _upload_file_to_draft_record(
native_path: str,
user_context: Optional[ProvidesUserFileSourcesUserContext] = None,
):
- upload_file_url = urljoin(self._invenio_url, f"api/records/{record['id']}/draft/files")
+ upload_file_url = record["links"]["files"]
headers = self._get_request_headers(user_context)
- # Add file metadata
+ # Add file metadata entry
response = requests.post(upload_file_url, json=[{"key": filename}], headers=headers, verify=VERIFY)
self._ensure_response_has_expected_status_code(response, 201)
# Upload file content
- file_entry = response.json()["entries"][0]
+ entries = response.json()["entries"]
+ file_entry = next(entry for entry in entries if entry["key"] == filename)
upload_file_content_url = file_entry["links"]["content"]
commit_file_upload_url = file_entry["links"]["commit"]
with open(native_path, "rb") as file:
@@ -317,7 +345,7 @@ def _upload_file_to_draft_record(
def _publish_draft_record(
self, record: InvenioRecord, user_context: Optional[ProvidesUserFileSourcesUserContext] = None
):
- publish_record_url = urljoin(self._invenio_url, f"api/records/{record['id']}/draft/actions/publish")
+ publish_record_url = urljoin(self.repository_url, f"api/records/{record['id']}/draft/actions/publish")
headers = self._get_request_headers(user_context)
response = requests.post(publish_record_url, headers=headers, verify=VERIFY)
self._ensure_response_has_expected_status_code(response, 202)
@@ -361,4 +389,4 @@ def _get_response_error_message(self, response):
return error_message
-__all__ = ("InvenioFilesSource",)
+__all__ = ("InvenioRDMFilesSource",)
From 772292bac2308faa66d8b21248be81e81a9ef2b0 Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Thu, 20 Jul 2023 17:26:32 +0200
Subject: [PATCH 08/41] Allow filtering only RDM plugins in the API
---
lib/galaxy/files/__init__.py | 3 +++
lib/galaxy/managers/remote_files.py | 8 ++++++--
lib/galaxy/webapps/galaxy/api/remote_files.py | 10 ++++++++++
3 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/lib/galaxy/files/__init__.py b/lib/galaxy/files/__init__.py
index 88184c7862bc..97f8c4765054 100644
--- a/lib/galaxy/files/__init__.py
+++ b/lib/galaxy/files/__init__.py
@@ -165,11 +165,14 @@ def plugins_to_dict(
for_serialization: bool = False,
user_context: Optional["FileSourceDictifiable"] = None,
browsable_only: Optional[bool] = False,
+ rdm_only: Optional[bool] = False,
) -> List[Dict[str, Any]]:
rval = []
for file_source in self._file_sources:
if not file_source.user_has_access(user_context):
continue
+ if rdm_only and not getattr(file_source, "supports_rdm", False):
+ continue
if browsable_only and not file_source.get_browsable():
continue
el = file_source.to_dict(for_serialization=for_serialization, user_context=user_context)
diff --git a/lib/galaxy/managers/remote_files.py b/lib/galaxy/managers/remote_files.py
index 470ae5cb3307..863c0494436c 100644
--- a/lib/galaxy/managers/remote_files.py
+++ b/lib/galaxy/managers/remote_files.py
@@ -128,12 +128,16 @@ def index(
return index
def get_files_source_plugins(
- self, user_context: ProvidesUserContext, browsable_only: Optional[bool] = True
+ self, user_context: ProvidesUserContext, browsable_only: Optional[bool] = True, rdm_only: Optional[bool] = False
) -> FilesSourcePluginList:
"""Display plugin information for each of the gxfiles:// URI targets available."""
user_file_source_context = ProvidesUserFileSourcesUserContext(user_context)
+ browsable_only = True if browsable_only is None else browsable_only
+ rdm_only = rdm_only or False
plugins_dict = self._file_sources.plugins_to_dict(
- user_context=user_file_source_context, browsable_only=True if browsable_only is None else browsable_only
+ user_context=user_file_source_context,
+ browsable_only=browsable_only,
+ rdm_only=rdm_only,
)
plugins = [FilesSourcePlugin(**plugin_dict) for plugin_dict in plugins_dict]
return FilesSourcePluginList.construct(__root__=plugins)
diff --git a/lib/galaxy/webapps/galaxy/api/remote_files.py b/lib/galaxy/webapps/galaxy/api/remote_files.py
index 16284e265e3c..ce930ebd0e11 100644
--- a/lib/galaxy/webapps/galaxy/api/remote_files.py
+++ b/lib/galaxy/webapps/galaxy/api/remote_files.py
@@ -80,6 +80,14 @@
),
)
+RDMOnlyQueryParam: Optional[bool] = Query(
+ default=False,
+ title="RDM only",
+ description=(
+ "Whether to return only RDM compatible plugins. The default is `False`, which will return all plugins."
+ ),
+)
+
@router.cbv
class FastAPIRemoteFiles:
@@ -116,8 +124,10 @@ async def plugins(
self,
user_ctx: ProvidesUserContext = DependsOnTrans,
browsable_only: Optional[bool] = BrowsableQueryParam,
+ rdm_only: Optional[bool] = RDMOnlyQueryParam,
) -> FilesSourcePluginList:
"""Display plugin information for each of the gxfiles:// URI targets available."""
+ return self.manager.get_files_source_plugins(user_ctx, browsable_only, rdm_only)
@router.post(
"/api/remote_files",
From 31000c101f551f56b9626a115c9d2f7e0df16e2a Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Thu, 20 Jul 2023 17:27:59 +0200
Subject: [PATCH 09/41] Update client API schema
---
client/src/schema/schema.ts | 82 +++++++++++++++++++++++++++++++++++++
1 file changed, 82 insertions(+)
diff --git a/client/src/schema/schema.ts b/client/src/schema/schema.ts
index 5961148ca6a9..6668bfebbff0 100644
--- a/client/src/schema/schema.ts
+++ b/client/src/schema/schema.ts
@@ -1232,6 +1232,11 @@ export interface paths {
* @description Lists all remote files available to the user from different sources.
*/
get: operations["index_api_remote_files_get"];
+ /**
+ * Creates a new entry (directory/record) on the remote files source.
+ * @description Creates a new entry on the remote files source.
+ */
+ post: operations["create_entry_api_remote_files_post"];
};
"/api/remote_files/plugins": {
/**
@@ -2597,6 +2602,23 @@ export interface components {
ConvertedDatasetsMap: {
[key: string]: string | undefined;
};
+ /**
+ * CreateEntryPayload
+ * @description Base model definition with common configuration used by all derived models.
+ */
+ CreateEntryPayload: {
+ /**
+ * Name
+ * @description The name of the entry to create.
+ * @example my_new_entry
+ */
+ name: string;
+ /**
+ * Target
+ * @description The target file source to create the entry in.
+ */
+ target: string;
+ };
/**
* CreateHistoryContentFromStore
* @description Base model definition with common configuration used by all derived models.
@@ -2968,6 +2990,29 @@ export interface components {
*/
url: string;
};
+ /**
+ * CreatedEntryResponse
+ * @description Base model definition with common configuration used by all derived models.
+ */
+ CreatedEntryResponse: {
+ /**
+ * External link
+ * @description An optional external link to the created entry if available.
+ */
+ external_link?: string;
+ /**
+ * Name
+ * @description The name of the created entry.
+ * @example my_new_entry
+ */
+ name: string;
+ /**
+ * URI
+ * @description The URI of the created entry.
+ * @example gxfiles://my_new_entry
+ */
+ uri: string;
+ };
/**
* CreatedUserModel
* @description User in a transaction context.
@@ -11122,11 +11167,13 @@ export interface operations {
/** @description The requested format of returned data. Either `flat` to simply list all the files, `jstree` to get a tree representation of the files, or the default `uri` to list files and directories by their URI. */
/** @description Wether to recursively lists all sub-directories. This will be `True` by default depending on the `target`. */
/** @description (This only applies when `format` is `jstree`) The value can be either `folders` or `files` and it will disable the corresponding nodes of the tree. */
+ /** @description Whether the query is made with the intention of writing to the source. If set to True, only entries that can be written to will be accessible. */
query?: {
target?: string;
format?: components["schemas"]["RemoteFilesFormat"];
recursive?: boolean;
disable?: components["schemas"]["RemoteFilesDisableMode"];
+ write_intent?: boolean;
};
/** @description The user ID that will be used to effectively make this API call. Only admins and designated users can make API calls on behalf of other users. */
header?: {
@@ -16081,11 +16128,13 @@ export interface operations {
/** @description The requested format of returned data. Either `flat` to simply list all the files, `jstree` to get a tree representation of the files, or the default `uri` to list files and directories by their URI. */
/** @description Wether to recursively lists all sub-directories. This will be `True` by default depending on the `target`. */
/** @description (This only applies when `format` is `jstree`) The value can be either `folders` or `files` and it will disable the corresponding nodes of the tree. */
+ /** @description Whether the query is made with the intention of writing to the source. If set to True, only entries that can be written to will be accessible. */
query?: {
target?: string;
format?: components["schemas"]["RemoteFilesFormat"];
recursive?: boolean;
disable?: components["schemas"]["RemoteFilesDisableMode"];
+ write_intent?: boolean;
};
/** @description The user ID that will be used to effectively make this API call. Only admins and designated users can make API calls on behalf of other users. */
header?: {
@@ -16109,6 +16158,37 @@ export interface operations {
};
};
};
+ create_entry_api_remote_files_post: {
+ /**
+ * Creates a new entry (directory/record) on the remote files source.
+ * @description Creates a new entry on the remote files source.
+ */
+ parameters?: {
+ /** @description The user ID that will be used to effectively make this API call. Only admins and designated users can make API calls on behalf of other users. */
+ header?: {
+ "run-as"?: string;
+ };
+ };
+ requestBody: {
+ content: {
+ "application/json": components["schemas"]["CreateEntryPayload"];
+ };
+ };
+ responses: {
+ /** @description Successful Response */
+ 200: {
+ content: {
+ "application/json": components["schemas"]["CreatedEntryResponse"];
+ };
+ };
+ /** @description Validation Error */
+ 422: {
+ content: {
+ "application/json": components["schemas"]["HTTPValidationError"];
+ };
+ };
+ };
+ };
plugins_api_remote_files_plugins_get: {
/**
* Display plugin information for each of the gxfiles:// URI targets available.
@@ -16116,8 +16196,10 @@ export interface operations {
*/
parameters?: {
/** @description Whether to return browsable filesources only. The default is `True`, which will omit filesourceslike `http` and `base64` that do not implement a list method. */
+ /** @description Whether to return only RDM compatible plugins. The default is `False`, which will return all plugins. */
query?: {
browsable_only?: boolean;
+ rdm_only?: boolean;
};
/** @description The user ID that will be used to effectively make this API call. Only admins and designated users can make API calls on behalf of other users. */
header?: {
From 6db572393250a2c60d035d2cf5f2e744e58b7223 Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Thu, 20 Jul 2023 17:33:42 +0200
Subject: [PATCH 10/41] Update FilesDialog services
- Add new API parameters
- Add new POST endpoint for creating entries
- Add some code docs
---
client/src/components/FilesDialog/services.ts | 34 ++++++++++++++++---
1 file changed, 30 insertions(+), 4 deletions(-)
diff --git a/client/src/components/FilesDialog/services.ts b/client/src/components/FilesDialog/services.ts
index f022c7c4a0d4..12ad338dc08d 100644
--- a/client/src/components/FilesDialog/services.ts
+++ b/client/src/components/FilesDialog/services.ts
@@ -5,17 +5,43 @@ export type FilesSourcePlugin = components["schemas"]["FilesSourcePlugin"];
export type RemoteFile = components["schemas"]["RemoteFile"];
export type RemoteDirectory = components["schemas"]["RemoteDirectory"];
export type RemoteEntry = RemoteFile | RemoteDirectory;
+export type CreatedEntry = components["schemas"]["CreatedEntryResponse"];
const getRemoteFilesPlugins = fetcher.path("/api/remote_files/plugins").method("get").create();
-export async function getFileSources(): Promise
+ A new draft record with name {{ newEntry.name }} has been created in the repository.
+
+ You can preview the record in the repository and further edit its metadata at
+ Please use the button below to upload the exported {{ props.what }} to the record.
+ You need to create the new record in a repository before exporting the {{ props.what }} to it.
+
You can publish your history to one of the available DOI repositories here.
++ Your history export archive needs to be uploaded to an existing record. You will need to create + a new record on the repository or select an existing draft record and then export + your history to it. +
+- A new draft record with name {{ newEntry.name }} has been created in the repository. +
+ {{ newEntry.name }} + draft record has been created in the repository.
You can preview the record in the repository and further edit its metadata at
From f5c03952fbe0e459cc4cbaa0ea04e2d14df2804d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?David=20L=C3=B3pez?=
<46503462+davelopez@users.noreply.github.com>
Date: Wed, 26 Jul 2023 16:54:58 +0200
Subject: [PATCH 18/41] Fix wording in API doc
Co-authored-by: Marius van den Beek
- You can preview the record in the repository and further edit its metadata at
+ You can preview the record in the repository, further edit its metadata and decide when to
+ publish it at
You can publish your history to one of the available DOI repositories here. You can upload your history to one of the available DOI repositories here.
- Your history export archive needs to be uploaded to an existing record. You will need to create
- a new record on the repository or select an existing draft record and then export
- your history to it.
+ Your history export archive needs to be uploaded to an existing draft record. You will
+ need to create a new record on the repository or select an existing
+ draft record and then export your history to it.
You can upload your history to one of the available DOI repositories here. You can upload your history to one of the available RDM repositories here.
Your history export archive needs to be uploaded to an existing draft record. You will
need to create a new record on the repository or select an existing
@@ -249,7 +252,7 @@ function updateExportParams(newParams) {
public name you want to associate with your records or whether you want to publish them
immediately or keep them as drafts after export.
-
- Exporting to a RDM repository (e.g. Invenio RDM or Zenodo RDM) will require to create or
- select an existing record in the repository where the history archive will be uploaded. The
- export record will be associated with the archived history and you will be able to recreate the
- history later by importing it from the export record.
+ Exporting to an RDM repository (e.g. any
+
You may need to setup your credentials for the selected repository in your
From e9926036c34c89befad88df3738f17e2816dab22 Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Tue, 15 Aug 2023 15:16:15 +0200
Subject: [PATCH 39/41] Add unit tests for ExportRDMForm component
---
.../components/Common/ExportRDMForm.test.ts | 139 ++++++++++++++++++
.../src/components/Common/ExportRDMForm.vue | 24 ++-
2 files changed, 155 insertions(+), 8 deletions(-)
create mode 100644 client/src/components/Common/ExportRDMForm.test.ts
diff --git a/client/src/components/Common/ExportRDMForm.test.ts b/client/src/components/Common/ExportRDMForm.test.ts
new file mode 100644
index 000000000000..7f19404d7ab1
--- /dev/null
+++ b/client/src/components/Common/ExportRDMForm.test.ts
@@ -0,0 +1,139 @@
+import { getLocalVue } from "@tests/jest/helpers";
+import { mount, Wrapper } from "@vue/test-utils";
+import flushPromises from "flush-promises";
+
+import { mockFetcher } from "@/schema/__mocks__";
+
+import { CreatedEntry } from "../FilesDialog/services";
+
+import ExportRDMForm from "./ExportRDMForm.vue";
+import FilesInput from "@/components/FilesDialog/FilesInput.vue";
+
+jest.mock("@/schema"); // replace the real schema module with its Jest mock; `mockFetcher` is imported from "@/schema/__mocks__"
+// Vue instance handed to mount() as `localVue`. NOTE(review): the meaning of the `true` flag lives in @tests/jest/helpers — confirm there.
+const localVue = getLocalVue(true);
+// Element-id selectors; presumably the form buttons the tests look up — verify against ExportRDMForm.vue.
+const CREATE_RECORD_BTN = "#create-record-button";
+const EXPORT_TO_NEW_RECORD_BTN = "#export-button-new-record";
+const EXPORT_TO_EXISTING_RECORD_BTN = "#export-button-existing-record";
+// Fake RDM data. FAKE_ENTRY is the payload the mocked POST /api/remote_files returns in initWrapper().
+const FAKE_RDM_SOURCE_URI = "gxfiles://test-uri";
+const FAKE_RDM_EXISTING_RECORD_URI = "gxfiles://test-uri/test-record";
+const FAKE_RECORD_NAME = "test record name";
+const FAKE_ENTRY: CreatedEntry = {
+ uri: FAKE_RDM_SOURCE_URI,
+ name: FAKE_RECORD_NAME,
+ external_link: "http://example.com",
+};
+// Mocks POST /api/remote_files to return FAKE_ENTRY, mounts ExportRDMForm, and resolves to the mounted wrapper.
+async function initWrapper() {
+ mockFetcher.path("/api/remote_files").method("post").mock({ data: FAKE_ENTRY });
+ // No props are passed; the shared localVue is supplied to the mount.
+ const wrapper = mount(ExportRDMForm, {
+ propsData: {},
+ localVue,
+ });
+ await flushPromises(); // wait for pending promise callbacks before handing the wrapper back
+ return wrapper;
+}
+
+describe("ExportRDMForm", () => {
+ let wrapper: Wrapper Please use the button below to upload the exported {{ props.what }} to the record.
You need to create the new record in a repository before exporting the {{ props.what }} to it.