diff --git a/lib/galaxy/files/sources/__init__.py b/lib/galaxy/files/sources/__init__.py
index 19fcc1b21475..fd1e84670173 100644
--- a/lib/galaxy/files/sources/__init__.py
+++ b/lib/galaxy/files/sources/__init__.py
@@ -277,6 +277,10 @@ def to_dict(self, for_serialization=False, user_context: "OptionalUserContext" =
         context doesn't need to be present after the plugin is re-hydrated.
         """
 
+    @abc.abstractmethod
+    def prefer_links(self) -> bool:
+        """Prefer linking to files."""
+
 
 class SupportsBrowsing(metaclass=abc.ABCMeta):
     """An interface indicating that this filesource is browsable.
@@ -351,6 +355,9 @@ def user_has_access(self, user_context: "OptionalUserContext") -> bool:
             or (self._user_has_required_roles(user_context) and self._user_has_required_groups(user_context))
         )
 
+    def prefer_links(self) -> bool:
+        return False
+
     @property
     def user_context_required(self) -> bool:
         return self.requires_roles is not None or self.requires_groups is not None
diff --git a/lib/galaxy/files/sources/posix.py b/lib/galaxy/files/sources/posix.py
index 8768b5b5ca9c..258d5ef20a69 100644
--- a/lib/galaxy/files/sources/posix.py
+++ b/lib/galaxy/files/sources/posix.py
@@ -26,6 +26,7 @@
 DEFAULT_ENFORCE_SYMLINK_SECURITY = True
 DEFAULT_DELETE_ON_REALIZE = False
 DEFAULT_ALLOW_SUBDIR_CREATION = True
+DEFAULT_PREFER_LINKS = False
 
 
 class PosixFilesSourceProperties(FilesSourceProperties, total=False):
@@ -33,6 +34,7 @@ class PosixFilesSourceProperties(FilesSourceProperties, total=False):
     enforce_symlink_security: bool
     delete_on_realize: bool
     allow_subdir_creation: bool
+    prefer_links: bool
 
 
 class PosixFilesSource(BaseFilesSource):
@@ -53,6 +55,10 @@ def __init__(self, **kwd: Unpack[PosixFilesSourceProperties]):
         self.enforce_symlink_security = props.get("enforce_symlink_security", DEFAULT_ENFORCE_SYMLINK_SECURITY)
         self.delete_on_realize = props.get("delete_on_realize", DEFAULT_DELETE_ON_REALIZE)
         self.allow_subdir_creation = props.get("allow_subdir_creation", DEFAULT_ALLOW_SUBDIR_CREATION)
+        self._prefer_links = props.get("prefer_links", DEFAULT_PREFER_LINKS)
+
+    def prefer_links(self) -> bool:
+        return self._prefer_links
 
     def _list(
         self,
@@ -182,6 +188,7 @@ def _serialization_props(self, user_context: OptionalUserContext = None) -> Posi
             "enforce_symlink_security": self.enforce_symlink_security,
             "delete_on_realize": self.delete_on_realize,
             "allow_subdir_creation": self.allow_subdir_creation,
+            "prefer_links": self._prefer_links,
         }
 
     @property
diff --git a/lib/galaxy/files/uris.py b/lib/galaxy/files/uris.py
index d35e51d696c4..9d48853294d6 100644
--- a/lib/galaxy/files/uris.py
+++ b/lib/galaxy/files/uris.py
@@ -49,8 +49,7 @@ def stream_url_to_file(
     target_path: Optional[str] = None,
     file_source_opts: Optional[FilesSourceOptions] = None,
 ) -> str:
-    if file_sources is None:
-        file_sources = ConfiguredFileSources.from_dict(None, load_stock_plugins=True)
+    file_sources = ensure_file_sources(file_sources)
     file_source, rel_path = file_sources.get_file_source_path(url)
     if file_source:
         if not target_path:
@@ -62,6 +61,12 @@ def stream_url_to_file(
     raise NoMatchingFileSource(f"Could not find a matching handler for: {url}")
 
 
+def ensure_file_sources(file_sources: Optional["ConfiguredFileSources"]) -> "ConfiguredFileSources":
+    if file_sources is None:
+        file_sources = ConfiguredFileSources.from_dict(None, load_stock_plugins=True)
+    return file_sources
+
+
 def stream_to_file(stream, suffix="", prefix="", dir=None, text=False, **kwd):
     """Writes a stream to a temporary file, returns the temporary file's name"""
     fd, temp_name = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir, text=text)
diff --git a/lib/galaxy/tools/data_fetch.py b/lib/galaxy/tools/data_fetch.py
index bd5f1ae0b8e6..02042e6bcaa7 100644
--- a/lib/galaxy/tools/data_fetch.py
+++ b/lib/galaxy/tools/data_fetch.py
@@ -23,6 +23,7 @@
     UploadProblemException,
 )
 from galaxy.files.uris import (
+    ensure_file_sources,
     stream_to_file,
     stream_url_to_file,
 )
@@ -97,13 +98,13 @@ def expand_elements_from(target_or_item):
             decompressed_directory = _decompress_target(upload_config, target_or_item)
             items = _directory_to_items(decompressed_directory)
         elif elements_from == "bagit":
-            _, elements_from_path = _has_src_to_path(upload_config, target_or_item, is_dataset=False)
+            _, elements_from_path, _ = _has_src_to_path(upload_config, target_or_item, is_dataset=False)
             items = _bagit_to_items(elements_from_path)
         elif elements_from == "bagit_archive":
             decompressed_directory = _decompress_target(upload_config, target_or_item)
             items = _bagit_to_items(decompressed_directory)
         elif elements_from == "directory":
-            _, elements_from_path = _has_src_to_path(upload_config, target_or_item, is_dataset=False)
+            _, elements_from_path, _ = _has_src_to_path(upload_config, target_or_item, is_dataset=False)
             items = _directory_to_items(elements_from_path)
         else:
             raise Exception(f"Unknown elements from type encountered [{elements_from}]")
@@ -205,7 +206,7 @@ def _resolve_item(item):
                 pass
             key = keys[composite_item_idx]
             writable_file = writable_files[key]
-            _, src_target = _has_src_to_path(upload_config, composite_item)
+            _, src_target, _ = _has_src_to_path(upload_config, composite_item)
             # do the writing
             sniff.handle_composite_file(
                 datatype,
@@ -238,10 +239,23 @@ def _resolve_item_with_primary(item):
         converted_path = None
 
         deferred = upload_config.get_option(item, "deferred")
+
+        link_data_only = upload_config.link_data_only
+        link_data_only_explicit = upload_config.link_data_only_explicit
+        if "link_data_only" in item:
+            # Allow overriding this on a per file basis.
+            link_data_only, link_data_only_explicit = _link_data_only(item)
+
         name: str
         path: Optional[str]
+        default_in_place = False
         if not deferred:
-            name, path = _has_src_to_path(upload_config, item, is_dataset=True)
+            name, path, is_link = _has_src_to_path(
+                upload_config, item, is_dataset=True, link_data_only_explicitly_set=link_data_only_explicit
+            )
+            if is_link:
+                link_data_only = True
+                default_in_place = True
         else:
             name, path = _has_src_to_name(item) or "Deferred Dataset", None
         sources = []
@@ -266,10 +280,6 @@ def _resolve_item_with_primary(item):
             item["error_message"] = error_message
 
         dbkey = item.get("dbkey", "?")
-        link_data_only = upload_config.link_data_only
-        if "link_data_only" in item:
-            # Allow overriding this on a per file basis.
-            link_data_only = _link_data_only(item)
         ext = "data"
         staged_extra_files = None
 
@@ -281,7 +291,7 @@ def _resolve_item_with_primary(item):
 
         effective_state = "ok"
         if not deferred and not error_message:
-            in_place = item.get("in_place", False)
+            in_place = item.get("in_place", default_in_place)
             purge_source = item.get("purge_source", True)
 
             registry = upload_config.registry
@@ -425,7 +435,7 @@ def _bagit_to_items(directory):
 
 
 def _decompress_target(upload_config: "UploadConfig", target: Dict[str, Any]):
-    elements_from_name, elements_from_path = _has_src_to_path(upload_config, target, is_dataset=False)
+    elements_from_name, elements_from_path, _ = _has_src_to_path(upload_config, target, is_dataset=False)
     # by default Galaxy will check for a directory with a single file and interpret that
     # as the new root for expansion, this is a good user experience for uploading single
     # files in a archive but not great from an API perspective. Allow disabling by setting
@@ -483,13 +493,33 @@ def _has_src_to_name(item) -> Optional[str]:
     return name
 
 
-def _has_src_to_path(upload_config: "UploadConfig", item: Dict[str, Any], is_dataset: bool = False) -> Tuple[str, str]:
+def _has_src_to_path(
+    upload_config: "UploadConfig",
+    item: Dict[str, Any],
+    is_dataset: bool = False,
+    link_data_only: bool = False,
+    link_data_only_explicitly_set: bool = False,
+) -> Tuple[str, str, bool]:
     assert "src" in item, item
     src = item.get("src")
     name = item.get("name")
+    is_link = False
     if src == "url":
         url = item.get("url")
+        file_sources = ensure_file_sources(upload_config.file_sources)
         assert url, "url cannot be empty"
+        if not link_data_only_explicitly_set:
+            file_source, rel_path = file_sources.get_file_source_path(url)
+            prefer_links = file_source.prefer_links()
+            if prefer_links:
+                if rel_path.startswith("/"):
+                    rel_path = rel_path[1:]
+                path = os.path.abspath(os.path.join(file_source.root, rel_path))
+                if name is None:
+                    name = url.split("/")[-1]
+                is_link = True
+                return name, path, is_link
+
         try:
             path = stream_url_to_file(url, file_sources=upload_config.file_sources, dir=upload_config.working_directory)
         except Exception as e:
@@ -513,7 +543,7 @@ def _has_src_to_path(upload_config: "UploadConfig", item: Dict[str, Any], is_dat
         path = item["path"]
     if name is None:
         name = os.path.basename(path)
-    return name, path
+    return name, path, is_link
 
 
 def _handle_hash_validation(hash_function: HashFunctionNameEnum, hash_value: str, path: str):
@@ -564,7 +594,7 @@ def __init__(
         self.space_to_tab = request.get("space_to_tab", False)
         self.auto_decompress = request.get("auto_decompress", False)
         self.deferred = request.get("deferred", False)
-        self.link_data_only = _link_data_only(request)
+        self.link_data_only, self.link_data_only_explicit = _link_data_only(request)
         self.file_sources_dict = file_sources_dict
         self._file_sources = None
 
@@ -616,12 +646,19 @@ def ensure_in_working_directory(self, path, purge_source, in_place):
         return new_path
 
 
-def _link_data_only(has_config_dict):
-    link_data_only = has_config_dict.get("link_data_only", False)
-    if not isinstance(link_data_only, bool):
-        # Allow the older string values of 'copy_files' and 'link_to_files'
-        link_data_only = link_data_only == "copy_files"
-    return link_data_only
+def _link_data_only(has_config_dict) -> Tuple[bool, bool]:
+    if "link_data_only" in has_config_dict:
+        link_data_only_raw = has_config_dict["link_data_only"]
+        if not isinstance(link_data_only_raw, bool):
+            # Allow the older string values of 'copy_files' and 'link_to_files'
+            link_data_only = link_data_only_raw == "copy_files"
+        else:
+            link_data_only = link_data_only_raw
+        link_data_only_explicit = True
+    else:
+        link_data_only = False
+        link_data_only_explicit = False
+    return link_data_only, link_data_only_explicit
 
 
 def _for_each_src(f, obj):
diff --git a/lib/galaxy_test/driver/integration_setup.py b/lib/galaxy_test/driver/integration_setup.py
index b63b567247ea..30194e63c830 100644
--- a/lib/galaxy_test/driver/integration_setup.py
+++ b/lib/galaxy_test/driver/integration_setup.py
@@ -15,7 +15,9 @@
 REQUIRED_GROUP_EXPRESSION = f"{GROUP_A} or '{GROUP_B}'"
 
 
-def get_posix_file_source_config(root_dir: str, roles: str, groups: str, include_test_data_dir: bool) -> str:
+def get_posix_file_source_config(
+    root_dir: str, roles: str, groups: str, include_test_data_dir: bool, prefer_links: bool = False
+) -> str:
     rval = f"""
 - type: posix
   id: posix_test
@@ -26,6 +28,17 @@ def get_posix_file_source_config(root_dir: str, roles: str, groups: str, include
   requires_roles: {roles}
   requires_groups: {groups}
 """
+    if prefer_links:
+        rval += f"""
+- type: posix
+  id: linking_source
+  label: Posix
+  doc: Files from a local path, linked rather than copied
+  root: {root_dir}
+  writable: true
+  prefer_links: true
+"""
+
     if include_test_data_dir:
         rval += """
 - type: posix
@@ -44,9 +57,10 @@ def create_file_source_config_file_on(
     include_test_data_dir,
     required_role_expression,
     required_group_expression,
+    prefer_links: bool = False,
 ):
     file_contents = get_posix_file_source_config(
-        root_dir, required_role_expression, required_group_expression, include_test_data_dir
+        root_dir, required_role_expression, required_group_expression, include_test_data_dir, prefer_links=prefer_links
     )
     file_path = os.path.join(temp_dir, "file_sources_conf_posix.yml")
     with open(file_path, "w") as f:
@@ -67,6 +81,7 @@ def handle_galaxy_config_kwds(
         # Require role for access but do not require groups by default on every test to simplify them
         required_role_expression=REQUIRED_ROLE_EXPRESSION,
         required_group_expression="",
+        prefer_links: bool = False,
     ):
         temp_dir = os.path.realpath(mkdtemp())
         clazz_ = clazz_ or cls
@@ -79,6 +94,7 @@ def handle_galaxy_config_kwds(
             clazz_.include_test_data_dir,
             required_role_expression,
             required_group_expression,
+            prefer_links=prefer_links,
         )
 
         config["file_sources_config_file"] = file_sources_config_file
diff --git a/test/integration/test_remote_files_posix.py b/test/integration/test_remote_files_posix.py
index 7d588cdf9d25..0fbc4b7f8d93 100644
--- a/test/integration/test_remote_files_posix.py
+++ b/test/integration/test_remote_files_posix.py
@@ -1,8 +1,8 @@
-# Before running this test, start nginx+webdav in Docker using following command:
-# docker run -v `pwd`/test/integration/webdav/data:/media -e WEBDAV_USERNAME=alice -e WEBDAV_PASSWORD=secret1234 -p 7083:7083 jmchilton/webdavdev
-# Apache Docker host (shown next) doesn't work because displayname not set in response.
-# docker run -v `pwd`/test/integration/webdav:/var/lib/dav -e AUTH_TYPE=Basic -e USERNAME=alice -e PASSWORD=secret1234 -e LOCATION=/ -p 7083:80 bytemark/webdav
+import os
+
+from sqlalchemy import select
+
+from galaxy.model import Dataset
 from galaxy_test.base import api_asserts
 from galaxy_test.base.populators import DatasetPopulator
 from galaxy_test.driver import integration_util
@@ -108,3 +108,41 @@ def _assert_list_response_matches_fixtures(self, list_response):
 
     def _assert_access_forbidden_response(self, response):
         api_asserts.assert_status_code_is(response, 403)
+
+
+class TestPreferLinksPosixFileSourceIntegration(PosixFileSourceSetup, integration_util.IntegrationTestCase):
+    dataset_populator: DatasetPopulator
+
+    @classmethod
+    def handle_galaxy_config_kwds(cls, config):
+        PosixFileSourceSetup.handle_galaxy_config_kwds(
+            config,
+            cls,
+            prefer_links=True,
+        )
+
+    def setUp(self):
+        super().setUp()
+        self._write_file_fixtures()
+        self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
+
+    def test_links_by_default(self):
+        with self.dataset_populator.test_history() as history_id:
+            element = dict(src="url", url="gxfiles://linking_source/a")
+            target = {
+                "destination": {"type": "hdas"},
+                "elements": [element],
+            }
+            targets = [target]
+            payload = {
+                "history_id": history_id,
+                "targets": targets,
+            }
+            new_dataset = self.dataset_populator.fetch(payload, assert_ok=True).json()["outputs"][0]
+            content = self.dataset_populator.get_history_dataset_content(history_id, dataset=new_dataset)
+            assert content == "a\n", content
+            stmt = select(Dataset).order_by(Dataset.create_time.desc()).limit(1)
+            dataset = self._app.model.session.execute(stmt).unique().scalar_one()
+            assert dataset.external_filename.endswith("/root/a")
+            assert os.path.exists(dataset.external_filename)
+            assert open(dataset.external_filename).read() == "a\n"
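
Note on usage (not part of the diff): the new prefer_links flag is read from the posix file source definition, so deployers opt in per source in file_sources_conf.yml. A minimal sketch of such an entry, assuming a hypothetical /data/shared root and id:

    - type: posix
      id: shared_data
      label: Shared data (linked)
      doc: Files served from a shared volume, linked rather than copied
      root: /data/shared
      prefer_links: true

With this in place, a fetch request element such as {"src": "url", "url": "gxfiles://shared_data/example.fastq"} resolves to a linked dataset (link_data_only becomes true and in_place defaults to true), whereas a request that sets link_data_only explicitly, e.g. "link_data_only": false, bypasses the preference and copies the file as before, per the link_data_only_explicitly_set check in _has_src_to_path.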