From 28a5fa6e483e2fd342efa69b26ca74e54e4f12cb Mon Sep 17 00:00:00 2001 From: John Chilton Date: Mon, 30 Sep 2024 10:01:12 -0400 Subject: [PATCH] More ability to set global defaults for file source plugin types. --- lib/galaxy/config/__init__.py | 5 +++++ lib/galaxy/config/schemas/config_schema.yml | 18 ++++++++++++++++++ lib/galaxy/files/plugins.py | 19 +++++++++++++++++++ lib/galaxy/files/sources/s3fs.py | 14 +++++++++++++- lib/galaxy/files/sources/webdav.py | 15 +++++++++++++-- 5 files changed, 68 insertions(+), 3 deletions(-) diff --git a/lib/galaxy/config/__init__.py b/lib/galaxy/config/__init__.py index ecf8f8aebcfb..80d57ab04bf6 100644 --- a/lib/galaxy/config/__init__.py +++ b/lib/galaxy/config/__init__.py @@ -704,6 +704,7 @@ class GalaxyAppConfiguration(BaseAppConfiguration, CommonConfigurationMixin): drmaa_external_runjob_script: str email_from: Optional[str] enable_tool_shed_check: bool + file_source_temp_dir: str galaxy_data_manager_data_path: str galaxy_infrastructure_url: str hours_between_check: int @@ -1236,6 +1237,9 @@ def _load_theme(path: str, theme_dict: dict): else: _load_theme(self.themes_config_file, self.themes) + if self.file_source_temp_dir: + self.file_source_temp_dir = os.path.abspath(self.file_source_temp_dir) + def _process_celery_config(self): if self.celery_conf and self.celery_conf.get("result_backend") is None: # If the result_backend is not set, use a SQLite database in the data directory @@ -1348,6 +1352,7 @@ def check(self): self.template_cache_path, self.tool_data_path, self.user_library_import_dir, + self.file_source_temp_dir, ] for path in paths_to_check: self._ensure_directory(path) diff --git a/lib/galaxy/config/schemas/config_schema.yml b/lib/galaxy/config/schemas/config_schema.yml index 5098d923d8ca..525a43d51e5b 100644 --- a/lib/galaxy/config/schemas/config_schema.yml +++ b/lib/galaxy/config/schemas/config_schema.yml @@ -4089,3 +4089,21 @@ mapping: per_host: true desc: | Enable the integration of the Galaxy Help Forum in the tool panel. This requires the help_forum_api_url to be set. + + file_source_temp_dir: + type: str + required: false + desc: | + Directory to store temporary files for file sources. This defaults to new_file_path if not set. + + file_source_webdav_use_temp_files: + type: bool + default: false + desc: | + Default value for use_temp_files for webdav plugins that don't explicitly declare this. + + file_source_s3fs_listings_expiry_time: + type: int + default: 60 + desc: | + Default value for listings_expiry_time for s3fs file source plugins that don't explicitly declare this. diff --git a/lib/galaxy/files/plugins.py b/lib/galaxy/files/plugins.py index 4f9c182f846a..009d232c808d 100644 --- a/lib/galaxy/files/plugins.py +++ b/lib/galaxy/files/plugins.py @@ -24,6 +24,9 @@ class FileSourcePluginsConfig: user_library_import_dir: Optional[str] ftp_upload_dir: Optional[str] ftp_upload_purge: bool + tmp_dir: Optional[str] + webdav_use_temp_files: bool + s3fs_listings_expiry_time: Optional[int] def __init__( self, @@ -33,6 +36,9 @@ def __init__( user_library_import_dir=None, ftp_upload_dir=None, ftp_upload_purge=True, + tmp_dir=None, + webdav_use_temp_files=False, + s3fs_listings_expiry_time=None, ): symlink_allowlist = symlink_allowlist or [] fetch_url_allowlist = fetch_url_allowlist or [] @@ -42,6 +48,9 @@ def __init__( self.user_library_import_dir = user_library_import_dir self.ftp_upload_dir = ftp_upload_dir self.ftp_upload_purge = ftp_upload_purge + self.tmp_dir = tmp_dir + self.webdav_use_temp_files = webdav_use_temp_files + self.s3fs_listings_expiry_time = s3fs_listings_expiry_time @staticmethod def from_app_config(config): @@ -54,6 +63,10 @@ def from_app_config(config): kwds["user_library_import_dir"] = config.user_library_import_dir kwds["ftp_upload_dir"] = config.ftp_upload_dir kwds["ftp_upload_purge"] = config.ftp_upload_purge + kwds["tmp_dir"] = config.file_source_temp_dir + kwds["webdav_use_temp_files"] = config.file_source_webdav_use_temp_files + kwds["s3fs_listings_expiry_time"] = config.file_source_s3fs_listings_expiry_time + return FileSourcePluginsConfig(**kwds) def to_dict(self): @@ -64,6 +77,9 @@ def to_dict(self): "user_library_import_dir": self.user_library_import_dir, "ftp_upload_dir": self.ftp_upload_dir, "ftp_upload_purge": self.ftp_upload_purge, + "tmp_dir": self.tmp_dir, + "webdav_use_temp_files": self.webdav_use_temp_files, + "s3fs_listings_expiry_time": self.s3fs_listings_expiry_time, } @staticmethod @@ -75,6 +91,9 @@ def from_dict(as_dict): user_library_import_dir=as_dict["user_library_import_dir"], ftp_upload_dir=as_dict["ftp_upload_dir"], ftp_upload_purge=as_dict["ftp_upload_purge"], + tmp_dir=as_dict.get("tmp_dir"), + webdav_use_temp_files=as_dict.get("webdav_use_temp_files"), + s3fs_listings_expiry_time=as_dict.get("s3fs_listings_expiry_time"), ) diff --git a/lib/galaxy/files/sources/s3fs.py b/lib/galaxy/files/sources/s3fs.py index 6967f4a5b6a7..2449754e3830 100644 --- a/lib/galaxy/files/sources/s3fs.py +++ b/lib/galaxy/files/sources/s3fs.py @@ -8,7 +8,10 @@ Tuple, ) -from typing_extensions import Unpack +from typing_extensions import ( + NotRequired, + Unpack, +) from galaxy.files import OptionalUserContext from . import ( @@ -35,6 +38,7 @@ class S3FsFilesSourceProperties(FilesSourceProperties, total=False): endpoint_url: int user: str passwd: str + listings_expiry_time: NotRequired[Optional[int]] client_kwargs: dict # internally computed. Should not be specified in config file @@ -45,6 +49,14 @@ def __init__(self, **kwd: Unpack[S3FsFilesSourceProperties]): if s3fs is None: raise Exception("Package s3fs unavailable but required for this file source plugin.") props: S3FsFilesSourceProperties = cast(S3FsFilesSourceProperties, self._parse_common_config_opts(kwd)) + file_sources_config = self._file_sources_config + if ( + props.get("listings_expiry_time") is None + and file_sources_config + and file_sources_config.s3fs_listings_expiry_time + ): + if file_sources_config.s3fs_listings_expiry_time: + props["listings_expiry_time"] = file_sources_config.s3fs_listings_expiry_time # There is a possibility that the bucket name could be parameterized: e.g. # bucket: ${user.preferences['generic_s3|bucket']} # that's ok, because we evaluate the bucket name again later. The bucket property here will only diff --git a/lib/galaxy/files/sources/webdav.py b/lib/galaxy/files/sources/webdav.py index 2e11a51d98b2..29490f479ff0 100644 --- a/lib/galaxy/files/sources/webdav.py +++ b/lib/galaxy/files/sources/webdav.py @@ -23,11 +23,22 @@ class WebDavFilesSource(PyFilesystem2FilesSource): def _open_fs(self, user_context=None, opts: Optional[FilesSourceOptions] = None): props = self._serialization_props(user_context) + file_sources_config = self._file_sources_config use_temp_files = props.pop("use_temp_files", None) + if use_temp_files is None and file_sources_config and file_sources_config.webdav_use_temp_files: + use_temp_files = file_sources_config.webdav_use_temp_files if use_temp_files is None: # Default to True to avoid memory issues with large files. - props["use_temp_files"] = True - props["temp_path"] = props.get("temp_path", tempfile.TemporaryDirectory(prefix="webdav_")) + use_temp_files = True + + if use_temp_files: + temp_path = props.get("temp_path") + if temp_path is None and file_sources_config and file_sources_config.tmp_dir: + temp_path = file_sources_config.tmp_dir + if temp_path is None: + temp_path = tempfile.TemporaryDirectory(prefix="webdav_") + props["temp_path"] = temp_path + props["use_temp_files"] = use_temp_files extra_props: Union[FilesSourceProperties, dict] = opts.extra_props or {} if opts else {} handle = WebDAVFS(**{**props, **extra_props}) return handle