From 5ff729d910a218ecfb26befae23c7503d7405d08 Mon Sep 17 00:00:00 2001 From: Dannon Baker Date: Wed, 30 Aug 2023 16:38:11 -0400 Subject: [PATCH] Constrain http filesource by fetch_url_allowlist while still following redirects --- lib/galaxy/files/__init__.py | 6 ++++++ lib/galaxy/files/sources/http.py | 8 +++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/files/__init__.py b/lib/galaxy/files/__init__.py index 20dc7d78d1f5..a16843d4a3fe 100644 --- a/lib/galaxy/files/__init__.py +++ b/lib/galaxy/files/__init__.py @@ -215,13 +215,16 @@ class ConfiguredFileSourcesConfig: def __init__( self, symlink_allowlist=None, + fetch_url_allowlist=None, library_import_dir=None, user_library_import_dir=None, ftp_upload_dir=None, ftp_upload_purge=True, ): symlink_allowlist = symlink_allowlist or [] + fetch_url_allowlist = fetch_url_allowlist or [] self.symlink_allowlist = symlink_allowlist + self.fetch_url_allowlist = fetch_url_allowlist self.library_import_dir = library_import_dir self.user_library_import_dir = user_library_import_dir self.ftp_upload_dir = ftp_upload_dir @@ -233,6 +236,7 @@ def from_app_config(config): # for this component. kwds = {} kwds["symlink_allowlist"] = getattr(config, "user_library_import_symlink_allowlist", []) + kwds["fetch_url_allowlist"] = getattr(config, "fetch_url_allowlist", []) kwds["library_import_dir"] = getattr(config, "library_import_dir", None) kwds["user_library_import_dir"] = getattr(config, "user_library_import_dir", None) kwds["ftp_upload_dir"] = getattr(config, "ftp_upload_dir", None) @@ -242,6 +246,7 @@ def from_app_config(config): def to_dict(self): return { "symlink_allowlist": self.symlink_allowlist, + "fetch_url_allowlist": self.fetch_url_allowlist, "library_import_dir": self.library_import_dir, "user_library_import_dir": self.user_library_import_dir, "ftp_upload_dir": self.ftp_upload_dir, @@ -252,6 +257,7 @@ def to_dict(self): def from_dict(as_dict): return ConfiguredFileSourcesConfig( symlink_allowlist=as_dict["symlink_allowlist"], + fetch_url_allowlist=as_dict["fetch_url_allowlist"], library_import_dir=as_dict["library_import_dir"], user_library_import_dir=as_dict["user_library_import_dir"], ftp_upload_dir=as_dict["ftp_upload_dir"], diff --git a/lib/galaxy/files/sources/http.py b/lib/galaxy/files/sources/http.py index 83ecd3b0b9e2..e3f2bcc8a0f4 100644 --- a/lib/galaxy/files/sources/http.py +++ b/lib/galaxy/files/sources/http.py @@ -9,6 +9,7 @@ from typing_extensions import Unpack +from galaxy.files.uris import validate_non_local from galaxy.util import ( DEFAULT_SOCKET_TIMEOUT, get_charset_from_http_headers, @@ -45,6 +46,10 @@ def __init__(self, **kwd: Unpack[FilesSourceProperties]): self._url_regex = re.compile(self._url_regex_str) self._props = props + @property + def _allowlist(self): + return self._file_sources_config.fetch_url_allowlist + def _realize_to( self, source_path: str, native_path: str, user_context=None, opts: Optional[FilesSourceOptions] = None ): @@ -52,10 +57,11 @@ def _realize_to( extra_props: HTTPFilesSourceProperties = cast(HTTPFilesSourceProperties, opts.extra_props or {} if opts else {}) headers = props.pop("http_headers", {}) or {} headers.update(extra_props.get("http_headers") or {}) - req = urllib.request.Request(source_path, headers=headers) with urllib.request.urlopen(req, timeout=DEFAULT_SOCKET_TIMEOUT) as page: + # Verify url post-redirects is still allowlisted + validate_non_local(page.geturl(), self._allowlist) f = open(native_path, "wb") # fd will be .close()ed in stream_to_open_named_file return stream_to_open_named_file( page, f.fileno(), native_path, source_encoding=get_charset_from_http_headers(page.headers)