Skip to content

Commit

Permalink
Constrain http filesource by fetch_url_allowlist while still following redirects
Browse files Browse the repository at this point in the history
  • Loading branch information
dannon committed Sep 21, 2023
1 parent 9d79596 commit 5ff729d
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 1 deletion.
6 changes: 6 additions & 0 deletions lib/galaxy/files/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,13 +215,16 @@ class ConfiguredFileSourcesConfig:
def __init__(
self,
symlink_allowlist=None,
fetch_url_allowlist=None,
library_import_dir=None,
user_library_import_dir=None,
ftp_upload_dir=None,
ftp_upload_purge=True,
):
symlink_allowlist = symlink_allowlist or []
fetch_url_allowlist = fetch_url_allowlist or []
self.symlink_allowlist = symlink_allowlist
self.fetch_url_allowlist = fetch_url_allowlist
self.library_import_dir = library_import_dir
self.user_library_import_dir = user_library_import_dir
self.ftp_upload_dir = ftp_upload_dir
Expand All @@ -233,6 +236,7 @@ def from_app_config(config):
# for this component.
kwds = {}
kwds["symlink_allowlist"] = getattr(config, "user_library_import_symlink_allowlist", [])
kwds["fetch_url_allowlist"] = getattr(config, "fetch_url_allowlist", [])
kwds["library_import_dir"] = getattr(config, "library_import_dir", None)
kwds["user_library_import_dir"] = getattr(config, "user_library_import_dir", None)
kwds["ftp_upload_dir"] = getattr(config, "ftp_upload_dir", None)
Expand All @@ -242,6 +246,7 @@ def from_app_config(config):
def to_dict(self):
return {
"symlink_allowlist": self.symlink_allowlist,
"fetch_url_allowlist": self.fetch_url_allowlist,
"library_import_dir": self.library_import_dir,
"user_library_import_dir": self.user_library_import_dir,
"ftp_upload_dir": self.ftp_upload_dir,
Expand All @@ -252,6 +257,7 @@ def to_dict(self):
def from_dict(as_dict):
return ConfiguredFileSourcesConfig(
symlink_allowlist=as_dict["symlink_allowlist"],
fetch_url_allowlist=as_dict["fetch_url_allowlist"],
library_import_dir=as_dict["library_import_dir"],
user_library_import_dir=as_dict["user_library_import_dir"],
ftp_upload_dir=as_dict["ftp_upload_dir"],
Expand Down
8 changes: 7 additions & 1 deletion lib/galaxy/files/sources/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from typing_extensions import Unpack

from galaxy.files.uris import validate_non_local
from galaxy.util import (
DEFAULT_SOCKET_TIMEOUT,
get_charset_from_http_headers,
Expand Down Expand Up @@ -45,17 +46,22 @@ def __init__(self, **kwd: Unpack[FilesSourceProperties]):
self._url_regex = re.compile(self._url_regex_str)
self._props = props

@property
def _allowlist(self):
    """Return the ``fetch_url_allowlist`` held by this source's file sources config.

    The value is read from ``self._file_sources_config`` on every access, so
    it always reflects the configuration object currently attached to the
    instance.
    """
    sources_config = self._file_sources_config
    return sources_config.fetch_url_allowlist

def _realize_to(
self, source_path: str, native_path: str, user_context=None, opts: Optional[FilesSourceOptions] = None
):
props = self._serialization_props(user_context)
extra_props: HTTPFilesSourceProperties = cast(HTTPFilesSourceProperties, opts.extra_props or {} if opts else {})
headers = props.pop("http_headers", {}) or {}
headers.update(extra_props.get("http_headers") or {})

req = urllib.request.Request(source_path, headers=headers)

with urllib.request.urlopen(req, timeout=DEFAULT_SOCKET_TIMEOUT) as page:
# Verify url post-redirects is still allowlisted
validate_non_local(page.geturl(), self._allowlist)
f = open(native_path, "wb") # fd will be .close()ed in stream_to_open_named_file
return stream_to_open_named_file(
page, f.fileno(), native_path, source_encoding=get_charset_from_http_headers(page.headers)
Expand Down

0 comments on commit 5ff729d

Please sign in to comment.