From e7d1380a7f0daa88b455d9a2e978f6e65c227125 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Thu, 4 Jul 2024 10:46:28 +0200 Subject: [PATCH 1/4] Raise ValueError if invalid ID passed I suspect a state bug in the tool form code is the issue here and the id refers to a HDA id, but this is a solid fix anyway if someone submits wrong values via the API. Fixes https://github.com/galaxyproject/galaxy/issues/18485: ``` Stack Trace Newest AssertionError: null File "galaxy/web/framework/decorators.py", line 346, in decorator rval = func(self, trans, *args, **kwargs) File "galaxy/webapps/galaxy/api/tools.py", line 247, in build return tool.to_json(trans, kwd.get("inputs", kwd), history=history) File "galaxy/tools/__init__.py", line 2510, in to_json populate_state(request_context, self.inputs, params.__dict__, state_inputs, state_errors) File "galaxy/tools/parameters/__init__.py", line 412, in populate_state _populate_state_legacy( File "galaxy/tools/parameters/__init__.py", line 625, in _populate_state_legacy check_param(request_context, input, param_value, context, simple_errors=simple_errors) File "galaxy/tools/parameters/__init__.py", line 246, in check_param value = param.from_json(value, trans, param_values) File "galaxy/tools/parameters/basic.py", line 2132, in from_json value = self.to_python(value, trans.app) File "galaxy/tools/parameters/basic.py", line 1963, in to_python return history_item_dict_to_python(value["values"][0], app, self.name) File "galaxy/tools/parameters/basic.py", line 2799, in history_item_dict_to_python return src_id_to_item(sa_session=app.model.context, security=app.security, value=value) File "galaxy/tools/parameters/basic.py", line 2056, in src_id_to_item assert item ``` --- lib/galaxy/tools/parameters/basic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/tools/parameters/basic.py b/lib/galaxy/tools/parameters/basic.py index ebdad3e37aed..8d243cb88014 100644 --- a/lib/galaxy/tools/parameters/basic.py +++ b/lib/galaxy/tools/parameters/basic.py @@ -2053,7 +2053,8 @@ def src_id_to_item( item = sa_session.get(src_to_class[value["src"]], decoded_id) except KeyError: raise ValueError(f"Unknown input source {value['src']} passed to job submission API.") - assert item + if not item: + raise ValueError("Invalid input id passed to job submission API.") item.extra_params = {k: v for k, v in value.items() if k not in ("src", "id")} return item From 18157159cb93f50b28dff27e2ea154ecc553d30f Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Thu, 4 Jul 2024 10:49:39 +0200 Subject: [PATCH 2/4] Downgrade missing workflow outputs to warning Fixes https://sentry.galaxyproject.org/share/issue/3fa02a539eee4787a465c0b99903df3a/ --- lib/galaxy/workflow/modules.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/workflow/modules.py b/lib/galaxy/workflow/modules.py index 0ad9f46d5c16..d0eb5025d5c4 100644 --- a/lib/galaxy/workflow/modules.py +++ b/lib/galaxy/workflow/modules.py @@ -746,7 +746,8 @@ def get_all_outputs(self, data_only=False): # This can happen when importing workflows with missing tools. # We can't raise an exception here, as that would prevent loading # the workflow. - log.error( + # This is also listed when opening such a workflow in the workflow editor. + log.warning( f"Workflow output '{workflow_output['output_name']}' defined, but not listed among data outputs" ) continue From 56d22baa6a092f329f5d997c7c6465881b54e17e Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Thu, 4 Jul 2024 11:19:53 +0200 Subject: [PATCH 3/4] Use get_file_obj to render compressed data in mako We were mostly doing this already. Also raises an exception in `text_data` if content is not decodable. Fixes https://sentry.galaxyproject.org/share/issue/b826c0be95a4424e8b1d63983255f23d/: ``` UnicodeDecodeError: 'utf-8' codec can't decode byte 0x8b in position 1: invalid start byte File "galaxy/web/framework/middleware/error.py", line 167, in __call__ app_iter = self.application(environ, sr_checker) File "galaxy/web/framework/middleware/statsd.py", line 29, in __call__ req = self.application(environ, start_response) File "/cvmfs/main.galaxyproject.org/venv/lib/python3.11/site-packages/paste/httpexceptions.py", line 635, in __call__ return self.application(environ, start_response) File "galaxy/web/framework/base.py", line 174, in __call__ return self.handle_request(request_id, path_info, environ, start_response) File "galaxy/web/framework/base.py", line 263, in handle_request body = method(trans, **kwargs) File "galaxy/webapps/galaxy/controllers/dataset.py", line 152, in display display_data, headers = data.datatype.display_data( File "galaxy/datatypes/tabular.py", line 199, in display_data truncated_data=open(dataset.get_file_name()).read(max_peek_size), File "", line 322, in decode ``` --- lib/galaxy/datatypes/data.py | 42 +++++++++++++++++---------------- lib/galaxy/datatypes/tabular.py | 17 ++++++------- lib/galaxy/managers/hdas.py | 5 +++- 3 files changed, 35 insertions(+), 29 deletions(-) diff --git a/lib/galaxy/datatypes/data.py b/lib/galaxy/datatypes/data.py index 308e691a02cc..05f5adbad6bf 100644 --- a/lib/galaxy/datatypes/data.py +++ b/lib/galaxy/datatypes/data.py @@ -484,16 +484,17 @@ def _serve_file_download(self, headers, data, trans, to_ext, file_size, **kwd): def _serve_binary_file_contents_as_text(self, trans, data, headers, file_size, max_peek_size): headers["content-type"] = "text/html" - return ( - trans.fill_template_mako( - "/dataset/binary_file.mako", - data=data, - file_contents=open(data.get_file_name(), "rb").read(max_peek_size), - file_size=util.nice_size(file_size), - truncated=file_size > max_peek_size, - ), - headers, - ) + with open(data.get_file_name(), "rb") as fh: + return ( + trans.fill_template_mako( + "/dataset/binary_file.mako", + data=data, + file_contents=fh.read(max_peek_size), + file_size=util.nice_size(file_size), + truncated=file_size > max_peek_size, + ), + headers, + ) def _serve_file_contents(self, trans, data, headers, preview, file_size, max_peek_size): from galaxy.datatypes import images @@ -502,16 +503,17 @@ def _serve_file_contents(self, trans, data, headers, preview, file_size, max_pee if not preview or isinstance(data.datatype, images.Image) or file_size < max_peek_size: return self._yield_user_file_content(trans, data, data.get_file_name(), headers), headers - # preview large text file - headers["content-type"] = "text/html" - return ( - trans.fill_template_mako( - "/dataset/large_file.mako", - truncated_data=open(data.get_file_name(), "rb").read(max_peek_size), - data=data, - ), - headers, - ) + with compression_utils.get_fileobj(data.get_file_name(), "rb") as fh: + # preview large text file + headers["content-type"] = "text/html" + return ( + trans.fill_template_mako( + "/dataset/large_file.mako", + truncated_data=fh.read(max_peek_size), + data=data, + ), + headers, + ) def display_data( self, diff --git a/lib/galaxy/datatypes/tabular.py b/lib/galaxy/datatypes/tabular.py index 5b4e1d523e12..8bfde1202c31 100644 --- a/lib/galaxy/datatypes/tabular.py +++ b/lib/galaxy/datatypes/tabular.py @@ -193,14 +193,15 @@ def display_data( return open(dataset.get_file_name(), mode="rb"), headers else: headers["content-type"] = "text/html" - return ( - trans.fill_template_mako( - "/dataset/large_file.mako", - truncated_data=open(dataset.get_file_name()).read(max_peek_size), - data=dataset, - ), - headers, - ) + with compression_utils.get_fileobj(dataset.get_file_name(), "rb") as fh: + return ( + trans.fill_template_mako( + "/dataset/large_file.mako", + truncated_data=fh.read(max_peek_size), + data=dataset, + ), + headers, + ) else: column_names = "null" if dataset.metadata.column_names: diff --git a/lib/galaxy/managers/hdas.py b/lib/galaxy/managers/hdas.py index 8eefb8434753..f69a20c6cc4c 100644 --- a/lib/galaxy/managers/hdas.py +++ b/lib/galaxy/managers/hdas.py @@ -311,7 +311,10 @@ def text_data(self, hda, preview=True): truncated = preview and os.stat(file_path).st_size > MAX_PEEK_SIZE with get_fileobj(file_path) as fh: - hda_data = fh.read(MAX_PEEK_SIZE) + try: + hda_data = fh.read(MAX_PEEK_SIZE) + except UnicodeDecodeError: + raise exceptions.RequestParameterInvalidException("Cannot generate text preview for dataset.") return truncated, hda_data # .... annotatable From 92bfaa630eeee04c8b368df812380f09bcfa455b Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Thu, 4 Jul 2024 11:27:42 +0200 Subject: [PATCH 4/4] Fix statsd client on urls that aren't ascii encodable Fixes https://sentry.galaxyproject.org/share/issue/e2c535d2d4904d438a0a1524c991bba4/: ``` UnicodeEncodeError: 'ascii' codec can't encode characters in position 14-16: ordinal not in range(128) File "galaxy/web/framework/middleware/error.py", line 167, in __call__ app_iter = self.application(environ, sr_checker) File "galaxy/web/framework/middleware/statsd.py", line 34, in __call__ self.galaxy_stasd_client.timing(page, dt) File "galaxy/web/statsd_client.py", line 35, in timing self.statsd_client.timing(infix + path, time) File "statsd/client/base.py", line 33, in timing self._send_stat(stat, '%0.6f|ms' % delta, rate) File "statsd/client/base.py", line 61, in _send_stat self._after(self._prepare(stat, value, rate)) File "statsd/client/base.py", line 76, in _after self._send(data) File "statsd/client/udp.py", line 42, in _send self._sock.sendto(data.encode('ascii'), self._addr) ``` --- lib/galaxy/web/framework/middleware/statsd.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/web/framework/middleware/statsd.py b/lib/galaxy/web/framework/middleware/statsd.py index 1b017f715651..d817569f3d8e 100644 --- a/lib/galaxy/web/framework/middleware/statsd.py +++ b/lib/galaxy/web/framework/middleware/statsd.py @@ -28,8 +28,13 @@ def __call__(self, environ, start_response): start_time = time.time() req = self.application(environ, start_response) dt = int((time.time() - start_time) * 1000) - page = environ.get("controller_action_key", None) or environ.get("PATH_INFO", "NOPATH").strip("/").replace( - "/", "." + page = ( + environ.get("controller_action_key", None) + or environ.get("PATH_INFO", "NOPATH") + .strip("/") + .replace("/", ".") + .encode("ascii", errors="replace") + .decode() ) self.galaxy_stasd_client.timing(page, dt) try: