Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[24.1] Increase API robustness to invalid requests, improve compressed data serving #18494

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
42 changes: 22 additions & 20 deletions lib/galaxy/datatypes/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,16 +484,17 @@ def _serve_file_download(self, headers, data, trans, to_ext, file_size, **kwd):

def _serve_binary_file_contents_as_text(self, trans, data, headers, file_size, max_peek_size):
headers["content-type"] = "text/html"
return (
trans.fill_template_mako(
"/dataset/binary_file.mako",
data=data,
file_contents=open(data.get_file_name(), "rb").read(max_peek_size),
file_size=util.nice_size(file_size),
truncated=file_size > max_peek_size,
),
headers,
)
with open(data.get_file_name(), "rb") as fh:
return (
trans.fill_template_mako(
"/dataset/binary_file.mako",
data=data,
file_contents=fh.read(max_peek_size),
file_size=util.nice_size(file_size),
truncated=file_size > max_peek_size,
),
headers,
)

def _serve_file_contents(self, trans, data, headers, preview, file_size, max_peek_size):
from galaxy.datatypes import images
Expand All @@ -502,16 +503,17 @@ def _serve_file_contents(self, trans, data, headers, preview, file_size, max_pee
if not preview or isinstance(data.datatype, images.Image) or file_size < max_peek_size:
return self._yield_user_file_content(trans, data, data.get_file_name(), headers), headers

# preview large text file
headers["content-type"] = "text/html"
return (
trans.fill_template_mako(
"/dataset/large_file.mako",
truncated_data=open(data.get_file_name(), "rb").read(max_peek_size),
data=data,
),
headers,
)
with compression_utils.get_fileobj(data.get_file_name(), "rb") as fh:
# preview large text file
headers["content-type"] = "text/html"
return (
trans.fill_template_mako(
"/dataset/large_file.mako",
truncated_data=fh.read(max_peek_size),
data=data,
),
headers,
)

def display_data(
self,
Expand Down
17 changes: 9 additions & 8 deletions lib/galaxy/datatypes/tabular.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,14 +193,15 @@ def display_data(
return open(dataset.get_file_name(), mode="rb"), headers
else:
headers["content-type"] = "text/html"
return (
trans.fill_template_mako(
"/dataset/large_file.mako",
truncated_data=open(dataset.get_file_name()).read(max_peek_size),
data=dataset,
),
headers,
)
with compression_utils.get_fileobj(dataset.get_file_name(), "rb") as fh:
return (
trans.fill_template_mako(
"/dataset/large_file.mako",
truncated_data=fh.read(max_peek_size),
data=dataset,
),
headers,
)
else:
column_names = "null"
if dataset.metadata.column_names:
Expand Down
5 changes: 4 additions & 1 deletion lib/galaxy/managers/hdas.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,10 @@ def text_data(self, hda, preview=True):

truncated = preview and os.stat(file_path).st_size > MAX_PEEK_SIZE
with get_fileobj(file_path) as fh:
hda_data = fh.read(MAX_PEEK_SIZE)
try:
hda_data = fh.read(MAX_PEEK_SIZE)
except UnicodeDecodeError:
raise exceptions.RequestParameterInvalidException("Cannot generate text preview for dataset.")
return truncated, hda_data

# .... annotatable
Expand Down
3 changes: 2 additions & 1 deletion lib/galaxy/tools/parameters/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2053,7 +2053,8 @@ def src_id_to_item(
item = sa_session.get(src_to_class[value["src"]], decoded_id)
except KeyError:
raise ValueError(f"Unknown input source {value['src']} passed to job submission API.")
assert item
if not item:
raise ValueError("Invalid input id passed to job submission API.")
item.extra_params = {k: v for k, v in value.items() if k not in ("src", "id")}
return item

Expand Down
9 changes: 7 additions & 2 deletions lib/galaxy/web/framework/middleware/statsd.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,13 @@ def __call__(self, environ, start_response):
start_time = time.time()
req = self.application(environ, start_response)
dt = int((time.time() - start_time) * 1000)
page = environ.get("controller_action_key", None) or environ.get("PATH_INFO", "NOPATH").strip("/").replace(
"/", "."
page = (
environ.get("controller_action_key", None)
or environ.get("PATH_INFO", "NOPATH")
.strip("/")
.replace("/", ".")
.encode("ascii", errors="replace")
.decode()
)
self.galaxy_stasd_client.timing(page, dt)
try:
Expand Down
3 changes: 2 additions & 1 deletion lib/galaxy/workflow/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,8 @@ def get_all_outputs(self, data_only=False):
# This can happen when importing workflows with missing tools.
# We can't raise an exception here, as that would prevent loading
# the workflow.
log.error(
# This is also listed when opening such a workflow in the workflow editor.
log.warning(
f"Workflow output '{workflow_output['output_name']}' defined, but not listed among data outputs"
)
continue
Expand Down
Loading