From 424695763b4e7a6d2c02810ea25528fc7ce09e20 Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Tue, 24 Oct 2023 11:02:48 +0200
Subject: [PATCH 01/11] Provide error message instead of internal server error

---
 lib/galaxy/webapps/galaxy/controllers/history.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lib/galaxy/webapps/galaxy/controllers/history.py b/lib/galaxy/webapps/galaxy/controllers/history.py
index 3f16bc0ecc94..e2246c28c158 100644
--- a/lib/galaxy/webapps/galaxy/controllers/history.py
+++ b/lib/galaxy/webapps/galaxy/controllers/history.py
@@ -488,6 +488,10 @@ def view(self, trans, id=None, show_deleted=False, show_hidden=False, use_panels
             history_is_current = history_to_view == trans.history
         else:
             history_to_view = trans.history
+            if not history_to_view:
+                raise exceptions.RequestParameterMissingException(
+                    "No 'id' parameter provided for history, and user does not have a current history."
+                )
             user_is_owner = True
             history_is_current = True

From 81e9d52b4a85aff36c1a47b72e57025d2c4c8bae Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Tue, 24 Oct 2023 11:40:35 +0200
Subject: [PATCH 02/11] Fix unbound runner variable when there is an error in the job config

Fixes

```
UnboundLocalError: local variable 'runner' referenced before assignment
  File "galaxy/jobs/handler.py", line 541, in __handle_waiting_jobs
    self.dispatcher.put(self.job_wrappers.pop(job.id))
  File "galaxy/jobs/handler.py", line 1224, in put
    runner = self.get_job_runner(job_wrapper, get_task_runner=True)
  File "galaxy/jobs/handler.py", line 1221, in get_job_runner
    return runner
```

from https://sentry.galaxyproject.org/share/issue/a0a0049feb53487f9c84ede3784624b0/
which followed `(53095435) Invalid job runner: bridges`.
---
 lib/galaxy/jobs/handler.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/lib/galaxy/jobs/handler.py b/lib/galaxy/jobs/handler.py
index e654423f2713..92beb0b4f1a0 100644
--- a/lib/galaxy/jobs/handler.py
+++ b/lib/galaxy/jobs/handler.py
@@ -1216,12 +1216,16 @@ def get_job_runner(self, job_wrapper, get_task_runner=False):
         except KeyError:
             log.error(f"({job_wrapper.job_id}) Invalid job runner: {runner_name}")
             job_wrapper.fail(DEFAULT_JOB_RUNNER_FAILURE_MESSAGE)
+            return None
         if get_task_runner and job_wrapper.can_split() and runner.runner_name != "PulsarJobRunner":
             return self.job_runners["tasks"]
         return runner

     def put(self, job_wrapper):
         runner = self.get_job_runner(job_wrapper, get_task_runner=True)
+        if runner is None:
+            # Something went wrong, we've already failed the job wrapper
+            return
         if isinstance(job_wrapper, TaskWrapper):
             # DBTODO Refactor
             log.debug(f"({job_wrapper.job_id}) Dispatching task {job_wrapper.task_id} to task runner")

From 856930eef6d2d63bdd71ea99f3c1a9fa7b438e03 Mon Sep 17 00:00:00 2001
From: Matthias Bernt
Date: Thu, 22 Dec 2022 12:19:32 +0100
Subject: [PATCH 03/11] Fix missing grep input in sort1 tool

---
 tools/filters/sorter.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/filters/sorter.py b/tools/filters/sorter.py
index e9ccb53d9a01..50e81ddb419f 100644
--- a/tools/filters/sorter.py
+++ b/tools/filters/sorter.py
@@ -41,7 +41,8 @@ def main():
     # grep comments
     grep_comments = ["grep", "^#"]
-    exit_code = subprocess.call(grep_comments, stdout=output_fh)
+    exit_code = subprocess.call(grep_comments, stdin=input_fh, stdout=output_fh)
+    input_fh.seek(0)
     if exit_code not in [0, 1]:
         stop_err("Searching for comment lines failed")
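
A note on the sorter.py fix above: without `stdin=`, the child process inherits the parent's stdin, so `grep "^#"` with no file argument was never reading the dataset at all. A minimal standalone sketch of the repaired pattern, with hypothetical file names:

```python
import subprocess

# grep reads the dataset from stdin; the handle is rewound afterwards so a
# later pass (the actual sort) can re-read the same file object.
with open("input.tabular") as input_fh, open("output.tabular", "w") as output_fh:
    exit_code = subprocess.call(["grep", "^#"], stdin=input_fh, stdout=output_fh)
    input_fh.seek(0)
    # grep exits 0 on match and 1 on no match; only >= 2 signals a real error
    if exit_code not in [0, 1]:
        raise SystemExit("Searching for comment lines failed")
```
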
From 285db62fd38a3d41bc9e0d243c0abfe4c2196612 Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Wed, 25 Oct 2023 10:39:41 +0200
Subject: [PATCH 04/11] Improve invocation error reporting when step requires datasets in ok state

---
 lib/galaxy/exceptions/__init__.py      | 10 ++++++++++
 lib/galaxy/exceptions/error_codes.json |  5 +++++
 lib/galaxy/tools/__init__.py           | 11 ++++++++---
 lib/galaxy/tools/execute.py            | 20 ++++++++++++--------
 lib/galaxy/workflow/modules.py         | 10 ++++++++++
 5 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/lib/galaxy/exceptions/__init__.py b/lib/galaxy/exceptions/__init__.py
index 2448fd817960..5806ed157c21 100644
--- a/lib/galaxy/exceptions/__init__.py
+++ b/lib/galaxy/exceptions/__init__.py
@@ -151,6 +151,16 @@ class ToolInputsNotReadyException(MessageException):
     error_code = error_codes_by_name["TOOL_INPUTS_NOT_READY"]


+class ToolInputsNotOKException(MessageException):
+    def __init__(self, err_msg=None, type="info", *, src: str, id: int, **extra_error_info):
+        super().__init__(err_msg, type, **extra_error_info)
+        self.src = src
+        self.id = id
+
+    status_code = 400
+    error_code = error_codes_by_name["TOOL_INPUTS_NOT_OK"]
+
+
 class RealUserRequiredException(MessageException):
     status_code = 400
     error_code = error_codes_by_name["REAL_USER_REQUIRED"]

diff --git a/lib/galaxy/exceptions/error_codes.json b/lib/galaxy/exceptions/error_codes.json
index 7d8f521b8218..99464306acb6 100644
--- a/lib/galaxy/exceptions/error_codes.json
+++ b/lib/galaxy/exceptions/error_codes.json
@@ -94,6 +94,11 @@
         "code": 400016,
         "message": "Only real users can make this request."
     },
+    {
+        "name": "TOOL_INPUTS_NOT_OK",
+        "code": 400017,
+        "message": "Tool inputs not in required OK state."
+    },
     {
         "name": "USER_AUTHENTICATION_FAILED",
         "code": 401001,

diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py
index 5a92fd960e4b..62bf8b5e217b 100644
--- a/lib/galaxy/tools/__init__.py
+++ b/lib/galaxy/tools/__init__.py
@@ -34,7 +34,10 @@
     exceptions,
     model,
 )
-from galaxy.exceptions import ToolInputsNotReadyException
+from galaxy.exceptions import (
+    ToolInputsNotOKException,
+    ToolInputsNotReadyException,
+)
 from galaxy.job_execution import output_collect
 from galaxy.metadata import get_metadata_compute_strategy
 from galaxy.model.base import transaction
@@ -3200,8 +3203,10 @@ def check_dataset_state(state):

         if self.require_dataset_ok:
             if state != model.Dataset.states.OK:
-                raise ValueError(
-                    f"Tool requires inputs to be in valid state, but dataset {input_dataset} is in state '{input_dataset.state}'"
+                raise ToolInputsNotOKException(
+                    f"Tool requires inputs to be in valid state, but dataset {input_dataset} is in state '{input_dataset.state}'",
+                    src="hda",
+                    id=input_dataset.id,
                 )

         for input_dataset in input_datasets.values():

diff --git a/lib/galaxy/tools/execute.py b/lib/galaxy/tools/execute.py
index 412c36bcefdd..c935aff13ecc 100644
--- a/lib/galaxy/tools/execute.py
+++ b/lib/galaxy/tools/execute.py
@@ -19,6 +19,7 @@
 from boltons.iterutils import remap

 from galaxy import model
+from galaxy.exceptions import ToolInputsNotOKException
 from galaxy.model.base import transaction
 from galaxy.model.dataset_collections.matching import MatchingCollections
 from galaxy.model.dataset_collections.structure import (
@@ -143,14 +144,17 @@ def execute_single_job(execution_slice, completed_job, skip=False):
     if check_inputs_ready:
         for params in execution_tracker.param_combinations:
             # This will throw an exception if the tool is not ready.
-            check_inputs_ready(
-                tool,
-                trans,
-                params,
-                history,
-                execution_cache=execution_cache,
-                collection_info=collection_info,
-            )
+            try:
+                check_inputs_ready(
+                    tool,
+                    trans,
+                    params,
+                    history,
+                    execution_cache=execution_cache,
+                    collection_info=collection_info,
+                )
+            except ToolInputsNotOKException as e:
+                execution_tracker.record_error(e)

     execution_tracker.ensure_implicit_collections_populated(history, mapping_params.param_template)
     job_count = len(execution_tracker.param_combinations)

diff --git a/lib/galaxy/workflow/modules.py b/lib/galaxy/workflow/modules.py
index 36bdd374b172..805a8ba3f2f6 100644
--- a/lib/galaxy/workflow/modules.py
+++ b/lib/galaxy/workflow/modules.py
@@ -2314,6 +2314,16 @@ def callback(input, prefixed_name, **kwargs):
         if execution_tracker.execution_errors:
             # TODO: formalize into InvocationFailure ?
             message = f"Failed to create {len(execution_tracker.execution_errors)} job(s) for workflow step {step.order_index + 1}: {str(execution_tracker.execution_errors[0])}"
+            for error in execution_tracker.execution_errors:
+                # try first to raise a structured invocation error message
+                if isinstance(error, exceptions.ToolInputsNotOKException) and error.src == "hda":
+                    raise FailWorkflowEvaluation(
+                        why=InvocationFailureDatasetFailed(
+                            reason=FailureReason.dataset_failed,
+                            hda_id=error.id,
+                            workflow_step_id=step.id,
+                        )
+                    )
             raise exceptions.MessageException(message)

         return complete
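
The heart of patch 04 is that the failure now travels as data rather than as a bare `ValueError` string: the exception carries the source type and database id of the offending input, and the workflow layer turns that into a structured `dataset_failed` invocation message. A simplified sketch of the idea (class and field names mirror the patch; everything else is pared down and hypothetical):

```python
class MessageException(Exception):
    # stand-in for Galaxy's base class, whose subclasses map to HTTP status codes
    status_code = 400

class ToolInputsNotOKException(MessageException):
    def __init__(self, err_msg=None, *, src: str, id: int):
        super().__init__(err_msg)
        self.src = src  # kind of input, e.g. "hda" for a history dataset association
        self.id = id    # database id of the input that is not in "ok" state

def check_dataset_state(state, dataset_id):
    if state != "ok":
        raise ToolInputsNotOKException(
            f"Tool requires inputs to be in valid state, but dataset {dataset_id} is in state '{state}'",
            src="hda",
            id=dataset_id,
        )

try:
    check_dataset_state("error", 42)
except ToolInputsNotOKException as e:
    # the caller can branch on typed fields instead of parsing message text
    failure = {"reason": "dataset_failed", "hda_id": e.id, "workflow_step_id": 1}
```
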
From c3513016029d5b622294199b2be3eb14d084335f Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Wed, 25 Oct 2023 11:53:29 +0200
Subject: [PATCH 05/11] Include failing step for unexpected failures, turn more exceptions into MessageExceptions

---
 .../InvocationMessage.vue       | 15 ++++++++++---
 .../invocationMessageModel.ts   | 16 ++++++++++----
 lib/galaxy/schema/invocation.py |  6 ++++-
 lib/galaxy/tools/__init__.py    | 22 ++++++++++---------
 lib/galaxy/workflow/run.py      | 10 ++++++++-
 5 files changed, 50 insertions(+), 19 deletions(-)

diff --git a/client/src/components/WorkflowInvocationState/InvocationMessage.vue b/client/src/components/WorkflowInvocationState/InvocationMessage.vue
index d31a4189b0bd..840ad7ed44f3 100644
--- a/client/src/components/WorkflowInvocationState/InvocationMessage.vue
+++ b/client/src/components/WorkflowInvocationState/InvocationMessage.vue
@@ -62,7 +62,12 @@ const workflow = computed(() => {
 });

 const workflowStep = computed(() => {
-    if ("workflow_step_id" in props.invocationMessage && workflow.value) {
+    if (
+        "workflow_step_id" in props.invocationMessage &&
+        props.invocationMessage.workflow_step_id !== undefined &&
+        props.invocationMessage.workflow_step_id !== null &&
+        workflow.value
+    ) {
         return workflow.value.steps[props.invocationMessage.workflow_step_id];
     }
     return undefined;
@@ -146,10 +151,14 @@ const infoString = computed(() => {
             invocationMessage.workflow_step_id + 1
         } is a conditional step and the result of the when expression is not a boolean type.`;
     } else if (reason === "unexpected_failure") {
+        let atStep = "";
+        if (invocationMessage.workflow_step_id !== null && invocationMessage.workflow_step_id !== undefined) {
+            atStep = ` at step ${invocationMessage.workflow_step_id + 1}`;
+        }
         if (invocationMessage.details) {
-            return `${failFragment} an unexpected failure occurred: '${invocationMessage.details}'`;
+            return `${failFragment} an unexpected failure occurred${atStep}: '${invocationMessage.details}'`;
         }
-        return `${failFragment} an unexpected failure occurred.`;
+        return `${failFragment} an unexpected failure occurred${atStep}.`;
     } else if (reason === "workflow_output_not_found") {
         return `Defined workflow output '${invocationMessage.output_name}' was not found in step ${
             invocationMessage.workflow_step_id + 1

diff --git a/client/src/components/WorkflowInvocationState/invocationMessageModel.ts b/client/src/components/WorkflowInvocationState/invocationMessageModel.ts
index 7c86d43ad17c..29ab6a989847 100644
--- a/client/src/components/WorkflowInvocationState/invocationMessageModel.ts
+++ b/client/src/components/WorkflowInvocationState/invocationMessageModel.ts
@@ -77,7 +77,7 @@ export interface GenericInvocationFailureCollectionFailedEncodedDatabaseIdField
     /**
     * HistoryDatasetCollectionAssociation ID that relates to failure.
     */
-    hdca_id?: string;
+    hdca_id: string;
     /**
     * Workflow step id of step that caused failure.
     */
@@ -92,7 +92,7 @@ export interface GenericInvocationFailureCollectionFailedInt {
     /**
     * HistoryDatasetCollectionAssociation ID that relates to failure.
     */
-    hdca_id?: number;
+    hdca_id: number;
     /**
     * Workflow step id of step that caused failure.
     */
@@ -159,7 +159,7 @@ export interface GenericInvocationFailureJobFailedEncodedDatabaseIdField {
     /**
     * Job ID that relates to failure.
     */
-    job_id?: string;
+    job_id: string;
     /**
     * Workflow step id of step that caused failure.
     */
@@ -174,7 +174,7 @@ export interface GenericInvocationFailureJobFailedInt {
     /**
     * Job ID that relates to failure.
     */
-    job_id?: number;
+    job_id: number;
     /**
     * Workflow step id of step that caused failure.
     */
@@ -232,6 +232,10 @@ export interface GenericInvocationUnexpectedFailureEncodedDatabaseIdField {
     /**
     * May contains details to help troubleshoot this problem.
     */
     details?: string;
+    /**
+    * Workflow step id of step that failed.
+    */
+    workflow_step_id?: number;
 }
 export interface GenericInvocationUnexpectedFailureInt {
     reason: "unexpected_failure";
@@ -239,4 +243,8 @@
     /**
     * May contains details to help troubleshoot this problem.
     */
     details?: string;
+    /**
+    * Workflow step id of step that failed.
+    */
+    workflow_step_id?: number;
 }
if key == "workflow_step_id": - return self._obj.workflow_step.order_index + if self._obj.workflow_step: + return self._obj.workflow_step.order_index + else: + return default elif key == "dependent_workflow_step_id": if self._obj.dependent_workflow_step_id: return self._obj.dependent_workflow_step.order_index @@ -132,6 +135,7 @@ class GenericInvocationFailureWhenNotBoolean(InvocationFailureMessageBase[Databa class GenericInvocationUnexpectedFailure(InvocationMessageBase, Generic[DatabaseIdT]): reason: Literal[FailureReason.unexpected_failure] details: Optional[str] = Field(None, description="May contains details to help troubleshoot this problem.") + workflow_step_id: Optional[int] = Field(None, description="Workflow step id of step that failed.") class GenericInvocationWarning(InvocationMessageBase, Generic[DatabaseIdT]): diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py index 62bf8b5e217b..3d5838d96490 100644 --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -2861,7 +2861,7 @@ def exec_after_process(self, app, inp_data, out_data, param_dict, job=None, **kw copy_object = input_dataset break if copy_object is None: - raise Exception("Failed to find dataset output.") + raise exceptions.MessageException("Failed to find dataset output.") out_data[key].copy_from(copy_object) def parse_environment_variables(self, tool_source): @@ -3335,7 +3335,7 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history elif how == "by_index": extracted_element = collection[int(incoming["which"]["index"])] else: - raise Exception("Invalid tool parameters.") + raise exceptions.MessageException("Invalid tool parameters.") extracted = extracted_element.element_object extracted_o = extracted.copy( copy_tags=extracted.tags, new_name=extracted_element.element_identifier, flush=False @@ -3375,7 +3375,9 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history if element_identifier not in identifiers_map: identifiers_map[element_identifier] = [] elif dupl_actions == "fail": - raise Exception(f"Duplicate collection element identifiers found for [{element_identifier}]") + raise exceptions.MessageException( + f"Duplicate collection element identifiers found for [{element_identifier}]" + ) identifiers_map[element_identifier].append(input_num) for copy, input_list in enumerate(input_lists): @@ -3567,12 +3569,12 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history except KeyError: hdca_history_name = f"{hdca.hid}: {hdca.name}" message = f"List of element identifiers does not match element identifiers in collection '{hdca_history_name}'" - raise Exception(message) + raise exceptions.MessageException(message) else: message = f"Number of lines must match number of list elements ({len(elements)}), but file has {data_lines} lines" - raise Exception(message) + raise exceptions.MessageException(message) else: - raise Exception(f"Unknown sort_type '{sorttype}'") + raise exceptions.MessageException(f"Unknown sort_type '{sorttype}'") if presort_elements is not None: sorted_elements = [x[1] for x in sorted(presort_elements, key=lambda x: x[0])] @@ -3604,7 +3606,7 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history def add_copied_value_to_new_elements(new_label, dce_object): new_label = new_label.strip() if new_label in new_elements: - raise Exception( + raise exceptions.MessageException( f"New identifier [{new_label}] appears twice in resulting collection, these values must be 
unique." ) if getattr(dce_object, "history_content_type", None) == "dataset": @@ -3617,7 +3619,7 @@ def add_copied_value_to_new_elements(new_label, dce_object): with open(new_labels_path) as fh: new_labels = fh.readlines(1024 * 1000000) if strict and len(hdca.collection.elements) != len(new_labels): - raise Exception("Relabel mapping file contains incorrect number of identifiers") + raise exceptions.MessageException("Relabel mapping file contains incorrect number of identifiers") if how_type == "tabular": # We have a tabular file, where the first column is an existing element identifier, # and the second column is the new element identifier. @@ -3629,7 +3631,7 @@ def add_copied_value_to_new_elements(new_label, dce_object): default = None if strict else element_identifier new_label = new_labels_dict.get(element_identifier, default) if not new_label: - raise Exception(f"Failed to find new label for identifier [{element_identifier}]") + raise exceptions.MessageException(f"Failed to find new label for identifier [{element_identifier}]") add_copied_value_to_new_elements(new_label, dce_object) else: # If new_labels_dataset_assoc is not a two-column tabular dataset we label with the current line of the dataset @@ -3638,7 +3640,7 @@ def add_copied_value_to_new_elements(new_label, dce_object): add_copied_value_to_new_elements(new_labels[i], dce_object) for key in new_elements.keys(): if not re.match(r"^[\w\- \.,]+$", key): - raise Exception(f"Invalid new collection identifier [{key}]") + raise exceptions.MessageException(f"Invalid new collection identifier [{key}]") self._add_datasets_to_history(history, new_elements.values()) output_collections.create_collection( next(iter(self.outputs.values())), "output", elements=new_elements, propagate_hda_tags=False diff --git a/lib/galaxy/workflow/run.py b/lib/galaxy/workflow/run.py index 6ddc56427672..d69fe79b6aa2 100644 --- a/lib/galaxy/workflow/run.py +++ b/lib/galaxy/workflow/run.py @@ -240,12 +240,20 @@ def invoke(self) -> Dict[int, Any]: except modules.DelayedWorkflowEvaluation as de: step_delayed = delayed_steps = True self.progress.mark_step_outputs_delayed(step, why=de.why) - except Exception: + except Exception as e: log.exception( "Failed to schedule %s, problem occurred on %s.", self.workflow_invocation.workflow.log_str(), step.log_str(), ) + if isinstance(e, MessageException): + # This is the highest level at which we can inject the step id + # to provide some more context to the exception. 
From ce38206990ee83bd8b9b7327cb1eb387c0441fb7 Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Wed, 25 Oct 2023 13:26:31 +0200
Subject: [PATCH 06/11] Extend tests for new invocation failure messages

---
 lib/galaxy_test/api/test_workflows.py | 72 +++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py
index 393546d95ec6..5b7487404532 100644
--- a/lib/galaxy_test/api/test_workflows.py
+++ b/lib/galaxy_test/api/test_workflows.py
@@ -4060,6 +4060,78 @@ def test_workflow_warning_workflow_output_not_found(self, history_id):
         assert "workflow_step_id" in message
         assert message["output_name"] == "does_not_exist"

+    @skip_without_tool("__APPLY_RULES__")
+    @skip_without_tool("job_properties")
+    def test_workflow_failed_input_not_ok(self, history_id):
+        summary = self._run_workflow(
+            """
+class: GalaxyWorkflow
+steps:
+  job_props:
+    tool_id: job_properties
+    state:
+      thebool: true
+      failbool: true
+  apply:
+    tool_id: __APPLY_RULES__
+    in:
+      input: job_props/list_output
+    state:
+      rules:
+        rules:
+          - type: add_column_metadata
+            value: identifier0
+        mapping:
+          - type: list_identifiers
+            columns: [0]
+""",
+            history_id=history_id,
+            assert_ok=False,
+            wait=True,
+        )
+        invocation_details = self.workflow_populator.get_invocation(summary.invocation_id, step_details=True)
+        assert invocation_details["state"] == "failed"
+        assert len(invocation_details["messages"]) == 1
+        message = invocation_details["messages"][0]
+        assert message["reason"] == "dataset_failed"
+        assert message["workflow_step_id"] == 1
+
+    @skip_without_tool("__RELABEL_FROM_FILE__")
+    def test_workflow_failed_with_message_exception(self, history_id):
+        summary = self._run_workflow(
+            """
+class: GalaxyWorkflow
+inputs:
+  input_collection:
+    collection_type: list
+    type: collection
+  relabel_file:
+    type: data
+steps:
+  relabel:
+    tool_id: __RELABEL_FROM_FILE__
+    in:
+      input: input_collection
+      how|labels: relabel_file
+test_data:
+  input_collection:
+    collection_type: "list:list"
+  relabel_file:
+    value: 1.bed
+    type: File
+""",
+            history_id=history_id,
+            assert_ok=False,
+            wait=True,
+        )
+        invocation_details = self.workflow_populator.get_invocation(summary.invocation_id, step_details=True)
+        assert invocation_details["state"] == "failed"
+        assert len(invocation_details["messages"]) == 1
+        message = invocation_details["messages"][0]
+        assert message["reason"] == "unexpected_failure"
+        assert message["workflow_step_id"] == 2
+        assert "Invalid new collection identifier" in message["details"]
+
     @skip_without_tool("identifier_multiple")
     def test_invocation_map_over(self, history_id):
         summary = self._run_workflow(
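
Both tests follow the same recipe: run a workflow that is expected to fail, then assert on the structured `messages` list rather than scraping log text. Roughly, outside the Galaxy harness, with a literal dict standing in for the real API response:

```python
# Shape of the invocation details the assertions rely on (values illustrative):
invocation_details = {
    "state": "failed",
    "messages": [
        {
            "reason": "unexpected_failure",
            "workflow_step_id": 2,  # zero-based order_index; the UI renders "step 3"
            "details": "Invalid new collection identifier [bad id]",
        }
    ],
}

assert invocation_details["state"] == "failed"
(message,) = invocation_details["messages"]  # exactly one failure message expected
assert message["reason"] == "unexpected_failure"
assert "Invalid new collection identifier" in message["details"]
```
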
From d411861d31819f190bee4f9e2a2a01a9752bb792 Mon Sep 17 00:00:00 2001
From: davelopez <46503462+davelopez@users.noreply.github.com>
Date: Wed, 25 Oct 2023 14:39:02 +0200
Subject: [PATCH 07/11] Revert always reset external filename

8ae0b9241ea5a4aa4298855605057340d90b9639
---
 lib/galaxy/metadata/set_metadata.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/galaxy/metadata/set_metadata.py b/lib/galaxy/metadata/set_metadata.py
index b512f3f905b5..ce5dea696ae4 100644
--- a/lib/galaxy/metadata/set_metadata.py
+++ b/lib/galaxy/metadata/set_metadata.py
@@ -448,7 +448,7 @@ def set_meta(new_dataset_instance, file_dict):
                 object_store_update_actions.append(
                     partial(push_if_necessary, object_store, dataset, external_filename)
                 )
-            object_store_update_actions.append(partial(reset_external_filename, dataset))
+                object_store_update_actions.append(partial(reset_external_filename, dataset))
             object_store_update_actions.append(partial(dataset.set_total_size))
             object_store_update_actions.append(partial(export_store.add_dataset, dataset))
             if dataset_instance_id not in unnamed_id_to_path:

From f620b11a94460bb6162acda2cacbd4231a69f254 Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Wed, 25 Oct 2023 16:16:50 +0200
Subject: [PATCH 08/11] Test link data with extended metadata

---
 .../test_upload_configuration_options.py | 126 +++++++++++-------
 1 file changed, 81 insertions(+), 45 deletions(-)

diff --git a/test/integration/test_upload_configuration_options.py b/test/integration/test_upload_configuration_options.py
index 72bd2356721f..7b910f499bb9 100644
--- a/test/integration/test_upload_configuration_options.py
+++ b/test/integration/test_upload_configuration_options.py
@@ -30,6 +30,7 @@
 from requests import Response

+from galaxy.tool_util.verify.test_data import TestDataResolver
 from galaxy.util.unittest import TestCase
 from galaxy_test.base.api_util import TEST_USER
 from galaxy_test.base.constants import (
@@ -75,17 +76,19 @@ def fetch_target(
         response = self.dataset_populator.fetch(payload, assert_ok=assert_ok, wait=wait)
         return response

-    def _write_file(self, dir_path: str, content: str, filename: str = "test") -> str:
-        """Helper for writing ftp/server dir files."""
-        self._ensure_directory(dir_path)
-        path = os.path.join(dir_path, filename)
-        with open(path, "w") as f:
-            f.write(content)
-        return path
-
-    def _ensure_directory(self, path: str) -> None:
-        if not os.path.exists(path):
-            os.makedirs(path)
+
+def _write_file(dir_path: str, content: str, filename: str = "test") -> str:
+    """Helper for writing ftp/server dir files."""
+    _ensure_directory(dir_path)
+    path = os.path.join(dir_path, filename)
+    with open(path, "w") as f:
+        f.write(content)
+    return path
+
+
+def _ensure_directory(path: str) -> None:
+    if not os.path.exists(path):
+        os.makedirs(path)


 class BaseUploadContentConfigurationTestCase(BaseUploadContentConfigurationInstance, TestCase):
@@ -190,6 +193,42 @@ def test_disallowed_for_fetch_urls(self, history_id: str) -> None:
         assert os.path.exists(path)


+def fetch_link_data_only(
+    library_populator: LibraryPopulator, dataset_populator: DatasetPopulator, test_data_resolver: TestDataResolver
+):
+    history_id, library, destination = library_populator.setup_fetch_to_folder("fetch_and_link")
+    bed_test_data_path = test_data_resolver.get_filename("4.bed")
+    assert os.path.exists(bed_test_data_path)
+    items = [{"src": "path", "path": bed_test_data_path, "info": "my cool bed", "link_data_only": True}]
+    targets = [{"destination": destination, "items": items}]
+    payload = {
+        "history_id": history_id,  # TODO: Shouldn't be needed :(
+        "targets": targets,
+    }
+    dataset_populator.fetch(payload)
+    dataset = library_populator.get_library_contents_with_path(library["id"], "/4.bed")
+    assert dataset["file_size"] == 61, dataset
+    assert dataset["file_name"] == bed_test_data_path, dataset
+    assert os.path.exists(bed_test_data_path)
+
+
+def link_data_only(server_dir: str, library_populator: LibraryPopulator) -> None:
+    content = "hello world\n"
+    dir_path = os.path.join(server_dir, "lib1")
+    file_path = _write_file(dir_path, content)
+    library = library_populator.new_private_library("serverdirupload")
+    # upload $GALAXY_ROOT/test-data/library
+    payload, files = library_populator.create_dataset_request(
+        library, upload_option="upload_directory", server_dir="lib1", link_data=True
+    )
+    response = library_populator.raw_library_contents_create(library["id"], payload, files=files)
+    assert response.status_code == 200, response.json()
+    dataset = response.json()[0]
+    ok_dataset = library_populator.wait_on_library_dataset(library["id"], dataset["id"])
+    assert ok_dataset["file_size"] == 12, ok_dataset
+    assert ok_dataset["file_name"] == file_path, ok_dataset
+
+
 class TestAdminsCanPasteFilePaths(BaseUploadContentConfigurationTestCase):
     require_admin_user = True

@@ -506,9 +545,9 @@ def test_ftp_uploads_not_purged(self, history_id: str) -> None:

 class TestAdvancedFtpUploadFetch(BaseFtpUploadConfigurationTestCase):
     def test_fetch_ftp_directory(self, history_id: str) -> None:
         dir_path = self._get_user_ftp_path()
-        self._write_file(os.path.join(dir_path, "subdir"), "content 1", filename="1")
-        self._write_file(os.path.join(dir_path, "subdir"), "content 22", filename="2")
-        self._write_file(os.path.join(dir_path, "subdir"), "content 333", filename="3")
+        _write_file(os.path.join(dir_path, "subdir"), "content 1", filename="1")
+        _write_file(os.path.join(dir_path, "subdir"), "content 22", filename="2")
+        _write_file(os.path.join(dir_path, "subdir"), "content 333", filename="3")
         target = {
             "destination": {"type": "hdca"},
             "elements_from": "directory",
@@ -525,9 +564,9 @@ def test_fetch_ftp_directory(self, history_id: str) -> None:

     def test_fetch_nested_elements_from(self, history_id: str) -> None:
         dir_path = self._get_user_ftp_path()
-        self._write_file(os.path.join(dir_path, "subdir1"), "content 1", filename="1")
-        self._write_file(os.path.join(dir_path, "subdir1"), "content 22", filename="2")
-        self._write_file(os.path.join(dir_path, "subdir2"), "content 333", filename="3")
+        _write_file(os.path.join(dir_path, "subdir1"), "content 1", filename="1")
+        _write_file(os.path.join(dir_path, "subdir1"), "content 22", filename="2")
+        _write_file(os.path.join(dir_path, "subdir2"), "content 333", filename="3")
         elements = [
             {
                 "name": "subdirel1",
@@ -667,7 +706,7 @@ def _copy_to_user_ftp_file(self, test_data_path: str) -> None:
         shutil.copyfile(input_path, os.path.join(target_dir, test_data_path))

     def _write_user_ftp_file(self, path: str, content: str) -> str:
-        return self._write_file(os.path.join(self.ftp_dir(), TEST_USER), content, filename=path)
+        return _write_file(os.path.join(self.ftp_dir(), TEST_USER), content, filename=path)


 class TestServerDirectoryOffByDefault(BaseUploadContentConfigurationTestCase):
@@ -721,20 +760,7 @@ def test_valid_server_dir_uploads_okay(self) -> None:
         assert library_dataset["file_size"] == 12, library_dataset

     def test_link_data_only(self) -> None:
-        content = "hello world\n"
-        dir_path = os.path.join(self.server_dir(), "lib1")
-        file_path = self._write_file(dir_path, content)
-        library = self.library_populator.new_private_library("serverdirupload")
-        # upload $GALAXY_ROOT/test-data/library
-        payload, files = self.library_populator.create_dataset_request(
-            library, upload_option="upload_directory", server_dir="lib1", link_data=True
-        )
-        response = self.library_populator.raw_library_contents_create(library["id"], payload, files=files)
-        assert response.status_code == 200, response.json()
-        dataset = response.json()[0]
-        ok_dataset = self.library_populator.wait_on_library_dataset(library["id"], dataset["id"])
-        assert ok_dataset["file_size"] == 12, ok_dataset
-        assert ok_dataset["file_name"] == file_path, ok_dataset
+        link_data_only(self.server_dir(), self.library_populator)

     @classmethod
     def server_dir(cls) -> str:
@@ -813,20 +839,7 @@ def test_fetch_path_to_folder(self) -> None:
         assert os.path.exists(bed_test_data_path)

     def test_fetch_link_data_only(self) -> None:
-        history_id, library, destination = self.library_populator.setup_fetch_to_folder("fetch_and_link")
-        bed_test_data_path = self.test_data_resolver.get_filename("4.bed")
-        assert os.path.exists(bed_test_data_path)
-        items = [{"src": "path", "path": bed_test_data_path, "info": "my cool bed", "link_data_only": True}]
-        targets = [{"destination": destination, "items": items}]
-        payload = {
-            "history_id": history_id,  # TODO: Shouldn't be needed :(
-            "targets": targets,
-        }
-        self.dataset_populator.fetch(payload)
-        dataset = self.library_populator.get_library_contents_with_path(library["id"], "/4.bed")
-        assert dataset["file_size"] == 61, dataset
-        assert dataset["file_name"] == bed_test_data_path, dataset
-        assert os.path.exists(bed_test_data_path)
+        fetch_link_data_only(self.library_populator, self.dataset_populator, self.test_data_resolver)

     def test_fetch_recursive_archive(self) -> None:
         history_id, library, destination = self.library_populator.setup_fetch_to_folder("recursive_archive")
@@ -948,3 +961,26 @@ def test_tar_to_directory(self, history_id: str) -> None:
             history_id=history_id,
         )
         self.dataset_populator.wait_for_job(response["jobs"][0]["id"])
+
+
+class TestLinkDataUploadExtendedMetadata(BaseUploadContentConfigurationTestCase):
+    require_admin_user = True
+
+    @classmethod
+    def handle_galaxy_config_kwds(cls, config) -> None:
+        super().handle_galaxy_config_kwds(config)
+        config["allow_path_paste"] = True
+        config["metadata_strategy"] = "extended"
+        server_dir = cls.server_dir()
+        os.makedirs(server_dir)
+        config["library_import_dir"] = server_dir
+
+    @classmethod
+    def server_dir(cls) -> str:
+        return cls.temp_config_dir("server")
+
+    def test_fetch_link_data_only(self) -> None:
+        fetch_link_data_only(self.library_populator, self.dataset_populator, self.test_data_resolver)
+
+    def test_link_data_only(self) -> None:
+        link_data_only(self.server_dir(), self.library_populator)
From ca5132d7bf5165877dfbed873a89bbe7b1a5f5c5 Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Thu, 26 Oct 2023 15:43:18 +0200
Subject: [PATCH 09/11] Skip change_datatype things if we're not actually changing the extension

Side-steps a problem with FileParameter in the most efficient way possible.
This likely became a problem for one of Wolfgang's workflows after we dropped
some earlier unnecessary flushes.
---
 lib/galaxy/datatypes/registry.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/lib/galaxy/datatypes/registry.py b/lib/galaxy/datatypes/registry.py
index 8c14bb64d8d5..ce5f9ad890e5 100644
--- a/lib/galaxy/datatypes/registry.py
+++ b/lib/galaxy/datatypes/registry.py
@@ -590,13 +590,14 @@ def get_datatype_by_extension(self, ext):
         return self.datatypes_by_extension.get(ext, None)

     def change_datatype(self, data, ext):
-        data.extension = ext
-        # call init_meta and copy metadata from itself. The datatype
-        # being converted *to* will handle any metadata copying and
-        # initialization.
-        if data.has_data():
-            data.set_size()
-            data.init_meta(copy_from=data)
+        if data.extension != ext:
+            data.extension = ext
+            # call init_meta and copy metadata from itself. The datatype
+            # being converted *to* will handle any metadata copying and
+            # initialization.
+            if data.has_data():
+                data.set_size()
+                data.init_meta(copy_from=data)
         return data

     def load_datatype_converters(self, toolbox, use_cached=False):

From d7beda0f1acf2fc923f4389fad3cc9fb049b7afe Mon Sep 17 00:00:00 2001
From: mvdbeek
Date: Thu, 26 Oct 2023 16:44:48 +0200
Subject: [PATCH 10/11] Commit if we've got outstanding MetadataFile instances

---
 lib/galaxy/model/metadata.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/lib/galaxy/model/metadata.py b/lib/galaxy/model/metadata.py
index ed757314edca..a0b53a1527ae 100644
--- a/lib/galaxy/model/metadata.py
+++ b/lib/galaxy/model/metadata.py
@@ -604,7 +604,16 @@ def wrap(self, value, session):
         if isinstance(value, int):
             return session.query(galaxy.model.MetadataFile).get(value)
         else:
-            return session.query(galaxy.model.MetadataFile).filter_by(uuid=value).one()
+            wrapped_value = session.query(galaxy.model.MetadataFile).filter_by(uuid=value).one_or_none()
+            if wrapped_value:
+                return wrapped_value
+            else:
+                # If we've simultaneously copied the dataset and we've changed the datatype on the
+                # copy we may not have committed the MetadataFile yet, so we need to commit the session.
+                # TODO: It would be great if we can avoid the commit in the future.
+                with transaction(session):
+                    session.commit()
+                return session.query(galaxy.model.MetadataFile).filter_by(uuid=value).one_or_none()

     def make_copy(self, value, target_context: MetadataCollection, source_context):
         session = target_context._object_session(target_context.parent)

From d92bbb144ffcda7e17368cf43dd25c8a9a3a7dd6 Mon Sep 17 00:00:00 2001
From: Martin Cech
Date: Thu, 26 Oct 2023 20:36:48 +0200
Subject: [PATCH 11/11] unify statement format

Co-authored-by: John Davis
---
 lib/galaxy/model/metadata.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/galaxy/model/metadata.py b/lib/galaxy/model/metadata.py
index c2302e4a23e8..bcc68f429ac4 100644
--- a/lib/galaxy/model/metadata.py
+++ b/lib/galaxy/model/metadata.py
@@ -605,7 +605,9 @@ def wrap(self, value, session):
         if isinstance(value, int):
             return session.get(galaxy.model.MetadataFile, value)
         else:
-            wrapped_value = session.query(galaxy.model.MetadataFile).filter_by(uuid=value).one_or_none()
+            wrapped_value = session.execute(
+                select(galaxy.model.MetadataFile).filter_by(uuid=value)
+            ).scalar_one_or_none()
             if wrapped_value:
                 return wrapped_value
             else:
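
Patch 11 only restyles the lookup that patch 10 introduced: `session.query(...).one_or_none()` and `session.execute(select(...)).scalar_one_or_none()` are equivalent here, the latter being SQLAlchemy's 2.0-style API. A self-contained sketch with a toy model (Galaxy's real `MetadataFile` is considerably more involved):

```python
from sqlalchemy import Column, Integer, String, create_engine, select
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class MetadataFile(Base):
    __tablename__ = "metadata_file"
    id = Column(Integer, primary_key=True)
    uuid = Column(String, unique=True)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(MetadataFile(uuid="abc"))
    session.commit()
    # legacy Query style, as in patch 10:
    legacy = session.query(MetadataFile).filter_by(uuid="abc").one_or_none()
    # 2.0 style, as in patch 11 -- same semantics, returns None when nothing matches:
    modern = session.execute(select(MetadataFile).filter_by(uuid="abc")).scalar_one_or_none()
    assert legacy is modern
```
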