From 8f23c62e1c45490bb562b13afd89da5d6c228ae6 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Mon, 18 Mar 2024 16:36:39 +0100 Subject: [PATCH 1/3] Also set extension and metadata on copies of job outputs when finishing job --- lib/galaxy/jobs/__init__.py | 11 ++++++++--- lib/galaxy/model/store/__init__.py | 18 +++++++++++++----- lib/galaxy/tools/__init__.py | 2 +- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/lib/galaxy/jobs/__init__.py b/lib/galaxy/jobs/__init__.py index 86c43b45ed7c..b431838ceae2 100644 --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -67,6 +67,7 @@ Task, ) from galaxy.model.base import transaction +from galaxy.model.store import copy_dataset_instance_metadata_attributes from galaxy.model.store.discover import MaxDiscoveredFilesExceededError from galaxy.objectstore import ObjectStorePopulator from galaxy.structured_app import MinimalManagerApp @@ -1949,9 +1950,7 @@ def fail(message=job.info, exception=None): ] for dataset_assoc in output_dataset_associations: - if getattr(dataset_assoc.dataset, "discovered", False): - # skip outputs that have been discovered - continue + is_discovered_dataset = getattr(dataset_assoc.dataset, "discovered", False) context = self.get_dataset_finish_context(job_context, dataset_assoc) # should this also be checking library associations? - can a library item be added from a history before the job has ended? - # lets not allow this to occur @@ -1960,6 +1959,12 @@ def fail(message=job.info, exception=None): dataset_assoc.dataset.dataset.history_associations + dataset_assoc.dataset.dataset.library_associations ): + if is_discovered_dataset: + if dataset is dataset_assoc.dataset: + continue + elif dataset.extension == dataset_assoc.dataset.extension or dataset.extension == "auto": + copy_dataset_instance_metadata_attributes(dataset_assoc.dataset, dataset) + continue output_name = dataset_assoc.name # Handles retry internally on error for instance... diff --git a/lib/galaxy/model/store/__init__.py b/lib/galaxy/model/store/__init__.py index bdd773946384..1582a19efb39 100644 --- a/lib/galaxy/model/store/__init__.py +++ b/lib/galaxy/model/store/__init__.py @@ -478,12 +478,11 @@ def handle_dataset_object_edit(dataset_instance, dataset_attrs): if ( dataset_association is not dataset_instance and dataset_association.extension == dataset_instance.extension + or dataset_association.extension == "auto" ): - dataset_association.metadata = dataset_instance.metadata - dataset_association.blurb = dataset_instance.blurb - dataset_association.peek = dataset_instance.peek - dataset_association.info = dataset_instance.info - dataset_association.tool_version = dataset_instance.tool_version + copy_dataset_instance_metadata_attributes( + source=dataset_instance, target=dataset_association + ) if job: dataset_instance.dataset.job_id = job.id @@ -3085,3 +3084,12 @@ def payload_to_source_uri(payload) -> str: dump(store_dict, f) source_uri = f"file://{import_json}" return source_uri + + +def copy_dataset_instance_metadata_attributes(source: model.DatasetInstance, target: model.DatasetInstance) -> None: + target.metadata = source.metadata + target.blurb = source.blurb + target.peek = source.peek + target.info = source.info + target.tool_version = source.tool_version + target.extension = source.extension diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py index f1640fd437d4..d50725c4a04d 100644 --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -2875,7 +2875,7 @@ def exec_after_process(self, app, inp_data, out_data, param_dict, job=None, **kw break if copy_object is None: raise exceptions.MessageException("Failed to find dataset output.") - out_data[key].copy_from(copy_object) + out_data[key].copy_from(copy_object, include_metadata=True) def parse_environment_variables(self, tool_source): """Setup environment variable for inputs file.""" From 6f08b9fc9c93f7ab8c3c826bfc17bedb7da5569e Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 19 Mar 2024 10:41:16 +0100 Subject: [PATCH 2/3] Add unit test for updating output hda copies --- test/unit/data/model/test_model_store.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/unit/data/model/test_model_store.py b/test/unit/data/model/test_model_store.py index 81c3d9f68b5f..5e0163ce12bb 100644 --- a/test/unit/data/model/test_model_store.py +++ b/test/unit/data/model/test_model_store.py @@ -827,6 +827,21 @@ def test_sessionless_import_edit_datasets(): assert d2 is not None +def test_import_job_with_output_copy(): + app, h, temp_directory, import_history = _setup_simple_export({"for_edit": True}) + hda = h.active_datasets[-1] + # Simulate a copy being made of an output hda + copy = hda.copy(new_name="output copy") + # set extension to auto, should be changed to real extension when finalizing job + copy.extension = "auto" + app.add_and_commit(copy) + import_model_store = store.get_import_model_store_for_directory( + temp_directory, import_options=store.ImportOptions(allow_dataset_object_edit=True, allow_edit=True), app=app + ) + import_model_store.perform_import() + assert copy.extension == "txt" + + def test_import_datasets_with_ids_fails_if_not_editing_models(): app, h, temp_directory, import_history = _setup_simple_export({"for_edit": True}) u = h.user From a689413a964289d9623c2612cdcaf66b4d5b149b Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 19 Mar 2024 11:29:45 +0100 Subject: [PATCH 3/3] Ensure metadata is copied for expression tool outputs --- lib/galaxy_test/api/test_workflows.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/galaxy_test/api/test_workflows.py b/lib/galaxy_test/api/test_workflows.py index 4919f6ca814e..8e346f4a59e6 100644 --- a/lib/galaxy_test/api/test_workflows.py +++ b/lib/galaxy_test/api/test_workflows.py @@ -2006,7 +2006,7 @@ def test_run_workflow_pick_value_bam_pja(self): # Makes sure that setting metadata on expression tool data outputs # doesn't break result evaluation. with self.dataset_populator.test_history() as history_id: - self._run_workflow( + summary = self._run_workflow( """class: GalaxyWorkflow inputs: some_file: @@ -2031,6 +2031,9 @@ def test_run_workflow_pick_value_bam_pja(self): - __index__: 0 value: __class__: RuntimeValue +outputs: + pick_out: + outputSource: pick_value/data_param """, test_data=""" some_file: @@ -2040,6 +2043,14 @@ def test_run_workflow_pick_value_bam_pja(self): """, history_id=history_id, ) + invocation_details = self.workflow_populator.get_invocation(summary.invocation_id, step_details=True) + # Make sure metadata is actually available + pick_value_hda = invocation_details["outputs"]["pick_out"] + dataset_details = self.dataset_populator.get_history_dataset_details( + history_id=history_id, content_id=pick_value_hda["id"] + ) + assert dataset_details["metadata_reference_names"] + assert dataset_details["metadata_bam_index"] def test_run_workflow_simple_conditional_step(self): with self.dataset_populator.test_history() as history_id: