From 0467e96e94bfdbd67d23ba446829d32d44d1578f Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Sun, 9 Jun 2024 08:50:19 +0200 Subject: [PATCH] Fix handling of collecting discovered but purged outputs --- lib/galaxy/job_execution/output_collect.py | 12 ++++++++++-- lib/galaxy/model/store/discover.py | 7 ++++++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/lib/galaxy/job_execution/output_collect.py b/lib/galaxy/job_execution/output_collect.py index aae1336f6484..8e37864767d0 100644 --- a/lib/galaxy/job_execution/output_collect.py +++ b/lib/galaxy/job_execution/output_collect.py @@ -521,7 +521,8 @@ def collect_primary_datasets(job_context: Union[JobContext, SessionlessJobContex outdata.designation = designation outdata.dataset.external_filename = None # resets filename_override # Move data from temp location to dataset location - job_context.object_store.update_from_file(outdata.dataset, file_name=filename, create=True) + if not outdata.dataset.purged: + job_context.object_store.update_from_file(outdata.dataset, file_name=filename, create=True) primary_output_assigned = True continue if name not in primary_datasets: @@ -554,6 +555,7 @@ def collect_primary_datasets(job_context: Union[JobContext, SessionlessJobContex dataset_attributes=new_primary_datasets_attributes, creating_job_id=job_context.get_job_id() if job_context else None, storage_callbacks=storage_callbacks, + purged=outdata.dataset.purged, ) # Associate new dataset with job job_context.add_output_dataset_association(f"__new_primary_file_{name}|{designation}__", primary_data) @@ -563,7 +565,13 @@ def collect_primary_datasets(job_context: Union[JobContext, SessionlessJobContex if primary_output_assigned: outdata.name = new_outdata_name outdata.init_meta() - outdata.set_meta() + if not outdata.dataset.purged: + try: + outdata.set_meta() + except Exception: + # We don't want to fail here on a single "bad" discovered dataset + log.debug("set meta failed for %s", outdata, exc_info=True) + outdata.state = HistoryDatasetAssociation.states.FAILED_METADATA outdata.set_peek() outdata.discovered = True sa_session = job_context.sa_session diff --git a/lib/galaxy/model/store/discover.py b/lib/galaxy/model/store/discover.py index 2db3749feac9..b3a902ff8487 100644 --- a/lib/galaxy/model/store/discover.py +++ b/lib/galaxy/model/store/discover.py @@ -91,6 +91,7 @@ def create_dataset( creating_job_id=None, output_name=None, storage_callbacks=None, + purged=False, ): tag_list = tag_list or [] sources = sources or [] @@ -190,7 +191,11 @@ def create_dataset( if info is not None: primary_data.info = info - if filename: + + if purged: + primary_data.dataset.purged = True + primary_data.purged = True + if filename and not purged: if storage_callbacks is None: self.finalize_storage( primary_data=primary_data,