From ee56d70b243c1e6ccf788ad39305432e72cfb720 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Mon, 8 Jul 2024 19:00:53 +0200 Subject: [PATCH 1/3] Add test for submitting hdca with deleted elements --- lib/galaxy_test/api/test_jobs.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/lib/galaxy_test/api/test_jobs.py b/lib/galaxy_test/api/test_jobs.py index 09193ea2065e..7c226de419b7 100644 --- a/lib/galaxy_test/api/test_jobs.py +++ b/lib/galaxy_test/api/test_jobs.py @@ -576,6 +576,23 @@ def paths_deleted(): if output_dataset_paths_exist: wait_on(paths_deleted, "path deletion") + def test_submission_on_collection_with_deleted_element(self, history_id): + hdca = self.dataset_collection_populator.create_list_of_list_in_history(history_id=history_id, wait=True).json() + hda_id = hdca["elements"][0]["object"]["elements"][0]["object"]["id"] + self.dataset_populator.delete_dataset(history_id=history_id, content_id=hda_id) + response = self.dataset_populator.run_tool_raw( + "is_of_type", + inputs={ + "collection": {"batch": True, "values": [{"src": "hdca", "id": hdca["id"], "map_over_type": "list"}]}, + }, + history_id=history_id, + ) + assert response.status_code == 400 + assert ( + response.json()["err_msg"] + == "parameter 'collection': the previously selected dataset collection has elements that are deleted." + ) + @pytest.mark.require_new_history @skip_without_tool("create_2") def test_purging_output_cleaned_after_ok_run(self, history_id): From 3b65665ba635c1ab71e96a88763bb03079b56c9d Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Sat, 25 May 2024 15:56:19 +0200 Subject: [PATCH 2/3] Raise ``ParameterValueError`` if input collection elements deleted This is a prerequisite to fixing https://github.com/galaxyproject/galaxy/issues/18006 --- lib/galaxy/model/__init__.py | 8 +++++ lib/galaxy/tools/parameters/basic.py | 53 +++++++++++++++++++++------- 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index e4bd56ace205..cbf60fcc939f 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -6499,6 +6499,14 @@ def attribute_columns(column_collection, attributes, nesting_level=None): q = q.order_by(*order_by_columns) return q + @property + def elements_deleted(self): + stmt = self._build_nested_collection_attributes_stmt( + hda_attributes=("deleted",), dataset_attributes=("deleted",) + ) + stmt = exists(stmt).where(or_(HistoryDatasetAssociation.deleted == true(), Dataset.deleted == true())) + return object_session(self).execute(select(stmt)).scalar() + @property def dataset_states_and_extensions_summary(self): if not hasattr(self, "_dataset_states_and_extensions_summary"): diff --git a/lib/galaxy/tools/parameters/basic.py b/lib/galaxy/tools/parameters/basic.py index ebdad3e37aed..9a9cd334ebba 100644 --- a/lib/galaxy/tools/parameters/basic.py +++ b/lib/galaxy/tools/parameters/basic.py @@ -2188,19 +2188,37 @@ def from_json(self, value, trans, other_values=None): dataset_matcher_factory = get_dataset_matcher_factory(trans) dataset_matcher = dataset_matcher_factory.dataset_matcher(self, other_values) for v in rval: - if v: - if hasattr(v, "deleted") and v.deleted: + if isinstance(v, DatasetCollectionElement): + if hda := v.hda: + v = hda + elif ldda := v.ldda: + v = ldda + elif collection := v.child_collection: + v = collection + elif not v.collection and v.collection.populated_optimized: + raise ParameterValueError("the selected collection has not been populated.", self.name) + else: + raise ParameterValueError("Collection element in unexpected state", self.name) + if isinstance(v, DatasetInstance): + if v.deleted: raise ParameterValueError("the previously selected dataset has been deleted.", self.name) - elif hasattr(v, "dataset") and v.dataset.state in [Dataset.states.ERROR, Dataset.states.DISCARDED]: + elif v.dataset and v.dataset.state in [Dataset.states.ERROR, Dataset.states.DISCARDED]: raise ParameterValueError( "the previously selected dataset has entered an unusable state", self.name ) - elif hasattr(v, "dataset"): - if isinstance(v, DatasetCollectionElement): - v = v.hda - match = dataset_matcher.hda_match(v) - if match and match.implicit_conversion: - v.implicit_conversion = True # type:ignore[union-attr] + match = dataset_matcher.hda_match(v) + if match and match.implicit_conversion: + v.implicit_conversion = True # type:ignore[union-attr] + elif isinstance(v, HistoryDatasetCollectionAssociation): + if v.deleted: + raise ParameterValueError("the previously selected dataset collection has been deleted.", self.name) + v = v.collection + if isinstance(v, DatasetCollection): + if v.elements_deleted: + raise ParameterValueError( + "the previously selected dataset collection has elements that are deleted.", self.name + ) + if not self.multiple: if len(rval) > 1: raise ParameterValueError("more than one dataset supplied to single input dataset parameter", self.name) @@ -2497,10 +2515,19 @@ def from_json(self, value, trans, other_values=None): rval = session.get(HistoryDatasetCollectionAssociation, int(value[len("hdca:") :])) else: rval = session.get(HistoryDatasetCollectionAssociation, int(value)) - if rval and isinstance(rval, HistoryDatasetCollectionAssociation): - if rval.deleted: - raise ParameterValueError("the previously selected dataset collection has been deleted", self.name) - # TODO: Handle error states, implement error states ... + if rval: + if isinstance(rval, HistoryDatasetCollectionAssociation): + if rval.deleted: + raise ParameterValueError("the previously selected dataset collection has been deleted", self.name) + if rval.collection.elements_deleted: + raise ParameterValueError( + "the previously selected dataset collection has elements that are deleted.", self.name + ) + if isinstance(rval, DatasetCollectionElement): + if (child_collection := rval.child_collection) and child_collection.elements_deleted: + raise ParameterValueError( + "the previously selected dataset collection has elements that are deleted.", self.name + ) return rval def to_text(self, value): From 0be837edc27482b3d210d2a5ba4494cc6de09d4a Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 9 Jul 2024 18:49:38 +0200 Subject: [PATCH 3/3] Cache and provide in-memory fallback --- lib/galaxy/model/__init__.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index cbf60fcc939f..aaedf98f4c66 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -6501,11 +6501,20 @@ def attribute_columns(column_collection, attributes, nesting_level=None): @property def elements_deleted(self): - stmt = self._build_nested_collection_attributes_stmt( - hda_attributes=("deleted",), dataset_attributes=("deleted",) - ) - stmt = exists(stmt).where(or_(HistoryDatasetAssociation.deleted == true(), Dataset.deleted == true())) - return object_session(self).execute(select(stmt)).scalar() + if not hasattr(self, "_elements_deleted"): + if session := object_session(self): + stmt = self._build_nested_collection_attributes_stmt( + hda_attributes=("deleted",), dataset_attributes=("deleted",) + ) + stmt = stmt.exists().where(or_(HistoryDatasetAssociation.deleted == true(), Dataset.deleted == true())) + self._elements_deleted = session.execute(select(stmt)).scalar() + else: + self._elements_deleted = False + for dataset_instance in self.dataset_instances: + if dataset_instance.deleted or dataset_instance.dataset.deleted: + self._elements_deleted = True + break + return self._elements_deleted @property def dataset_states_and_extensions_summary(self):