From 6d0212518ba94eaa29f4f5bcef43b34bed07daf4 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Wed, 14 Feb 2024 19:09:16 +0100 Subject: [PATCH 1/2] Set metadata states on dataset association, not dataset --- lib/galaxy/model/store/__init__.py | 2 +- lib/galaxy/model/store/discover.py | 2 +- lib/galaxy/webapps/galaxy/services/history_contents.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/galaxy/model/store/__init__.py b/lib/galaxy/model/store/__init__.py index 4ea835bbd2b8..f6c57d9cfcaa 100644 --- a/lib/galaxy/model/store/__init__.py +++ b/lib/galaxy/model/store/__init__.py @@ -708,7 +708,7 @@ def handle_dataset_object_edit(dataset_instance, dataset_attrs): dataset_instance.datatype.set_meta(dataset_instance) except Exception: log.debug(f"Metadata setting failed on {dataset_instance}", exc_info=True) - dataset_instance.dataset.state = dataset_instance.dataset.states.FAILED_METADATA + dataset_instance._state = dataset_instance.dataset.states.FAILED_METADATA if model_class == "HistoryDatasetAssociation": if not isinstance(dataset_instance, model.HistoryDatasetAssociation): diff --git a/lib/galaxy/model/store/discover.py b/lib/galaxy/model/store/discover.py index c08579cc3725..96415f696224 100644 --- a/lib/galaxy/model/store/discover.py +++ b/lib/galaxy/model/store/discover.py @@ -260,7 +260,7 @@ def set_datasets_metadata(datasets, datasets_attributes=None): primary_data.set_meta() except Exception: if primary_data.state == galaxy.model.HistoryDatasetAssociation.states.OK: - primary_data.state = galaxy.model.HistoryDatasetAssociation.states.FAILED_METADATA + primary_data._state = galaxy.model.HistoryDatasetAssociation.states.FAILED_METADATA log.exception("Exception occured while setting metdata") try: diff --git a/lib/galaxy/webapps/galaxy/services/history_contents.py b/lib/galaxy/webapps/galaxy/services/history_contents.py index 641020483d94..c12c3f7d9ea0 100644 --- a/lib/galaxy/webapps/galaxy/services/history_contents.py +++ b/lib/galaxy/webapps/galaxy/services/history_contents.py @@ -1477,7 +1477,7 @@ def _change_item_datatype( self.hda_manager.ensure_can_change_datatype(item) self.hda_manager.ensure_can_set_metadata(item) is_deferred = item.has_deferred_data - item.dataset.state = item.dataset.states.SETTING_METADATA + item._state = item.dataset.states.SETTING_METADATA if is_deferred: if params.datatype == "auto": # if `auto` just keep the original guessed datatype item.update() # TODO: remove this `update` when we can properly track the operation results to notify the history From 513ca7494de939f600ebcb3596b58906ea9a59c6 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Thu, 15 Feb 2024 11:01:20 +0100 Subject: [PATCH 2/2] Drop overly complicated state setter logic I doubt anyone instinctively know when to use `._state`, `.state`, `.raw_set_dataset_state` or `.set_dataset_state` and I think that was part of what led to this bug. --- lib/galaxy/celery/tasks.py | 5 ++- lib/galaxy/jobs/__init__.py | 4 +- lib/galaxy/jobs/handler.py | 4 +- lib/galaxy/model/__init__.py | 41 +++++++++---------- lib/galaxy/model/store/__init__.py | 7 ++-- lib/galaxy/model/store/discover.py | 6 +-- lib/galaxy/tools/__init__.py | 4 +- lib/galaxy/tools/actions/metadata.py | 2 +- .../galaxy/services/history_contents.py | 4 +- test/unit/app/tools/test_column_parameters.py | 2 +- .../app/tools/test_parameter_validation.py | 10 ++--- 11 files changed, 44 insertions(+), 45 deletions(-) diff --git a/lib/galaxy/celery/tasks.py b/lib/galaxy/celery/tasks.py index b94cc8f832b4..b29b67981ba2 100644 --- a/lib/galaxy/celery/tasks.py +++ b/lib/galaxy/celery/tasks.py @@ -197,10 +197,11 @@ def set_metadata( hda_manager.overwrite_metadata(dataset_instance) dataset_instance.datatype.set_meta(dataset_instance) dataset_instance.set_peek() - dataset_instance.dataset.state = dataset_instance.dataset.states.OK + # Reset SETTING_METADATA state so the dataset instance getter picks the dataset state + dataset_instance.set_metadata_succces_state() except Exception as e: log.info(f"Setting metadata failed on {model_class} {dataset_instance.id}: {str(e)}") - dataset_instance.dataset.state = dataset_instance.dataset.states.FAILED_METADATA + dataset_instance.state = dataset_instance.states.FAILED_METADATA with transaction(sa_session): sa_session.commit() diff --git a/lib/galaxy/jobs/__init__.py b/lib/galaxy/jobs/__init__.py index 3329c98253c5..cdac9e74e4cf 100644 --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -1769,7 +1769,7 @@ def _finish_dataset(self, output_name, dataset, job, context, final_job_state, r ) if not metadata_set_successfully: if self.tool.tool_type == "expression": - dataset._state = model.Dataset.states.OK + dataset.set_metadata_succces_state() elif retry_internally: # If Galaxy was expected to sniff type and didn't - do so. if dataset.ext == "_sniff_": @@ -1781,7 +1781,7 @@ def _finish_dataset(self, output_name, dataset, job, context, final_job_state, r # call datatype.set_meta directly for the initial set_meta call during dataset creation dataset.datatype.set_meta(dataset, overwrite=False) else: - dataset._state = model.Dataset.states.FAILED_METADATA + dataset.state = model.HistoryDatasetAssociation.states.FAILED_METADATA else: self.external_output_metadata.load_metadata( dataset, diff --git a/lib/galaxy/jobs/handler.py b/lib/galaxy/jobs/handler.py index 0b4819f684f6..9205e77acef5 100644 --- a/lib/galaxy/jobs/handler.py +++ b/lib/galaxy/jobs/handler.py @@ -620,7 +620,9 @@ def __filter_jobs_with_invalid_input_states(self, jobs): ) ), input_association.deleted == true(), - input_association._state == input_association.states.FAILED_METADATA, + input_association._state.in_( + (input_association.states.FAILED_METADATA, input_association.states.SETTING_METADATA) + ), ) ) .all() diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index c73b564cbf5b..959da9f8e5b2 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -4310,7 +4310,7 @@ class DatasetInstance(RepresentById, UsesCreateAndUpdateTime, _HasTable): """A base class for all 'dataset instances', HDAs, LDAs, etc""" states = Dataset.states - _state: str + _state: Optional[str] conversion_messages = Dataset.conversion_messages permitted_actions = Dataset.permitted_actions purged: bool @@ -4394,32 +4394,29 @@ def ext(self): @property def has_deferred_data(self): - return self.get_dataset_state() == Dataset.states.DEFERRED + return self.dataset.state == Dataset.states.DEFERRED - def get_dataset_state(self): - # self._state is currently only used when setting metadata externally - # leave setting the state as-is, we'll currently handle this specially in the external metadata code + @property + def state(self): + # self._state holds state that should only affect this particular dataset association, not the dataset state itself if self._state: return self._state return self.dataset.state - def raw_set_dataset_state(self, state): - if state != self.dataset.state: - self.dataset.state = state - return True - else: - return False - - def set_dataset_state(self, state): - if self.raw_set_dataset_state(state): - sa_session = object_session(self) - if sa_session: - object_session(self).add(self.dataset) - session = object_session(self) - with transaction(session): - session.commit() # flush here, because hda.flush() won't flush the Dataset object - - state = property(get_dataset_state, set_dataset_state) + @state.setter + def state(self, state: Optional[DatasetState]): + if state != self.state: + if state in (DatasetState.FAILED_METADATA, DatasetState.SETTING_METADATA): + self._state = state + else: + self.set_metadata_succces_state() + sa_session = object_session(self) + if sa_session: + sa_session.add(self.dataset) + self.dataset.state = state + + def set_metadata_succces_state(self): + self._state = None def get_file_name(self, sync_cache=True) -> str: if self.dataset.purged: diff --git a/lib/galaxy/model/store/__init__.py b/lib/galaxy/model/store/__init__.py index f6c57d9cfcaa..1a38ebfd4ae4 100644 --- a/lib/galaxy/model/store/__init__.py +++ b/lib/galaxy/model/store/__init__.py @@ -620,10 +620,9 @@ def handle_dataset_object_edit(dataset_instance, dataset_attrs): discarded_data = self.import_options.discarded_data dataset_state = dataset_attrs.get("state", dataset_instance.states.OK) if dataset_state == dataset_instance.states.DEFERRED: - dataset_instance._state = dataset_instance.states.DEFERRED + dataset_instance.state = dataset_instance.states.DEFERRED dataset_instance.deleted = False dataset_instance.purged = False - dataset_instance.dataset.state = dataset_instance.states.DEFERRED dataset_instance.dataset.deleted = False dataset_instance.dataset.purged = False elif ( @@ -635,7 +634,7 @@ def handle_dataset_object_edit(dataset_instance, dataset_attrs): target_state = ( dataset_instance.states.DISCARDED if is_discarded else dataset_instance.states.DEFERRED ) - dataset_instance._state = target_state + dataset_instance.state = target_state deleted = is_discarded and (discarded_data == ImportDiscardedDataType.FORBID) dataset_instance.deleted = deleted dataset_instance.purged = deleted @@ -708,7 +707,7 @@ def handle_dataset_object_edit(dataset_instance, dataset_attrs): dataset_instance.datatype.set_meta(dataset_instance) except Exception: log.debug(f"Metadata setting failed on {dataset_instance}", exc_info=True) - dataset_instance._state = dataset_instance.dataset.states.FAILED_METADATA + dataset_instance.state = dataset_instance.dataset.states.FAILED_METADATA if model_class == "HistoryDatasetAssociation": if not isinstance(dataset_instance, model.HistoryDatasetAssociation): diff --git a/lib/galaxy/model/store/discover.py b/lib/galaxy/model/store/discover.py index 96415f696224..705165fe7242 100644 --- a/lib/galaxy/model/store/discover.py +++ b/lib/galaxy/model/store/discover.py @@ -128,7 +128,7 @@ def create_dataset( if init_from: self.permission_provider.copy_dataset_permissions(init_from, primary_data) - primary_data.raw_set_dataset_state(init_from.state) + primary_data.state = init_from.state else: self.permission_provider.set_default_hda_permissions(primary_data) else: @@ -147,7 +147,7 @@ def create_dataset( self.add_library_dataset_to_folder(library_folder, ld) primary_data = ldda - primary_data.raw_set_dataset_state(final_job_state) + primary_data.state = final_job_state if final_job_state == galaxy.model.Job.states.ERROR and not self.get_implicit_collection_jobs_association_id(): primary_data.visible = True @@ -260,7 +260,7 @@ def set_datasets_metadata(datasets, datasets_attributes=None): primary_data.set_meta() except Exception: if primary_data.state == galaxy.model.HistoryDatasetAssociation.states.OK: - primary_data._state = galaxy.model.HistoryDatasetAssociation.states.FAILED_METADATA + primary_data.state = galaxy.model.HistoryDatasetAssociation.states.FAILED_METADATA log.exception("Exception occured while setting metdata") try: diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py index 3168f6a52e13..00da6339aa2a 100644 --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -3020,7 +3020,7 @@ def exec_after_process(self, app, inp_data, out_data, param_dict, job=None, **kw metadata_set_successfully = False log.exception("Exception occured while loading metadata results") if not metadata_set_successfully: - dataset._state = model.Dataset.states.FAILED_METADATA + dataset.state = model.DatasetInstance.states.FAILED_METADATA self.sa_session.add(dataset) with transaction(self.sa_session): self.sa_session.commit() @@ -3033,7 +3033,7 @@ def exec_after_process(self, app, inp_data, out_data, param_dict, job=None, **kw dataset.state = param_dict.get("__ORIGINAL_DATASET_STATE__") else: # Revert dataset.state to fall back to dataset.dataset.state - dataset._state = None + dataset.set_metadata_succces_state() # Need to reset the peek, which may rely on metadata # TODO: move this into metadata setting, setting the peek requires dataset access, # and large chunks of the dataset may be read here. diff --git a/lib/galaxy/tools/actions/metadata.py b/lib/galaxy/tools/actions/metadata.py index 95b5e502c70c..04db0ab30e17 100644 --- a/lib/galaxy/tools/actions/metadata.py +++ b/lib/galaxy/tools/actions/metadata.py @@ -145,7 +145,7 @@ def execute_via_app( job.add_input_library_dataset(dataset_name, dataset) # Need a special state here to show that metadata is being set and also allow the job to run # i.e. if state was set to 'running' the set metadata job would never run, as it would wait for input (the dataset to set metadata on) to be in a ready state - dataset._state = dataset.states.SETTING_METADATA + dataset.state = dataset.states.SETTING_METADATA job.state = start_job_state # job inputs have been configured, restore initial job state with transaction(sa_session): sa_session.commit() diff --git a/lib/galaxy/webapps/galaxy/services/history_contents.py b/lib/galaxy/webapps/galaxy/services/history_contents.py index c12c3f7d9ea0..110d3bddca2f 100644 --- a/lib/galaxy/webapps/galaxy/services/history_contents.py +++ b/lib/galaxy/webapps/galaxy/services/history_contents.py @@ -1477,13 +1477,13 @@ def _change_item_datatype( self.hda_manager.ensure_can_change_datatype(item) self.hda_manager.ensure_can_set_metadata(item) is_deferred = item.has_deferred_data - item._state = item.dataset.states.SETTING_METADATA + item.state = item.dataset.states.SETTING_METADATA if is_deferred: if params.datatype == "auto": # if `auto` just keep the original guessed datatype item.update() # TODO: remove this `update` when we can properly track the operation results to notify the history else: trans.app.datatypes_registry.change_datatype(item, params.datatype) - item.dataset.state = item.dataset.states.DEFERRED + item.state = item.dataset.states.DEFERRED else: return change_datatype.si( dataset_id=item.id, datatype=params.datatype, task_user_id=getattr(trans.user, "id", None) diff --git a/test/unit/app/tools/test_column_parameters.py b/test/unit/app/tools/test_column_parameters.py index 690292ed6d1d..f21ed83f8616 100644 --- a/test/unit/app/tools/test_column_parameters.py +++ b/test/unit/app/tools/test_column_parameters.py @@ -87,7 +87,7 @@ def build_ready_hda(self): extension="interval", create_dataset=True, sa_session=self.app.model.context ) ) - ready_hda.set_dataset_state("ok") + ready_hda.state = "ok" return ready_hda @property diff --git a/test/unit/app/tools/test_parameter_validation.py b/test/unit/app/tools/test_parameter_validation.py index 8c337600944e..2e0bf0ffe857 100644 --- a/test/unit/app/tools/test_parameter_validation.py +++ b/test/unit/app/tools/test_parameter_validation.py @@ -226,11 +226,11 @@ def test_DatasetOkValidator(self): ok_hda = hist.add_dataset( HistoryDatasetAssociation(id=1, extension="interval", create_dataset=True, sa_session=sa_session) ) - ok_hda.set_dataset_state(Dataset.states.OK) + ok_hda.state = Dataset.states.OK notok_hda = hist.add_dataset( HistoryDatasetAssociation(id=2, extension="interval", create_dataset=True, sa_session=sa_session) ) - notok_hda.set_dataset_state(Dataset.states.EMPTY) + notok_hda.state = Dataset.states.EMPTY p = self._parameter_for( xml=""" @@ -349,7 +349,7 @@ def test_MetadataValidator(self): dataset=Dataset(external_filename=get_test_fname("1.bed")), ) ) - hda.set_dataset_state(Dataset.states.OK) + hda.state = Dataset.states.OK hda.set_meta() hda.metadata.strandCol = hda.metadata.spec["strandCol"].no_value @@ -409,12 +409,12 @@ def test_UnspecifiedBuildValidator(self): has_dbkey_hda = hist.add_dataset( HistoryDatasetAssociation(id=1, extension="interval", create_dataset=True, sa_session=sa_session) ) - has_dbkey_hda.set_dataset_state(Dataset.states.OK) + has_dbkey_hda.state = Dataset.states.OK has_dbkey_hda.metadata.dbkey = "hg19" has_no_dbkey_hda = hist.add_dataset( HistoryDatasetAssociation(id=2, extension="interval", create_dataset=True, sa_session=sa_session) ) - has_no_dbkey_hda.set_dataset_state(Dataset.states.OK) + has_no_dbkey_hda.state = Dataset.states.OK p = self._parameter_for( xml="""