Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[23.2] Set metadata states on dataset association, not dataset #17474

Merged
merged 2 commits into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions lib/galaxy/celery/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,10 +197,11 @@ def set_metadata(
hda_manager.overwrite_metadata(dataset_instance)
dataset_instance.datatype.set_meta(dataset_instance)
dataset_instance.set_peek()
dataset_instance.dataset.state = dataset_instance.dataset.states.OK
# Reset SETTING_METADATA state so the dataset instance getter picks the dataset state
dataset_instance.set_metadata_succces_state()
except Exception as e:
log.info(f"Setting metadata failed on {model_class} {dataset_instance.id}: {str(e)}")
dataset_instance.dataset.state = dataset_instance.dataset.states.FAILED_METADATA
dataset_instance.state = dataset_instance.states.FAILED_METADATA
with transaction(sa_session):
sa_session.commit()

Expand Down
4 changes: 2 additions & 2 deletions lib/galaxy/jobs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1769,7 +1769,7 @@ def _finish_dataset(self, output_name, dataset, job, context, final_job_state, r
)
if not metadata_set_successfully:
if self.tool.tool_type == "expression":
dataset._state = model.Dataset.states.OK
dataset.set_metadata_succces_state()
elif retry_internally:
# If Galaxy was expected to sniff type and didn't - do so.
if dataset.ext == "_sniff_":
Expand All @@ -1781,7 +1781,7 @@ def _finish_dataset(self, output_name, dataset, job, context, final_job_state, r
# call datatype.set_meta directly for the initial set_meta call during dataset creation
dataset.datatype.set_meta(dataset, overwrite=False)
else:
dataset._state = model.Dataset.states.FAILED_METADATA
dataset.state = model.HistoryDatasetAssociation.states.FAILED_METADATA
else:
self.external_output_metadata.load_metadata(
dataset,
Expand Down
4 changes: 3 additions & 1 deletion lib/galaxy/jobs/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,9 @@ def __filter_jobs_with_invalid_input_states(self, jobs):
)
),
input_association.deleted == true(),
input_association._state == input_association.states.FAILED_METADATA,
input_association._state.in_(
(input_association.states.FAILED_METADATA, input_association.states.SETTING_METADATA)
),
)
)
.all()
Expand Down
41 changes: 19 additions & 22 deletions lib/galaxy/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4310,7 +4310,7 @@ class DatasetInstance(RepresentById, UsesCreateAndUpdateTime, _HasTable):
"""A base class for all 'dataset instances', HDAs, LDAs, etc"""

states = Dataset.states
_state: str
_state: Optional[str]
conversion_messages = Dataset.conversion_messages
permitted_actions = Dataset.permitted_actions
purged: bool
Expand Down Expand Up @@ -4394,32 +4394,29 @@ def ext(self):

@property
def has_deferred_data(self):
return self.get_dataset_state() == Dataset.states.DEFERRED
return self.dataset.state == Dataset.states.DEFERRED

def get_dataset_state(self):
# self._state is currently only used when setting metadata externally
# leave setting the state as-is, we'll currently handle this specially in the external metadata code
@property
def state(self):
# self._state holds state that should only affect this particular dataset association, not the dataset state itself
if self._state:
return self._state
return self.dataset.state

def raw_set_dataset_state(self, state):
if state != self.dataset.state:
self.dataset.state = state
return True
else:
return False

def set_dataset_state(self, state):
if self.raw_set_dataset_state(state):
sa_session = object_session(self)
if sa_session:
object_session(self).add(self.dataset)
session = object_session(self)
with transaction(session):
session.commit() # flush here, because hda.flush() won't flush the Dataset object

state = property(get_dataset_state, set_dataset_state)
@state.setter
def state(self, state: Optional[DatasetState]):
if state != self.state:
if state in (DatasetState.FAILED_METADATA, DatasetState.SETTING_METADATA):
self._state = state
else:
self.set_metadata_succces_state()
sa_session = object_session(self)
if sa_session:
sa_session.add(self.dataset)
self.dataset.state = state

def set_metadata_succces_state(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Late to the party, but I guess that should have been set_metadata_success_state

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops, fixed in #17481

self._state = None

def get_file_name(self, sync_cache=True) -> str:
if self.dataset.purged:
Expand Down
7 changes: 3 additions & 4 deletions lib/galaxy/model/store/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,10 +620,9 @@ def handle_dataset_object_edit(dataset_instance, dataset_attrs):
discarded_data = self.import_options.discarded_data
dataset_state = dataset_attrs.get("state", dataset_instance.states.OK)
if dataset_state == dataset_instance.states.DEFERRED:
dataset_instance._state = dataset_instance.states.DEFERRED
dataset_instance.state = dataset_instance.states.DEFERRED
dataset_instance.deleted = False
dataset_instance.purged = False
dataset_instance.dataset.state = dataset_instance.states.DEFERRED
dataset_instance.dataset.deleted = False
dataset_instance.dataset.purged = False
elif (
Expand All @@ -635,7 +634,7 @@ def handle_dataset_object_edit(dataset_instance, dataset_attrs):
target_state = (
dataset_instance.states.DISCARDED if is_discarded else dataset_instance.states.DEFERRED
)
dataset_instance._state = target_state
dataset_instance.state = target_state
deleted = is_discarded and (discarded_data == ImportDiscardedDataType.FORBID)
dataset_instance.deleted = deleted
dataset_instance.purged = deleted
Expand Down Expand Up @@ -708,7 +707,7 @@ def handle_dataset_object_edit(dataset_instance, dataset_attrs):
dataset_instance.datatype.set_meta(dataset_instance)
except Exception:
log.debug(f"Metadata setting failed on {dataset_instance}", exc_info=True)
dataset_instance.dataset.state = dataset_instance.dataset.states.FAILED_METADATA
dataset_instance.state = dataset_instance.dataset.states.FAILED_METADATA

if model_class == "HistoryDatasetAssociation":
if not isinstance(dataset_instance, model.HistoryDatasetAssociation):
Expand Down
4 changes: 2 additions & 2 deletions lib/galaxy/model/store/discover.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def create_dataset(

if init_from:
self.permission_provider.copy_dataset_permissions(init_from, primary_data)
primary_data.raw_set_dataset_state(init_from.state)
primary_data.state = init_from.state
else:
self.permission_provider.set_default_hda_permissions(primary_data)
else:
Expand All @@ -147,7 +147,7 @@ def create_dataset(

self.add_library_dataset_to_folder(library_folder, ld)
primary_data = ldda
primary_data.raw_set_dataset_state(final_job_state)
primary_data.state = final_job_state
if final_job_state == galaxy.model.Job.states.ERROR and not self.get_implicit_collection_jobs_association_id():
primary_data.visible = True

Expand Down
4 changes: 2 additions & 2 deletions lib/galaxy/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3020,7 +3020,7 @@ def exec_after_process(self, app, inp_data, out_data, param_dict, job=None, **kw
metadata_set_successfully = False
log.exception("Exception occured while loading metadata results")
if not metadata_set_successfully:
dataset._state = model.Dataset.states.FAILED_METADATA
dataset.state = model.DatasetInstance.states.FAILED_METADATA
self.sa_session.add(dataset)
with transaction(self.sa_session):
self.sa_session.commit()
Expand All @@ -3033,7 +3033,7 @@ def exec_after_process(self, app, inp_data, out_data, param_dict, job=None, **kw
dataset.state = param_dict.get("__ORIGINAL_DATASET_STATE__")
else:
# Revert dataset.state to fall back to dataset.dataset.state
dataset._state = None
dataset.set_metadata_succces_state()
# Need to reset the peek, which may rely on metadata
# TODO: move this into metadata setting, setting the peek requires dataset access,
# and large chunks of the dataset may be read here.
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/tools/actions/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def execute_via_app(
job.add_input_library_dataset(dataset_name, dataset)
# Need a special state here to show that metadata is being set and also allow the job to run
# i.e. if state was set to 'running' the set metadata job would never run, as it would wait for input (the dataset to set metadata on) to be in a ready state
dataset._state = dataset.states.SETTING_METADATA
dataset.state = dataset.states.SETTING_METADATA
job.state = start_job_state # job inputs have been configured, restore initial job state
with transaction(sa_session):
sa_session.commit()
Expand Down
4 changes: 2 additions & 2 deletions lib/galaxy/webapps/galaxy/services/history_contents.py
Original file line number Diff line number Diff line change
Expand Up @@ -1477,13 +1477,13 @@ def _change_item_datatype(
self.hda_manager.ensure_can_change_datatype(item)
self.hda_manager.ensure_can_set_metadata(item)
is_deferred = item.has_deferred_data
item.dataset.state = item.dataset.states.SETTING_METADATA
item.state = item.dataset.states.SETTING_METADATA
if is_deferred:
if params.datatype == "auto": # if `auto` just keep the original guessed datatype
item.update() # TODO: remove this `update` when we can properly track the operation results to notify the history
else:
trans.app.datatypes_registry.change_datatype(item, params.datatype)
item.dataset.state = item.dataset.states.DEFERRED
item.state = item.dataset.states.DEFERRED
else:
return change_datatype.si(
dataset_id=item.id, datatype=params.datatype, task_user_id=getattr(trans.user, "id", None)
Expand Down
2 changes: 1 addition & 1 deletion test/unit/app/tools/test_column_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def build_ready_hda(self):
extension="interval", create_dataset=True, sa_session=self.app.model.context
)
)
ready_hda.set_dataset_state("ok")
ready_hda.state = "ok"
return ready_hda

@property
Expand Down
10 changes: 5 additions & 5 deletions test/unit/app/tools/test_parameter_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,11 +226,11 @@ def test_DatasetOkValidator(self):
ok_hda = hist.add_dataset(
HistoryDatasetAssociation(id=1, extension="interval", create_dataset=True, sa_session=sa_session)
)
ok_hda.set_dataset_state(Dataset.states.OK)
ok_hda.state = Dataset.states.OK
notok_hda = hist.add_dataset(
HistoryDatasetAssociation(id=2, extension="interval", create_dataset=True, sa_session=sa_session)
)
notok_hda.set_dataset_state(Dataset.states.EMPTY)
notok_hda.state = Dataset.states.EMPTY

p = self._parameter_for(
xml="""
Expand Down Expand Up @@ -349,7 +349,7 @@ def test_MetadataValidator(self):
dataset=Dataset(external_filename=get_test_fname("1.bed")),
)
)
hda.set_dataset_state(Dataset.states.OK)
hda.state = Dataset.states.OK
hda.set_meta()
hda.metadata.strandCol = hda.metadata.spec["strandCol"].no_value

Expand Down Expand Up @@ -409,12 +409,12 @@ def test_UnspecifiedBuildValidator(self):
has_dbkey_hda = hist.add_dataset(
HistoryDatasetAssociation(id=1, extension="interval", create_dataset=True, sa_session=sa_session)
)
has_dbkey_hda.set_dataset_state(Dataset.states.OK)
has_dbkey_hda.state = Dataset.states.OK
has_dbkey_hda.metadata.dbkey = "hg19"
has_no_dbkey_hda = hist.add_dataset(
HistoryDatasetAssociation(id=2, extension="interval", create_dataset=True, sa_session=sa_session)
)
has_no_dbkey_hda.set_dataset_state(Dataset.states.OK)
has_no_dbkey_hda.state = Dataset.states.OK

p = self._parameter_for(
xml="""
Expand Down
Loading