From c71f944093a538e134cefb4f1731e2f6a56f0047 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Mon, 12 Aug 2024 12:11:33 +0200 Subject: [PATCH] Make sure we set file size also for purged outputs Fixes one of the integration tests: ``` Exception: HistoryDatasetAssociation in state ok with null file size, this is not valid ERROR galaxy.jobs.runners:__init__.py:177 (1) Unhandled exception calling finish_job Traceback (most recent call last): File "/home/runner/work/galaxy/galaxy/galaxy root/lib/galaxy/jobs/runners/pulsar.py", line 702, in finish_job job_wrapper.finish( File "/home/runner/work/galaxy/galaxy/galaxy root/lib/galaxy/jobs/__init__.py", line 2054, in finish self.sa_session.commit() File "/home/runner/work/galaxy/galaxy/galaxy root/.venv/lib/python3.8/site-packages/sqlalchemy/orm/scoping.py", line 597, in commit return self._proxied.commit() File "/home/runner/work/galaxy/galaxy/galaxy root/.venv/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2017, in commit trans.commit(_to_root=True) File "", line 2, in commit File "/home/runner/work/galaxy/galaxy/galaxy root/.venv/lib/python3.8/site-packages/sqlalchemy/orm/state_changes.py", line 139, in _go ret_value = fn(self, *arg, **kw) File "/home/runner/work/galaxy/galaxy/galaxy root/.venv/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 1302, in commit self._prepare_impl() File "", line 2, in _prepare_impl File "/home/runner/work/galaxy/galaxy/galaxy root/.venv/lib/python3.8/site-packages/sqlalchemy/orm/state_changes.py", line 139, in _go ret_value = fn(self, *arg, **kw) File "/home/runner/work/galaxy/galaxy/galaxy root/.venv/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 1277, in _prepare_impl self.session.flush() File "/home/runner/work/galaxy/galaxy/galaxy root/.venv/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 4341, in flush self._flush(objects) File "/home/runner/work/galaxy/galaxy/galaxy root/.venv/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 4369, in _flush self.dispatch.before_flush(self, flush_context, objects) File "/home/runner/work/galaxy/galaxy/galaxy root/.venv/lib/python3.8/site-packages/sqlalchemy/event/attr.py", line 378, in __call__ fn(*args, **kw) File "/home/runner/work/galaxy/galaxy/galaxy root/lib/galaxy/model/base.py", line 184, in before_flush obj.__strict_check_before_flush__() File "/home/runner/work/galaxy/galaxy/galaxy root/lib/galaxy/model/__init__.py", line 5223, in __strict_check_before_flush__ raise Exception( Exception: HistoryDatasetAssociation in state ok with null file size, this is not valid During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/home/runner/work/galaxy/galaxy/galaxy root/lib/galaxy/jobs/runners/__init__.py", line 174, in run_next method(arg) File "/home/runner/work/galaxy/galaxy/galaxy root/lib/galaxy/jobs/runners/pulsar.py", line 713, in finish_job job_wrapper.fail("Unable to finish job", exception=True) File "/home/runner/work/galaxy/galaxy/galaxy root/lib/galaxy/jobs/__init__.py", line 1468, in fail self.sa_session.commit() File "/home/runner/work/galaxy/galaxy/galaxy root/.venv/lib/python3.8/site-packages/sqlalchemy/orm/scoping.py", line 597, in commit return self._proxied.commit() File "/home/runner/work/galaxy/galaxy/galaxy root/.venv/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 2017, in commit trans.commit(_to_root=True) File "", line 2, in commit File "/home/runner/work/galaxy/galaxy/galaxy root/.venv/lib/python3.8/site-packages/sqlalchemy/orm/state_changes.py", line 139, in _go ret_value = fn(self, *arg, **kw) File "/home/runner/work/galaxy/galaxy/galaxy root/.venv/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 1302, in commit self._prepare_impl() File "", line 2, in _prepare_impl File "/home/runner/work/galaxy/galaxy/galaxy root/.venv/lib/python3.8/site-packages/sqlalchemy/orm/state_changes.py", line 139, in _go ret_value = fn(self, *arg, **kw) File "/home/runner/work/galaxy/galaxy/galaxy root/.venv/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 1277, in _prepare_impl self.session.flush() File "/home/runner/work/galaxy/galaxy/galaxy root/.venv/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 4341, in flush self._flush(objects) File "/home/runner/work/galaxy/galaxy/galaxy root/.venv/lib/python3.8/site-packages/sqlalchemy/orm/session.py", line 4369, in _flush self.dispatch.before_flush(self, flush_context, objects) File "/home/runner/work/galaxy/galaxy/galaxy root/.venv/lib/python3.8/site-packages/sqlalchemy/event/attr.py", line 378, in __call__ fn(*args, **kw) File "/home/runner/work/galaxy/galaxy/galaxy root/lib/galaxy/model/base.py", line 184, in before_flush obj.__strict_check_before_flush__() File "/home/runner/work/galaxy/galaxy/galaxy root/lib/galaxy/model/__init__.py", line 5223, in __strict_check_before_flush__ raise Exception( Exception: HistoryDatasetAssociation in state ok with null file size, this is not valid ``` More fallout from https://github.com/galaxyproject/galaxy/pull/18653 --- lib/galaxy/jobs/__init__.py | 10 +++++----- lib/galaxy/model/store/__init__.py | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/galaxy/jobs/__init__.py b/lib/galaxy/jobs/__init__.py index e6fd99f2340e..86098172de86 100644 --- a/lib/galaxy/jobs/__init__.py +++ b/lib/galaxy/jobs/__init__.py @@ -2001,13 +2001,13 @@ def fail(message=job.info, exception=None): # Once datasets are collected, set the total dataset size (includes extra files) for dataset_assoc in job.output_datasets: dataset = dataset_assoc.dataset.dataset - if not dataset.purged: - # assume all datasets in a job get written to the same objectstore - quota_source_info = dataset.quota_source_info - collected_bytes += dataset.set_total_size() - else: + # assume all datasets in a job get written to the same objectstore + quota_source_info = dataset.quota_source_info + collected_bytes += dataset.set_total_size() + if dataset.purged: # Purge, in case job wrote directly to object store dataset.full_delete() + collected_bytes = 0 user = job.user if user and collected_bytes > 0 and quota_source_info is not None and quota_source_info.use: diff --git a/lib/galaxy/model/store/__init__.py b/lib/galaxy/model/store/__init__.py index 4880b83e7b96..1ea9fcf69855 100644 --- a/lib/galaxy/model/store/__init__.py +++ b/lib/galaxy/model/store/__init__.py @@ -665,9 +665,9 @@ def handle_dataset_object_edit(dataset_instance, dataset_attrs): assert file_source_root dataset_extra_files_path = os.path.join(file_source_root, dataset_extra_files_path) persist_extra_files(self.object_store, dataset_extra_files_path, dataset_instance) - # Don't trust serialized file size - dataset_instance.dataset.file_size = None - dataset_instance.dataset.set_total_size() # update the filesize record in the database + # Only trust file size if the dataset is purged. If we keep the data we should check the file size. + dataset_instance.dataset.file_size = None + dataset_instance.dataset.set_total_size() # update the filesize record in the database if dataset_instance.deleted: dataset_instance.dataset.deleted = True