Don't push purged dataset contents to object store
This in particular needs a lot of new tests.
We will also need to actively purge datasets in the model store import
code, since users might have purged datasets while the job ran.
Again, more tests needed.
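
The fix applies the same guard everywhere job code writes dataset content back to the object store. A minimal, self-contained sketch of the pattern with stand-in classes (not Galaxy's real models, whose update_from_file takes more arguments):

import os
from dataclasses import dataclass


@dataclass
class Dataset:
    # Stand-in for Galaxy's Dataset model; only the flag the guard reads.
    purged: bool = False


class ObjectStore:
    # Stand-in store that just reports what would be pushed.
    def update_from_file(self, dataset, file_name, create=False):
        print(f"pushing {file_name} (create={create})")


def push_if_necessary(object_store, dataset, external_filename):
    # Never write content back for a purged dataset: the user asked for it
    # to be gone, and pushing would resurrect it in the object store.
    # The size check also skips empty outputs discovered in the working dir.
    if not dataset.purged and os.path.getsize(external_filename):
        object_store.update_from_file(dataset, file_name=external_filename, create=True)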
mvdbeek committed Jun 7, 2024
1 parent 33556c6 commit ed022ba
Showing 5 changed files with 25 additions and 20 deletions.
4 changes: 2 additions & 2 deletions lib/galaxy/metadata/set_metadata.py
@@ -96,7 +96,7 @@ def push_if_necessary(object_store: ObjectStore, dataset: DatasetInstance, exter
     # or a remote object store from its cache path.
     # empty files could happen when outputs are discovered from working dir,
     # empty file check needed for e.g. test/integration/test_extended_metadata_outputs_to_working_directory.py::test_tools[multi_output_assign_primary]
-    if os.path.getsize(external_filename):
+    if not dataset.dataset.purged and os.path.getsize(external_filename):
         object_store.update_from_file(dataset.dataset, file_name=external_filename, create=True)


@@ -477,7 +477,7 @@ def set_meta(new_dataset_instance, file_dict):
     object_store_update_actions.append(partial(reset_external_filename, dataset))
     object_store_update_actions.append(partial(dataset.set_total_size))
     object_store_update_actions.append(partial(export_store.add_dataset, dataset))
-    if dataset_instance_id not in unnamed_id_to_path:
+    if dataset_instance_id not in unnamed_id_to_path and not dataset.dataset.purged:
         object_store_update_actions.append(partial(collect_extra_files, object_store, dataset, "."))
     dataset_state = "deferred" if (is_deferred and final_job_state == "ok") else final_job_state
     if not dataset.state == dataset.states.ERROR:
15 changes: 8 additions & 7 deletions lib/galaxy/model/__init__.py
@@ -9485,13 +9485,14 @@ def dataset(self) -> Optional[Dataset]:
     def update_from_file(self, file_name):
         if not self.dataset:
             raise Exception("Attempted to write MetadataFile, but no DatasetAssociation set")
-        self.dataset.object_store.update_from_file(
-            self,
-            file_name=file_name,
-            extra_dir="_metadata_files",
-            extra_dir_at_root=True,
-            alt_name=os.path.basename(self.get_file_name()),
-        )
+        if not self.dataset.purged:
+            self.dataset.object_store.update_from_file(
+                self,
+                file_name=file_name,
+                extra_dir="_metadata_files",
+                extra_dir_at_root=True,
+                alt_name=os.path.basename(self.get_file_name()),
+            )

     def get_file_name(self, sync_cache=True):
         # Ensure the directory structure and the metadata file object exist
21 changes: 11 additions & 10 deletions lib/galaxy/model/store/__init__.py
@@ -654,17 +654,18 @@ def handle_dataset_object_edit(dataset_instance, dataset_attrs):
     dataset_instance.state = dataset_state
     if not self.object_store:
         raise Exception(f"self.object_store is missing from {self}.")
-    self.object_store.update_from_file(
-        dataset_instance.dataset, file_name=temp_dataset_file_name, create=True
-    )
+    if not dataset_instance.dataset.purged:
+        self.object_store.update_from_file(
+            dataset_instance.dataset, file_name=temp_dataset_file_name, create=True
+        )

-    # Import additional files if present. Histories exported previously might not have this attribute set.
-    dataset_extra_files_path = dataset_attrs.get("extra_files_path", None)
-    if dataset_extra_files_path:
-        assert file_source_root
-        dataset_extra_files_path = os.path.join(file_source_root, dataset_extra_files_path)
-        persist_extra_files(self.object_store, dataset_extra_files_path, dataset_instance)
-    # Don't trust serialized file size
+        # Import additional files if present. Histories exported previously might not have this attribute set.
+        dataset_extra_files_path = dataset_attrs.get("extra_files_path", None)
+        if dataset_extra_files_path:
+            assert file_source_root
+            dataset_extra_files_path = os.path.join(file_source_root, dataset_extra_files_path)
+            persist_extra_files(self.object_store, dataset_extra_files_path, dataset_instance)
+    # Don't trust serialized file size
     dataset_instance.dataset.file_size = None
     dataset_instance.dataset.set_total_size()  # update the filesize record in the database
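
The commit message flags a follow-up: the model store import code must also actively purge datasets that were purged while the job ran. A hypothetical sketch of that cleanup, assuming only that the object store exposes its delete method (the helper name below is made up, not part of this commit):

def purge_imported_dataset_if_needed(object_store, dataset_instance):
    # Hypothetical follow-up: if the dataset was purged mid-job,
    # drop any content the import already wrote for it.
    dataset = dataset_instance.dataset
    if dataset.purged:
        object_store.delete(dataset)
        dataset.file_size = 0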
3 changes: 3 additions & 0 deletions lib/galaxy/model/store/discover.py
@@ -214,6 +214,9 @@ def create_dataset(
         return primary_data

     def finalize_storage(self, primary_data, dataset_attributes, extra_files, filename, link_data, output_name):
+        if primary_data.dataset.purged:
+            # metadata won't be set, maybe we should do that, then purge ?
+            return
         # Move data from temp location to dataset location
         if not link_data:
             dataset = primary_data.dataset
2 changes: 1 addition & 1 deletion lib/galaxy/objectstore/__init__.py
@@ -1670,7 +1670,7 @@ def persist_extra_files(
     primary_data: "DatasetInstance",
     extra_files_path_name: Optional[str] = None,
 ) -> None:
-    if os.path.exists(src_extra_files_path):
+    if not primary_data.dataset.purged and os.path.exists(src_extra_files_path):
         assert primary_data.dataset
         if not extra_files_path_name:
             extra_files_path_name = primary_data.dataset.extra_files_path_name_from(object_store)

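The commit message also calls for new tests. A sketch of the kind of pytest case that could cover the guard, reusing the stand-in classes from the sketch near the top (the test name is hypothetical):

def test_purged_dataset_is_not_pushed(tmp_path):
    pushed = []

    class RecordingStore(ObjectStore):
        # Record pushes instead of writing anywhere.
        def update_from_file(self, dataset, file_name, create=False):
            pushed.append(file_name)

    external = tmp_path / "out.dat"
    external.write_text("data")

    # Purged dataset: the guard must short-circuit before any push.
    push_if_necessary(RecordingStore(), Dataset(purged=True), str(external))
    assert pushed == []

    # Live dataset with content: the push goes through.
    push_if_necessary(RecordingStore(), Dataset(purged=False), str(external))
    assert pushed == [str(external)]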