From ab868b3e76ed750acab337d83969d9c4039fed4f Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Thu, 25 Apr 2024 11:23:26 +0200 Subject: [PATCH 1/3] Use dataset's encoded_id instead of hid for export filenames --- lib/galaxy/model/store/__init__.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/lib/galaxy/model/store/__init__.py b/lib/galaxy/model/store/__init__.py index 1a38ebfd4ae4..1c8b7eb7b740 100644 --- a/lib/galaxy/model/store/__init__.py +++ b/lib/galaxy/model/store/__init__.py @@ -1961,8 +1961,6 @@ def add(src, dest): dir_name = "datasets" dir_path = os.path.join(export_directory, dir_name) - dataset_hid = as_dict["hid"] - assert dataset_hid, as_dict if dataset.dataset.id in self.dataset_id_to_path: file_name, extra_files_path = self.dataset_id_to_path[dataset.dataset.id] @@ -1981,7 +1979,7 @@ def add(src, dest): self.serialization_options.get_identifier(self.security, conversion) if conversion else None ) target_filename = get_export_dataset_filename( - as_dict["name"], as_dict["extension"], dataset_hid, conversion_key=conversion_key + as_dict["name"], as_dict["extension"], as_dict["encoded_id"], conversion_key=conversion_key ) arcname = os.path.join(dir_name, target_filename) src = file_name @@ -1997,7 +1995,7 @@ def add(src, dest): if len(file_list): extra_files_target_filename = get_export_dataset_extra_files_dir_name( - as_dict["name"], as_dict["extension"], dataset_hid, conversion_key=conversion_key + as_dict["name"], as_dict["extension"], as_dict["encoded_id"], conversion_key=conversion_key ) arcname = os.path.join(dir_name, extra_files_target_filename) add(extra_files_path, os.path.join(export_directory, arcname)) @@ -2967,22 +2965,22 @@ def tar_export_directory(export_directory: StrPath, out_file: StrPath, gzip: boo store_archive.add(os.path.join(export_directory, export_path), arcname=export_path) -def get_export_dataset_filename(name: str, ext: str, hid: int, conversion_key: Optional[str]) -> str: +def get_export_dataset_filename(name: str, ext: str, encoded_id: str, conversion_key: Optional[str]) -> str: """ Builds a filename for a dataset using its name an extension. """ base = "".join(c in FILENAME_VALID_CHARS and c or "_" for c in name) if not conversion_key: - return f"{base}_{hid}.{ext}" + return f"{base}_{encoded_id}.{ext}" else: - return f"{base}_{hid}_conversion_{conversion_key}.{ext}" + return f"{base}_{encoded_id}_conversion_{conversion_key}.{ext}" -def get_export_dataset_extra_files_dir_name(name: str, ext: str, hid: int, conversion_key: Optional[str]) -> str: +def get_export_dataset_extra_files_dir_name(name: str, ext: str, encoded_id: str, conversion_key: Optional[str]) -> str: if not conversion_key: - return f"extra_files_path_{hid}" + return f"extra_files_path_{encoded_id}" else: - return f"extra_files_path_{hid}_conversion_{conversion_key}" + return f"extra_files_path_{encoded_id}_conversion_{conversion_key}" def imported_store_for_metadata( From 78bf048b3a7f53b4ae56f4a88a0a1eaacb1b79f6 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Thu, 25 Apr 2024 11:24:56 +0200 Subject: [PATCH 2/3] Remove unused parameters in get_export_dataset_extra_files_dir_name --- lib/galaxy/model/store/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/model/store/__init__.py b/lib/galaxy/model/store/__init__.py index 1c8b7eb7b740..a855633fe81b 100644 --- a/lib/galaxy/model/store/__init__.py +++ b/lib/galaxy/model/store/__init__.py @@ -1995,7 +1995,7 @@ def add(src, dest): if len(file_list): extra_files_target_filename = get_export_dataset_extra_files_dir_name( - as_dict["name"], as_dict["extension"], as_dict["encoded_id"], conversion_key=conversion_key + as_dict["encoded_id"], conversion_key=conversion_key ) arcname = os.path.join(dir_name, extra_files_target_filename) add(extra_files_path, os.path.join(export_directory, arcname)) @@ -2976,7 +2976,7 @@ def get_export_dataset_filename(name: str, ext: str, encoded_id: str, conversion return f"{base}_{encoded_id}_conversion_{conversion_key}.{ext}" -def get_export_dataset_extra_files_dir_name(name: str, ext: str, encoded_id: str, conversion_key: Optional[str]) -> str: +def get_export_dataset_extra_files_dir_name(encoded_id: str, conversion_key: Optional[str]) -> str: if not conversion_key: return f"extra_files_path_{encoded_id}" else: From 419e4954f760e38b99715c353c4c3b346b105c25 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Thu, 25 Apr 2024 12:06:28 +0200 Subject: [PATCH 3/3] Add test to verify export dataset without hid works --- test/unit/data/model/test_model_store.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/unit/data/model/test_model_store.py b/test/unit/data/model/test_model_store.py index b2a3b0739103..f7c4ea1ce140 100644 --- a/test/unit/data/model/test_model_store.py +++ b/test/unit/data/model/test_model_store.py @@ -544,6 +544,21 @@ def validate_invocation_collection_crate_directory(crate_directory): assert dataset in root["hasPart"] +def test_export_history_with_missing_hid(): + # The dataset's hid was used to compose the file name during the export but it + # can be missing sometimes. We now use the dataset's encoded id instead. + app = _mock_app() + u, history, d1, d2, j = _setup_simple_cat_job(app) + + # Remove hid from d1 + d1.hid = None + app.commit() + + temp_directory = mkdtemp() + with store.DirectoryModelExportStore(temp_directory, app=app, export_files="copy") as export_store: + export_store.export_history(history) + + def test_export_history_to_ro_crate(tmp_path): app = _mock_app() u, history, d1, d2, j = _setup_simple_cat_job(app)