From b20f4a06b1fc6680256245915afad4c609823ac7 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Thu, 25 Apr 2024 11:23:26 +0200 Subject: [PATCH 1/3] Use dataset's encoded_id instead of hid for export filenames --- lib/galaxy/model/store/__init__.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/lib/galaxy/model/store/__init__.py b/lib/galaxy/model/store/__init__.py index 23d68dd64230..a0c187b3ffe2 100644 --- a/lib/galaxy/model/store/__init__.py +++ b/lib/galaxy/model/store/__init__.py @@ -1972,8 +1972,6 @@ def add(src, dest): dir_name = "datasets" dir_path = os.path.join(export_directory, dir_name) - dataset_hid = as_dict["hid"] - assert dataset_hid, as_dict if dataset.dataset.id in self.dataset_id_to_path: file_name, extra_files_path = self.dataset_id_to_path[dataset.dataset.id] @@ -1992,7 +1990,7 @@ def add(src, dest): self.serialization_options.get_identifier(self.security, conversion) if conversion else None ) target_filename = get_export_dataset_filename( - as_dict["name"], as_dict["extension"], dataset_hid, conversion_key=conversion_key + as_dict["name"], as_dict["extension"], as_dict["encoded_id"], conversion_key=conversion_key ) arcname = os.path.join(dir_name, target_filename) src = file_name @@ -2008,7 +2006,7 @@ def add(src, dest): if len(file_list): extra_files_target_filename = get_export_dataset_extra_files_dir_name( - as_dict["name"], as_dict["extension"], dataset_hid, conversion_key=conversion_key + as_dict["name"], as_dict["extension"], as_dict["encoded_id"], conversion_key=conversion_key ) arcname = os.path.join(dir_name, extra_files_target_filename) add(extra_files_path, os.path.join(export_directory, arcname)) @@ -2973,22 +2971,22 @@ def tar_export_directory(export_directory: StrPath, out_file: StrPath, gzip: boo store_archive.add(os.path.join(export_directory, export_path), arcname=export_path) -def get_export_dataset_filename(name: str, ext: str, hid: int, conversion_key: Optional[str]) -> str: +def get_export_dataset_filename(name: str, ext: str, encoded_id: str, conversion_key: Optional[str]) -> str: """ Builds a filename for a dataset using its name an extension. """ base = "".join(c in FILENAME_VALID_CHARS and c or "_" for c in name) if not conversion_key: - return f"{base}_{hid}.{ext}" + return f"{base}_{encoded_id}.{ext}" else: - return f"{base}_{hid}_conversion_{conversion_key}.{ext}" + return f"{base}_{encoded_id}_conversion_{conversion_key}.{ext}" -def get_export_dataset_extra_files_dir_name(name: str, ext: str, hid: int, conversion_key: Optional[str]) -> str: +def get_export_dataset_extra_files_dir_name(name: str, ext: str, encoded_id: str, conversion_key: Optional[str]) -> str: if not conversion_key: - return f"extra_files_path_{hid}" + return f"extra_files_path_{encoded_id}" else: - return f"extra_files_path_{hid}_conversion_{conversion_key}" + return f"extra_files_path_{encoded_id}_conversion_{conversion_key}" def imported_store_for_metadata( From 6748d0e1f831249badddaea48e2cfa9c7d63dd55 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Thu, 25 Apr 2024 11:24:56 +0200 Subject: [PATCH 2/3] Remove unused parameters in get_export_dataset_extra_files_dir_name --- lib/galaxy/model/store/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/model/store/__init__.py b/lib/galaxy/model/store/__init__.py index a0c187b3ffe2..1b78f52ebcc8 100644 --- a/lib/galaxy/model/store/__init__.py +++ b/lib/galaxy/model/store/__init__.py @@ -2006,7 +2006,7 @@ def add(src, dest): if len(file_list): extra_files_target_filename = get_export_dataset_extra_files_dir_name( - as_dict["name"], as_dict["extension"], as_dict["encoded_id"], conversion_key=conversion_key + as_dict["encoded_id"], conversion_key=conversion_key ) arcname = os.path.join(dir_name, extra_files_target_filename) add(extra_files_path, os.path.join(export_directory, arcname)) @@ -2982,7 +2982,7 @@ def get_export_dataset_filename(name: str, ext: str, encoded_id: str, conversion return f"{base}_{encoded_id}_conversion_{conversion_key}.{ext}" -def get_export_dataset_extra_files_dir_name(name: str, ext: str, encoded_id: str, conversion_key: Optional[str]) -> str: +def get_export_dataset_extra_files_dir_name(encoded_id: str, conversion_key: Optional[str]) -> str: if not conversion_key: return f"extra_files_path_{encoded_id}" else: From b7eda89485cbef37f18b1c2ce6304b36565ebfd1 Mon Sep 17 00:00:00 2001 From: davelopez <46503462+davelopez@users.noreply.github.com> Date: Thu, 25 Apr 2024 12:06:28 +0200 Subject: [PATCH 3/3] Add test to verify export dataset without hid works --- test/unit/data/model/test_model_store.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/unit/data/model/test_model_store.py b/test/unit/data/model/test_model_store.py index 5e0163ce12bb..7fae0bdcbdd3 100644 --- a/test/unit/data/model/test_model_store.py +++ b/test/unit/data/model/test_model_store.py @@ -545,6 +545,21 @@ def validate_invocation_collection_crate_directory(crate_directory): assert dataset in root["hasPart"] +def test_export_history_with_missing_hid(): + # The dataset's hid was used to compose the file name during the export but it + # can be missing sometimes. We now use the dataset's encoded id instead. + app = _mock_app() + u, history, d1, d2, j = _setup_simple_cat_job(app) + + # Remove hid from d1 + d1.hid = None + app.commit() + + temp_directory = mkdtemp() + with store.DirectoryModelExportStore(temp_directory, app=app, export_files="copy") as export_store: + export_store.export_history(history) + + def test_export_history_to_ro_crate(tmp_path): app = _mock_app() u, history, d1, d2, j = _setup_simple_cat_job(app)