From 570a05541a064001a4ab147700bb39071f48a746 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Tue, 23 Jan 2024 12:57:45 -0500 Subject: [PATCH 1/5] Reduce duplication in test_model_store. --- test/unit/data/model/test_model_store.py | 28 ++++++++++-------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/test/unit/data/model/test_model_store.py b/test/unit/data/model/test_model_store.py index e9d222a9c159..6baa29bc77e4 100644 --- a/test/unit/data/model/test_model_store.py +++ b/test/unit/data/model/test_model_store.py @@ -73,9 +73,7 @@ def test_import_export_history_hidden_false_with_hidden_dataset(): u, h, d1, d2, j = _setup_simple_cat_job(app) d2.visible = False - session = app.model.session - with transaction(session): - session.commit() + app.commit() imported_history = _import_export_history(app, h, export_files="copy", include_hidden=False) assert d1.dataset.get_size() == imported_history.datasets[0].get_size() @@ -87,9 +85,7 @@ def test_import_export_history_hidden_true_with_hidden_dataset(): u, h, d1, d2, j = _setup_simple_cat_job(app) d2.visible = False - session = app.model.session - with transaction(session): - session.commit() + app.commit() imported_history = _import_export_history(app, h, export_files="copy", include_hidden=True) assert d1.dataset.get_size() == imported_history.datasets[0].get_size() @@ -251,8 +247,7 @@ def test_import_library_require_permissions(): root_folder = model.LibraryFolder(name="my library 1", description="folder description") library.root_folder = root_folder sa_session.add_all((library, root_folder)) - with transaction(sa_session): - sa_session.commit() + app.commit() temp_directory = mkdtemp() with store.DirectoryModelExportStore(temp_directory, app=app) as export_store: @@ -280,8 +275,7 @@ def test_import_export_library(): root_folder = model.LibraryFolder(name="my library 1", description="folder description") library.root_folder = root_folder sa_session.add_all((library, root_folder)) - with transaction(sa_session): - sa_session.commit() + app.commit() subfolder = model.LibraryFolder(name="sub folder 1", description="sub folder") root_folder.add_folder(subfolder) @@ -295,8 +289,7 @@ def test_import_export_library(): sa_session.add(ld) sa_session.add(ldda) - with transaction(sa_session): - sa_session.commit() + app.commit() assert len(root_folder.datasets) == 1 assert len(root_folder.folders) == 1 @@ -338,8 +331,7 @@ def test_import_export_invocation(): sa_session = app.model.context h2 = model.History(user=workflow_invocation.user) sa_session.add(h2) - with transaction(sa_session): - sa_session.commit() + app.commit() import_model_store = store.get_import_model_store_for_directory( temp_directory, app=app, user=workflow_invocation.user, import_options=store.ImportOptions() @@ -1054,6 +1046,11 @@ def read_workflow_from_path(self, app, user, path, allow_in_directory=None): class TestApp(GalaxyDataTestApp): workflow_contents_manager = MockWorkflowContentsManager() + def commit(self): + session = self.model.session + with transaction(session): + session.commit() + def _mock_app(store_by=DEFAULT_OBJECT_STORE_BY): app = TestApp() @@ -1091,8 +1088,7 @@ def setup_fixture_context_with_history( app, sa_session, user = setup_fixture_context_with_user(**kwd) history = model.History(name=history_name, user=user) sa_session.add(history) - with transaction(sa_session): - sa_session.commit() + app.commit() return StoreFixtureContextWithHistory(app, sa_session, user, history) From 2259e1f6332e370f248994ac9b1e46e8fea9fe6f Mon Sep 17 00:00:00 
2001 From: John Chilton Date: Tue, 23 Jan 2024 13:04:04 -0500 Subject: [PATCH 2/5] More de-duplication in test_model_store.py. --- test/unit/data/model/test_model_store.py | 51 ++++++++---------------- 1 file changed, 16 insertions(+), 35 deletions(-) diff --git a/test/unit/data/model/test_model_store.py b/test/unit/data/model/test_model_store.py index 6baa29bc77e4..6098a88b046b 100644 --- a/test/unit/data/model/test_model_store.py +++ b/test/unit/data/model/test_model_store.py @@ -607,9 +607,7 @@ def test_import_export_edit_collection(): sa_session.add(hc1) sa_session.add(h) import_history = model.History(name="Test History for Import", user=u) - sa_session.add(import_history) - with transaction(sa_session): - sa_session.commit() + app.add_and_commit(import_history) temp_directory = mkdtemp() with store.DirectoryModelExportStore(temp_directory, app=app, for_edit=True) as export_store: @@ -682,9 +680,7 @@ def test_import_export_composite_datasets(): d1 = _create_datasets(sa_session, h, 1, extension="html")[0] d1.dataset.create_extra_files_path() - sa_session.add_all((h, d1)) - with transaction(sa_session): - sa_session.commit() + app.add_and_commit(h, d1) primary = NamedTemporaryFile("w") primary.write("cool primary file") @@ -709,9 +705,7 @@ def test_import_export_composite_datasets(): export_store.add_dataset(d1) import_history = model.History(name="Test History for Import", user=u) - sa_session.add(import_history) - with transaction(sa_session): - sa_session.commit() + app.add_and_commit(import_history) _perform_import_from_directory(temp_directory, app, u, import_history) assert len(import_history.datasets) == 1 import_dataset = import_history.datasets[0] @@ -734,9 +728,7 @@ def test_edit_metadata_files(): h = model.History(name="Test History", user=u) d1 = _create_datasets(sa_session, h, 1, extension="bam")[0] - sa_session.add_all((h, d1)) - with transaction(sa_session): - sa_session.commit() + app.add_and_commit(h, d1) index = NamedTemporaryFile("w") index.write("cool bam index") metadata_dict = {"bam_index": MetadataTempFile.from_JSON({"kwds": {}, "filename": index.name})} @@ -751,9 +743,7 @@ def test_edit_metadata_files(): export_store.add_dataset(d1) import_history = model.History(name="Test History for Import", user=u) - sa_session.add(import_history) - with transaction(sa_session): - sa_session.commit() + app.add_and_commit(import_history) _perform_import_from_directory(temp_directory, app, u, import_history, store.ImportOptions(allow_edit=True)) @@ -790,12 +780,8 @@ def _setup_simple_export(export_kwds): u, h, d1, d2, j = _setup_simple_cat_job(app) - sa_session = app.model.context - import_history = model.History(name="Test History for Import", user=u) - sa_session.add(import_history) - with transaction(sa_session): - sa_session.commit() + app.add_and_commit(import_history) temp_directory = mkdtemp() with store.DirectoryModelExportStore(temp_directory, app=app, **export_kwds) as export_store: @@ -843,9 +829,7 @@ def _setup_simple_cat_job(app, state="ok"): j.add_input_dataset("input1", d1) j.add_output_dataset("out_file1", d2) - sa_session.add_all((d1, d2, h, j)) - with transaction(sa_session): - sa_session.commit() + app.add_and_commit(d1, d2, h, j) app.object_store.update_from_file(d1, file_name=TEST_PATH_1, create=True) app.object_store.update_from_file(d2, file_name=TEST_PATH_2, create=True) @@ -880,9 +864,7 @@ def _setup_invocation(app): workflow_invocation.add_input(d1, step=workflow_step_1) wf_output = model.WorkflowOutput(workflow_step_1, label="output_label") 
workflow_invocation.add_output(wf_output, workflow_step_1, d2) - sa_session.add(workflow_invocation) - with transaction(sa_session): - sa_session.commit() + app.add_and_commit(workflow_invocation) return workflow_invocation @@ -928,8 +910,7 @@ def _setup_simple_collection_job(app, state="ok"): sa_session.add(hc2) sa_session.add(hc3) sa_session.add(j) - with transaction(sa_session): - sa_session.commit() + app.commit() return u, h, c1, c2, c3, hc1, hc2, hc3, j @@ -955,9 +936,7 @@ def _setup_collection_invocation(app): wf_output = model.WorkflowOutput(workflow_step_1, label="output_label") workflow_invocation.add_output(wf_output, workflow_step_1, hc3) - sa_session.add(workflow_invocation) - with transaction(sa_session): - sa_session.commit() + app.add_and_commit(workflow_invocation) return workflow_invocation @@ -1036,16 +1015,18 @@ def read_workflow_from_path(self, app, user, path, allow_in_directory=None): workflow = model.Workflow() workflow.steps = [workflow_step_1] stored_workflow.latest_workflow = workflow - sa_session = app.model.context - sa_session.add_all((stored_workflow, workflow)) - with transaction(sa_session): - sa_session.commit() + app.add_and_commit(stored_workflow, workflow) return workflow class TestApp(GalaxyDataTestApp): workflow_contents_manager = MockWorkflowContentsManager() + def add_and_commit(self, *objs): + session = self.model.session + session.add_all(objs) + self.commit() + def commit(self): session = self.model.session with transaction(session): From cf8e72ff98b76c20353ce8a9b64f7adb4976c8c3 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Tue, 23 Jan 2024 13:08:37 -0500 Subject: [PATCH 3/5] Minimal test case demonstrating model stores break on implicit conversions. --- lib/galaxy/model/__init__.py | 36 ++++++++-- lib/galaxy/model/store/__init__.py | 88 ++++++++++++++++++++++-- test/unit/data/model/2.txt | 1 + test/unit/data/model/test_model_store.py | 27 ++++++++ 4 files changed, 142 insertions(+), 10 deletions(-) create mode 100644 test/unit/data/model/2.txt diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 9d38bda53b32..c73b564cbf5b 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -4674,12 +4674,14 @@ def get_converted_dataset(self, trans, target_ext, target_context=None, history= ).values() ) ) + return self.attach_implicitly_converted_dataset(trans.sa_session, new_dataset, target_ext) + + def attach_implicitly_converted_dataset(self, session, new_dataset, target_ext: str): new_dataset.name = self.name self.copy_attributes(new_dataset) assoc = ImplicitlyConvertedDatasetAssociation( parent=self, file_type=target_ext, dataset=new_dataset, metadata_safe=False ) - session = trans.sa_session session.add(new_dataset) session.add(assoc) with transaction(session): @@ -6016,7 +6018,7 @@ def inheritable(self): return True # always allow inheriting, used for replacement -class ImplicitlyConvertedDatasetAssociation(Base, RepresentById): +class ImplicitlyConvertedDatasetAssociation(Base, Serializable): __tablename__ = "implicitly_converted_dataset_association" id = Column(Integer, primary_key=True) @@ -6054,7 +6056,15 @@ class ImplicitlyConvertedDatasetAssociation(Base, RepresentById): ) def __init__( - self, id=None, parent=None, dataset=None, file_type=None, deleted=False, purged=False, metadata_safe=True + self, + id=None, + parent=None, + dataset=None, + file_type=None, + deleted=False, + purged=False, + metadata_safe=True, + for_import=False, ): self.id = id add_object_to_object_session(self, dataset) 
@@ -6062,14 +6072,18 @@ def __init__( self.dataset = dataset elif isinstance(dataset, LibraryDatasetDatasetAssociation): self.dataset_ldda = dataset - else: + elif not for_import: raise AttributeError(f"Unknown dataset type provided for dataset: {type(dataset)}") + # else if for import - these connections might not have been included in the store, + # recover the data we can? if isinstance(parent, HistoryDatasetAssociation): self.parent_hda = parent elif isinstance(parent, LibraryDatasetDatasetAssociation): self.parent_ldda = parent - else: + elif not for_import: raise AttributeError(f"Unknown dataset type provided for parent: {type(parent)}") + # else if for import - these connections might not have been included in the store, + # recover the data we can? self.type = file_type self.deleted = deleted self.purged = purged @@ -6089,6 +6103,18 @@ def clear(self, purge=False, delete_dataset=True): except Exception as e: log.error(f"Failed to purge associated file ({self.get_file_name()}) from disk: {unicodify(e)}") + def _serialize(self, id_encoder, serialization_options): + rval = dict_for( + self, + file_type=self.type, + ) + if self.parent_hda: + rval["parent_hda"] = serialization_options.get_identifier(id_encoder, self.parent_hda) + if self.dataset: + rval["hda"] = serialization_options.get_identifier(id_encoder, self.dataset) + serialization_options.attach_identifier(id_encoder, self, rval) + return rval + DEFAULT_COLLECTION_NAME = "Unnamed Collection" diff --git a/lib/galaxy/model/store/__init__.py b/lib/galaxy/model/store/__init__.py index 973cec825999..cabd6472bd81 100644 --- a/lib/galaxy/model/store/__init__.py +++ b/lib/galaxy/model/store/__init__.py @@ -133,12 +133,14 @@ ATTRS_FILENAME_LIBRARIES = "libraries_attrs.txt" ATTRS_FILENAME_LIBRARY_FOLDERS = "library_folders_attrs.txt" ATTRS_FILENAME_INVOCATIONS = "invocation_attrs.txt" +ATTRS_FILENAME_CONVERSIONS = "implicit_dataset_conversions.txt" TRACEBACK = "traceback.txt" GALAXY_EXPORT_VERSION = "2" DICT_STORE_ATTRS_KEY_HISTORY = "history" DICT_STORE_ATTRS_KEY_DATASETS = "datasets" DICT_STORE_ATTRS_KEY_COLLECTIONS = "collections" +DICT_STORE_ATTRS_KEY_CONVERSIONS = "implicit_dataset_conversions" DICT_STORE_ATTRS_KEY_JOBS = "jobs" DICT_STORE_ATTRS_KEY_IMPLICIT_COLLECTION_JOBS = "implicit_collection_jobs" DICT_STORE_ATTRS_KEY_LIBRARIES = "libraries" @@ -296,6 +298,10 @@ def invocations_properties(self) -> List[Dict[str, Any]]: def collections_properties(self) -> List[Dict[str, Any]]: """Return a list of HDCA properties.""" + @abc.abstractmethod + def implicit_dataset_conversion_properties(self) -> List[Dict[str, Any]]: + """Return a list of ImplicitlyConvertedDatasetAssociation properties.""" + @abc.abstractmethod def jobs_properties(self) -> List[Dict[str, Any]]: """Return a list of jobs properties.""" @@ -385,6 +391,7 @@ def perform_import( self._import_collection_instances(object_import_tracker, collections_attrs, history, new_history) self._import_collection_implicit_input_associations(object_import_tracker, collections_attrs) self._import_collection_copied_associations(object_import_tracker, collections_attrs) + self._import_implicit_dataset_conversions(object_import_tracker) self._reassign_hids(object_import_tracker, history) self._import_jobs(object_import_tracker, history) self._import_implicit_collection_jobs(object_import_tracker) @@ -1001,6 +1008,13 @@ def _reassign_hids(self, object_import_tracker: "ObjectImportTracker", history: history.stage_addition(obj) history.add_pending_items() + if 
object_import_tracker.copy_hid_for: + # in an if to avoid flush if unneeded + for from_dataset, to_dataset in object_import_tracker.copy_hid_for.items(): + to_dataset.hid = from_dataset.hid + self._session_add(to_dataset) + self._flush() + def _import_workflow_invocations( self, object_import_tracker: "ObjectImportTracker", history: Optional[model.History] ) -> None: @@ -1239,6 +1253,29 @@ def _import_jobs(self, object_import_tracker: "ObjectImportTracker", history: Op if object_key in job_attrs: object_import_tracker.jobs_by_key[job_attrs[object_key]] = imported_job + def _import_implicit_dataset_conversions(self, object_import_tracker: "ObjectImportTracker") -> None: + implicit_dataset_conversion_attrs = self.implicit_dataset_conversion_properties() + for idc_attrs in implicit_dataset_conversion_attrs: + # I don't know what metadata_safe does per se... should we copy this property or + # just set it to False? + metadata_safe = False + idc = model.ImplicitlyConvertedDatasetAssociation(metadata_safe=metadata_safe, for_import=True) + idc.type = idc_attrs["file_type"] + if idc_attrs.get("parent_hda"): + idc.parent_hda = object_import_tracker.hdas_by_key[idc_attrs["parent_hda"]] + if idc_attrs.get("hda"): + idc.dataset = object_import_tracker.hdas_by_key[idc_attrs["hda"]] + + # we have a the dataset and the parent, lets ensure they land up with the same HID + if idc.dataset and idc.parent_hda and idc.parent_hda in object_import_tracker.requires_hid: + try: + object_import_tracker.requires_hid.remove(idc.dataset) + except ValueError: + pass # we wanted to remove it anyway. + object_import_tracker.copy_hid_for[idc.parent_hda] = idc.dataset + + self._session_add(idc) + def _import_implicit_collection_jobs(self, object_import_tracker: "ObjectImportTracker") -> None: object_key = self.object_key @@ -1300,6 +1337,9 @@ def _copied_from_object_key( return copied_from_object_key +HasHid = Union[model.HistoryDatasetAssociation, model.HistoryDatasetCollectionAssociation] + + class ObjectImportTracker: """Keep track of new and existing imported objects. 
@@ -1317,7 +1357,8 @@ class ObjectImportTracker: hda_copied_from_sinks: Dict[ObjectKeyType, ObjectKeyType] hdca_copied_from_sinks: Dict[ObjectKeyType, ObjectKeyType] jobs_by_key: Dict[ObjectKeyType, model.Job] - requires_hid: List[Union[model.HistoryDatasetAssociation, model.HistoryDatasetCollectionAssociation]] + requires_hid: List[HasHid] + copy_hid_for: Dict[HasHid, HasHid] def __init__(self) -> None: self.libraries_by_key = {} @@ -1335,6 +1376,7 @@ def __init__(self) -> None: self.implicit_collection_jobs_by_key: Dict[str, ImplicitCollectionJobs] = {} self.workflows_by_key: Dict[str, model.Workflow] = {} self.requires_hid = [] + self.copy_hid_for = {} self.new_history: Optional[model.History] = None @@ -1418,6 +1460,9 @@ def datasets_properties( def collections_properties(self) -> List[Dict[str, Any]]: return self._store_as_dict.get(DICT_STORE_ATTRS_KEY_COLLECTIONS) or [] + def implicit_dataset_conversion_properties(self) -> List[Dict[str, Any]]: + return self._store_as_dict.get(DICT_STORE_ATTRS_KEY_CONVERSIONS) or [] + def library_properties( self, ) -> List[Dict[str, Any]]: @@ -1494,6 +1539,9 @@ def datasets_properties(self) -> List[Dict[str, Any]]: def collections_properties(self) -> List[Dict[str, Any]]: return self._read_list_if_exists(ATTRS_FILENAME_COLLECTIONS) + def implicit_dataset_conversion_properties(self) -> List[Dict[str, Any]]: + return self._read_list_if_exists(ATTRS_FILENAME_CONVERSIONS) + def library_properties( self, ) -> List[Dict[str, Any]]: @@ -1860,6 +1908,7 @@ def __init__( ) self.export_files = export_files self.included_datasets: Dict[model.DatasetInstance, Tuple[model.DatasetInstance, bool]] = {} + self.dataset_implicit_conversions: Dict[model.DatasetInstance, model.ImplicitlyConvertedDatasetAssociation] = {} self.included_collections: List[Union[model.DatasetCollection, model.HistoryDatasetCollectionAssociation]] = [] self.included_libraries: List[model.Library] = [] self.included_library_folders: List[model.LibraryFolder] = [] @@ -1928,7 +1977,13 @@ def add(src, dest): if not os.path.exists(dir_path): os.makedirs(dir_path) - target_filename = get_export_dataset_filename(as_dict["name"], as_dict["extension"], dataset_hid) + conversion = self.dataset_implicit_conversions.get(dataset) + conversion_key = ( + self.serialization_options.get_identifier(self.security, conversion) if conversion else None + ) + target_filename = get_export_dataset_filename( + as_dict["name"], as_dict["extension"], dataset_hid, conversion_key=conversion_key + ) arcname = os.path.join(dir_name, target_filename) src = file_name @@ -2139,7 +2194,13 @@ def export_history( if dataset not in self.included_datasets: if should_include_file: self._ensure_dataset_file_exists(dataset) - self.add_dataset(dataset, include_files=should_include_file) + if dataset.implicitly_converted_parent_datasets: + # fetching 0th of list but I think this is just a mapping quirk - I can't imagine how there + # would be more than one of these -John + conversion = dataset.implicitly_converted_parent_datasets[0] + self.add_implicit_conversion_dataset(dataset, should_include_file, conversion) + else: + self.add_dataset(dataset, include_files=should_include_file) def export_library( self, library: model.Library, include_hidden: bool = False, include_deleted: bool = False @@ -2218,6 +2279,15 @@ def add_dataset_collection( self.collections_attrs.append(collection) self.included_collections.append(collection) + def add_implicit_conversion_dataset( + self, + dataset: model.DatasetInstance, + include_files: bool, + 
conversion: model.ImplicitlyConvertedDatasetAssociation, + ) -> None: + self.included_datasets[dataset] = (dataset, include_files) + self.dataset_implicit_conversions[dataset] = conversion + def add_dataset(self, dataset: model.DatasetInstance, include_files: bool = True) -> None: self.included_datasets[dataset] = (dataset, include_files) @@ -2264,6 +2334,10 @@ def to_json(attributes): with open(collections_attrs_filename, "w") as collections_attrs_out: collections_attrs_out.write(to_json(self.collections_attrs)) + conversions_attrs_filename = os.path.join(export_directory, ATTRS_FILENAME_CONVERSIONS) + with open(conversions_attrs_filename, "w") as conversions_attrs_out: + conversions_attrs_out.write(to_json(self.dataset_implicit_conversions.values())) + jobs_attrs = [] for job_id, job_output_dataset_associations in self.job_output_dataset_associations.items(): output_dataset_mapping: Dict[str, List[Union[str, int]]] = {} @@ -2387,6 +2461,7 @@ class WriteCrates: included_invocations: List[model.WorkflowInvocation] export_directory: StrPath included_datasets: Dict[model.DatasetInstance, Tuple[model.DatasetInstance, bool]] + dataset_implicit_conversions: Dict[model.DatasetInstance, model.ImplicitlyConvertedDatasetAssociation] dataset_id_to_path: Dict[int, Tuple[Optional[str], Optional[str]]] @property @@ -2891,12 +2966,15 @@ def tar_export_directory(export_directory: StrPath, out_file: StrPath, gzip: boo store_archive.add(os.path.join(export_directory, export_path), arcname=export_path) -def get_export_dataset_filename(name: str, ext: str, hid: int) -> str: +def get_export_dataset_filename(name: str, ext: str, hid: int, conversion_key: Optional[str]) -> str: """ Builds a filename for a dataset using its name an extension. """ base = "".join(c in FILENAME_VALID_CHARS and c or "_" for c in name) - return f"{base}_{hid}.{ext}" + if not conversion_key: + return f"{base}_{hid}.{ext}" + else: + return f"{base}_{hid}_conversion_{conversion_key}.{ext}" def imported_store_for_metadata( diff --git a/test/unit/data/model/2.txt b/test/unit/data/model/2.txt new file mode 100644 index 000000000000..af93bee4d4b2 --- /dev/null +++ b/test/unit/data/model/2.txt @@ -0,0 +1 @@ +converted bed \ No newline at end of file diff --git a/test/unit/data/model/test_model_store.py b/test/unit/data/model/test_model_store.py index 6098a88b046b..fd540dfa7045 100644 --- a/test/unit/data/model/test_model_store.py +++ b/test/unit/data/model/test_model_store.py @@ -43,6 +43,7 @@ TESTCASE_DIRECTORY = pathlib.Path(__file__).parent TEST_PATH_1 = TESTCASE_DIRECTORY / "1.txt" TEST_PATH_2 = TESTCASE_DIRECTORY / "2.bed" +TEST_PATH_2_CONVERTED = TESTCASE_DIRECTORY / "2.txt" DEFAULT_OBJECT_STORE_BY = "id" @@ -120,6 +121,32 @@ def test_import_export_history_allow_discarded_data(): assert imported_job.output_datasets[0].dataset == datasets[1] +def test_import_export_history_with_implicit_conversion(): + app = _mock_app() + + u, h, d1, d2, j = _setup_simple_cat_job(app) + + convert_ext = "fasta" + implicit_hda = model.HistoryDatasetAssociation(extension=convert_ext, create_dataset=True, flush=False, history=h) + implicit_hda.hid = d2.hid + # this adds and flushes the result... 
+ d2.attach_implicitly_converted_dataset(app.model.context, implicit_hda, convert_ext) + app.object_store.update_from_file(implicit_hda.dataset, file_name=TEST_PATH_2_CONVERTED, create=True) + + assert len(h.active_datasets) == 3 + imported_history = _import_export_history(app, h, export_files="copy", include_hidden=True) + + assert len(imported_history.active_datasets) == 3 + recovered_hda_2 = imported_history.active_datasets[1] + assert recovered_hda_2.implicitly_converted_datasets + imported_conversion = recovered_hda_2.implicitly_converted_datasets[0] + assert imported_conversion.type == "fasta" + assert imported_conversion.dataset == imported_history.active_datasets[2] + + # implicit conversions have the same HID... ensure this property is recovered... + assert imported_history.active_datasets[2].hid == imported_history.active_datasets[1].hid + + def test_import_export_bag_archive(): """Test a simple job import/export using a BagIt archive.""" dest_parent = mkdtemp() From b8cf19f0bde49dc8631df2c2e7de485efd84e724 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Thu, 25 Jan 2024 09:49:44 -0500 Subject: [PATCH 4/5] Utilities for higher level composite tests in test_model_store. --- test/unit/data/model/test_model_store.py | 40 ++++++++++++++---------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/test/unit/data/model/test_model_store.py b/test/unit/data/model/test_model_store.py index fd540dfa7045..58e4b74d8558 100644 --- a/test/unit/data/model/test_model_store.py +++ b/test/unit/data/model/test_model_store.py @@ -709,23 +709,8 @@ def test_import_export_composite_datasets(): d1.dataset.create_extra_files_path() app.add_and_commit(h, d1) - primary = NamedTemporaryFile("w") - primary.write("cool primary file") - primary.flush() - app.object_store.update_from_file(d1.dataset, file_name=primary.name, create=True, preserve_symlinks=True) - - composite1 = NamedTemporaryFile("w") - composite1.write("cool composite file") - composite1.flush() - - app.object_store.update_from_file( - d1.dataset, - extra_dir=os.path.normpath(os.path.join(d1.extra_files_path, "parent_dir")), - alt_name="child_file", - file_name=composite1.name, - create=True, - preserve_symlinks=True, - ) + app.write_primary_file(d1, "cool primary file") + app.write_composite_file(d1, "cool composite file", "child_file") temp_directory = mkdtemp() with store.DirectoryModelExportStore(temp_directory, app=app, export_files="copy") as export_store: @@ -1059,6 +1044,27 @@ def commit(self): with transaction(session): session.commit() + def write_primary_file(self, dataset_instance, contents): + primary = NamedTemporaryFile("w") + primary.write(contents) + primary.flush() + self.object_store.update_from_file(dataset_instance.dataset, file_name=primary.name, create=True, preserve_symlinks=True) + + def write_composite_file(self, dataset_instance, contents, file_name): + composite1 = NamedTemporaryFile("w") + composite1.write(contents) + composite1.flush() + + dataset_instance.dataset.create_extra_files_path() + self.object_store.update_from_file( + dataset_instance.dataset, + extra_dir=os.path.normpath(os.path.join(dataset_instance.extra_files_path, "parent_dir")), + alt_name=file_name, + file_name=composite1.name, + create=True, + preserve_symlinks=True, + ) + def _mock_app(store_by=DEFAULT_OBJECT_STORE_BY): app = TestApp() From 87321dcffa52e4ced775b0bd89107696b59ab199 Mon Sep 17 00:00:00 2001 From: John Chilton Date: Thu, 25 Jan 2024 10:03:06 -0500 Subject: [PATCH 5/5] Fix overlapping names in extra files for model 
stores. --- lib/galaxy/model/store/__init__.py | 13 ++++- test/unit/data/model/test_model_store.py | 60 ++++++++++++++++++++++-- 2 files changed, 68 insertions(+), 5 deletions(-) diff --git a/lib/galaxy/model/store/__init__.py b/lib/galaxy/model/store/__init__.py index cabd6472bd81..4ea835bbd2b8 100644 --- a/lib/galaxy/model/store/__init__.py +++ b/lib/galaxy/model/store/__init__.py @@ -1985,7 +1985,6 @@ def add(src, dest): as_dict["name"], as_dict["extension"], dataset_hid, conversion_key=conversion_key ) arcname = os.path.join(dir_name, target_filename) - src = file_name dest = os.path.join(export_directory, arcname) add(src, dest) @@ -1998,7 +1997,10 @@ def add(src, dest): file_list = [] if len(file_list): - arcname = os.path.join(dir_name, f"extra_files_path_{dataset_hid}") + extra_files_target_filename = get_export_dataset_extra_files_dir_name( + as_dict["name"], as_dict["extension"], dataset_hid, conversion_key=conversion_key + ) + arcname = os.path.join(dir_name, extra_files_target_filename) add(extra_files_path, os.path.join(export_directory, arcname)) as_dict["extra_files_path"] = arcname else: @@ -2977,6 +2979,13 @@ def get_export_dataset_filename(name: str, ext: str, hid: int, conversion_key: O return f"{base}_{hid}_conversion_{conversion_key}.{ext}" +def get_export_dataset_extra_files_dir_name(name: str, ext: str, hid: int, conversion_key: Optional[str]) -> str: + if not conversion_key: + return f"extra_files_path_{hid}" + else: + return f"extra_files_path_{hid}_conversion_{conversion_key}" + + def imported_store_for_metadata( directory: str, object_store: Optional[ObjectStore] = None ) -> BaseDirectoryImportModelStore: diff --git a/test/unit/data/model/test_model_store.py b/test/unit/data/model/test_model_store.py index 58e4b74d8558..b2a3b0739103 100644 --- a/test/unit/data/model/test_model_store.py +++ b/test/unit/data/model/test_model_store.py @@ -147,6 +147,49 @@ def test_import_export_history_with_implicit_conversion(): assert imported_history.active_datasets[2].hid == imported_history.active_datasets[1].hid +def test_import_export_history_with_implicit_conversion_and_extra_files(): + app = _mock_app() + + u, h, d1, d2, j = _setup_simple_cat_job(app) + + convert_ext = "fasta" + implicit_hda = model.HistoryDatasetAssociation(extension=convert_ext, create_dataset=True, flush=False, history=h) + implicit_hda.hid = d2.hid + # this adds and flushes the result... + d2.attach_implicitly_converted_dataset(app.model.context, implicit_hda, convert_ext) + app.object_store.update_from_file(implicit_hda.dataset, file_name=TEST_PATH_2_CONVERTED, create=True) + + d2.dataset.create_extra_files_path() + implicit_hda.dataset.create_extra_files_path() + + app.write_primary_file(d2, "cool primary file 1") + app.write_composite_file(d2, "cool composite file", "child_file") + + app.write_primary_file(implicit_hda, "cool primary file implicit") + app.write_composite_file(implicit_hda, "cool composite file implicit", "child_file_converted") + + assert len(h.active_datasets) == 3 + imported_history = _import_export_history(app, h, export_files="copy", include_hidden=True) + + assert len(imported_history.active_datasets) == 3 + recovered_hda_2 = imported_history.active_datasets[1] + assert recovered_hda_2.implicitly_converted_datasets + imported_conversion = recovered_hda_2.implicitly_converted_datasets[0] + assert imported_conversion.type == "fasta" + assert imported_conversion.dataset == imported_history.active_datasets[2] + + # implicit conversions have the same HID... 
ensure this property is recovered... + assert imported_history.active_datasets[2].hid == imported_history.active_datasets[1].hid + + _assert_extra_files_has_parent_directory_with_single_file_containing( + imported_history.active_datasets[1], "child_file", "cool composite file" + ) + + _assert_extra_files_has_parent_directory_with_single_file_containing( + imported_history.active_datasets[2], "child_file_converted", "cool composite file implicit" + ) + + def test_import_export_bag_archive(): """Test a simple job import/export using a BagIt archive.""" dest_parent = mkdtemp() @@ -721,15 +764,24 @@ def test_import_export_composite_datasets(): _perform_import_from_directory(temp_directory, app, u, import_history) assert len(import_history.datasets) == 1 import_dataset = import_history.datasets[0] - root_extra_files_path = import_dataset.extra_files_path + _assert_extra_files_has_parent_directory_with_single_file_containing( + import_dataset, "child_file", "cool composite file" + ) + + +def _assert_extra_files_has_parent_directory_with_single_file_containing( + dataset, expected_file_name, expected_contents +): + root_extra_files_path = dataset.extra_files_path assert len(os.listdir(root_extra_files_path)) == 1 assert os.listdir(root_extra_files_path)[0] == "parent_dir" composite_sub_dir = os.path.join(root_extra_files_path, "parent_dir") child_files = os.listdir(composite_sub_dir) assert len(child_files) == 1 + assert child_files[0] == expected_file_name with open(os.path.join(composite_sub_dir, child_files[0])) as f: contents = f.read() - assert contents == "cool composite file" + assert contents == expected_contents def test_edit_metadata_files(): @@ -1048,7 +1100,9 @@ def write_primary_file(self, dataset_instance, contents): primary = NamedTemporaryFile("w") primary.write(contents) primary.flush() - self.object_store.update_from_file(dataset_instance.dataset, file_name=primary.name, create=True, preserve_symlinks=True) + self.object_store.update_from_file( + dataset_instance.dataset, file_name=primary.name, create=True, preserve_symlinks=True + ) def write_composite_file(self, dataset_instance, contents, file_name): composite1 = NamedTemporaryFile("w")
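
A minimal standalone sketch (plain Python, not the Galaxy code itself) of the naming scheme patch 5 settles on: an implicitly converted dataset shares its parent's HID, so both the exported filename and the extra-files directory take a _conversion_<key> suffix whenever a conversion key is present, which is what keeps the two extra_files_path directories from colliding. FILENAME_VALID_CHARS is simplified here and the helper names are local stand-ins, not Galaxy's.

import string
from typing import Optional

# Simplified stand-in for Galaxy's FILENAME_VALID_CHARS.
FILENAME_VALID_CHARS = set(string.ascii_letters + string.digits + "-_.")


def export_dataset_filename(name: str, ext: str, hid: int, conversion_key: Optional[str] = None) -> str:
    # Replace characters outside the allowed set, mirroring the patch's sanitization.
    base = "".join(c if c in FILENAME_VALID_CHARS else "_" for c in name)
    if conversion_key:
        return f"{base}_{hid}_conversion_{conversion_key}.{ext}"
    return f"{base}_{hid}.{ext}"


def export_extra_files_dir_name(hid: int, conversion_key: Optional[str] = None) -> str:
    # Without the suffix, a dataset and its implicit conversion (same HID) would
    # both export their extra files to extra_files_path_<hid>.
    if conversion_key:
        return f"extra_files_path_{hid}_conversion_{conversion_key}"
    return f"extra_files_path_{hid}"


assert export_dataset_filename("data", "bed", 2) == "data_2.bed"
assert export_dataset_filename("data", "fasta", 2, "abc123") == "data_2_conversion_abc123.fasta"
assert export_extra_files_dir_name(2) == "extra_files_path_2"
assert export_extra_files_dir_name(2, "abc123") == "extra_files_path_2_conversion_abc123"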
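
Roughly what one record in the new implicit_dataset_conversions.txt looks like and how the import side uses it, going by the _serialize and _import_implicit_dataset_conversions hunks above. This is a hedged sketch: the HDA keys are made up, the association's own attached identifier is omitted, and plain dicts stand in for model objects; the real importer resolves keys through ObjectImportTracker.hdas_by_key, constructs ImplicitlyConvertedDatasetAssociation(for_import=True), and defers the HID copy to _reassign_hids via copy_hid_for.

# One exported conversion record (hypothetical identifier values):
conversion_attrs = [
    {
        "file_type": "fasta",       # ImplicitlyConvertedDatasetAssociation.type
        "parent_hda": "hda_key_1",  # identifier of the source HDA (e.g. the bed dataset)
        "hda": "hda_key_2",         # identifier of the converted HDA
    }
]

# Simplified import side: relink the two datasets and give the converted one
# its parent's HID, since implicit conversions share the parent's HID.
hdas_by_key = {
    "hda_key_1": {"hid": 2, "extension": "bed"},
    "hda_key_2": {"hid": 5, "extension": "fasta"},
}

for attrs in conversion_attrs:
    parent = hdas_by_key[attrs["parent_hda"]]
    converted = hdas_by_key[attrs["hda"]]
    converted["hid"] = parent["hid"]  # mirrors the copy_hid_for handling in _reassign_hids

assert hdas_by_key["hda_key_2"]["hid"] == 2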