From d783f92c94a32f9693264b1689402bbb160e06e5 Mon Sep 17 00:00:00 2001
From: John Chilton
Date: Sat, 21 Dec 2024 09:22:02 -0500
Subject: [PATCH] Implement paired_or_unpaired collections...

---
 .../model/dataset_collections/registry.py     |   2 +
 .../model/dataset_collections/structure.py    |  11 +-
 .../dataset_collections/subcollections.py     |  17 ++-
 .../dataset_collections/type_description.py   |  30 +++-
 .../dataset_collections/types/__init__.py     |   8 ++
 .../model/dataset_collections/types/paired.py |  11 +-
 .../types/paired_or_unpaired.py               |  46 ++++++
 lib/galaxy/schema/schema.py                   |   2 +
 lib/galaxy/tool_util/parameters/models.py     |  74 ++++++++++
 lib/galaxy/tools/__init__.py                  |  57 ++++++++
 lib/galaxy/tools/actions/__init__.py          |  24 +++-
 lib/galaxy/tools/parameters/basic.py          |  56 +++++++-
 .../tools/split_paired_and_unpaired.xml       | 132 ++++++++++++++++++
 lib/galaxy/tools/wrappers.py                  |  21 ++-
 .../api/test_dataset_collections.py           |  19 +++
 lib/galaxy_test/api/test_tool_execute.py      |  98 +++++++++++++
 lib/galaxy_test/base/populators.py            |   6 +
 .../tools/collection_paired_or_unpaired.xml   |  51 +++++++
 test/functional/tools/sample_tool_conf.xml    |   3 +-
 .../data/dataset_collections/test_matching.py |  44 ++++++
 .../dataset_collections/test_structure.py     |  31 ++++
 .../test_type_descriptions.py                 |  28 +++-
 22 files changed, 744 insertions(+), 27 deletions(-)
 create mode 100644 lib/galaxy/model/dataset_collections/types/paired_or_unpaired.py
 create mode 100644 lib/galaxy/tools/split_paired_and_unpaired.xml
 create mode 100644 test/functional/tools/collection_paired_or_unpaired.xml

diff --git a/lib/galaxy/model/dataset_collections/registry.py b/lib/galaxy/model/dataset_collections/registry.py
index bd148edafd2d..ed75294f68e7 100644
--- a/lib/galaxy/model/dataset_collections/registry.py
+++ b/lib/galaxy/model/dataset_collections/registry.py
@@ -2,6 +2,7 @@
 from .types import (
     list,
     paired,
+    paired_or_unpaired,
     record,
 )
@@ -9,6 +10,7 @@
     list.ListDatasetCollectionType,
     paired.PairedDatasetCollectionType,
     record.RecordDatasetCollectionType,
+    paired_or_unpaired.PairedOrUnpairedDatasetCollectionType,
 ]

diff --git a/lib/galaxy/model/dataset_collections/structure.py b/lib/galaxy/model/dataset_collections/structure.py
index 673585a8c87f..83de5c9a87a9 100644
--- a/lib/galaxy/model/dataset_collections/structure.py
+++ b/lib/galaxy/model/dataset_collections/structure.py
@@ -1,9 +1,10 @@
 """
 Module for reasoning about structure of and matching hierarchical collections of data.
""" -import logging +from typing import TYPE_CHECKING -log = logging.getLogger(__name__) +if TYPE_CHECKING: + from .type_description import CollectionTypeDescription class Leaf: @@ -149,7 +150,7 @@ def clone(self): return Tree(cloned_children, self.collection_type_description) def __str__(self): - return f"Tree[collection_type={self.collection_type_description},children={','.join(f'{identifier_and_element[0]}={identifier_and_element[1]}' for identifier_and_element in self.children)}]" + return f"Tree[collection_type={self.collection_type_description},children=({','.join(f'{identifier_and_element[0]}={identifier_and_element[1]}' for identifier_and_element in self.children)})]" def tool_output_to_structure(get_sliced_input_collection_structure, tool_output, collections_manager): @@ -190,7 +191,9 @@ def dict_map(func, input_dict): return {k: func(v) for k, v in input_dict.items()} -def get_structure(dataset_collection_instance, collection_type_description, leaf_subcollection_type=None): +def get_structure( + dataset_collection_instance, collection_type_description: "CollectionTypeDescription", leaf_subcollection_type=None +): if leaf_subcollection_type: collection_type_description = collection_type_description.effective_collection_type_description( leaf_subcollection_type diff --git a/lib/galaxy/model/dataset_collections/subcollections.py b/lib/galaxy/model/dataset_collections/subcollections.py index af6c2a397326..a47f8594ee16 100644 --- a/lib/galaxy/model/dataset_collections/subcollections.py +++ b/lib/galaxy/model/dataset_collections/subcollections.py @@ -1,4 +1,5 @@ from galaxy import exceptions +from .adapters import PromoteCollectionElementToCollectionAdapter def split_dataset_collection_instance(dataset_collection_instance, collection_type): @@ -6,13 +7,27 @@ def split_dataset_collection_instance(dataset_collection_instance, collection_ty return _split_dataset_collection(dataset_collection_instance.collection, collection_type) +def _is_a_subcollection_type(this_collection_type: str, collection_type: str): + if collection_type == "single_datasets": + # can be a subcollection of anything effectively... 
+ return True + if not this_collection_type.endswith(collection_type) or this_collection_type == collection_type: + return False + return True + + def _split_dataset_collection(dataset_collection, collection_type): this_collection_type = dataset_collection.collection_type - if not this_collection_type.endswith(collection_type) or this_collection_type == collection_type: + is_this_collection_nested = ":" in this_collection_type + if not _is_a_subcollection_type(this_collection_type, collection_type): raise exceptions.MessageException("Cannot split collection in desired fashion.") split_elements = [] for element in dataset_collection.elements: + if not is_this_collection_nested and collection_type == "single_datasets": + split_elements.append(PromoteCollectionElementToCollectionAdapter(element)) + continue + child_collection = element.child_collection if child_collection is None: raise exceptions.MessageException("Cannot split collection in desired fashion.") diff --git a/lib/galaxy/model/dataset_collections/type_description.py b/lib/galaxy/model/dataset_collections/type_description.py index 4120db4c6f94..87492f7c29cf 100644 --- a/lib/galaxy/model/dataset_collections/type_description.py +++ b/lib/galaxy/model/dataset_collections/type_description.py @@ -51,9 +51,12 @@ def effective_collection_type(self, subcollection_type): if not self.has_subcollections_of_type(subcollection_type): raise ValueError(f"Cannot compute effective subcollection type of {subcollection_type} over {self}") + if subcollection_type == "single_datasets": + return self.collection_type + return self.collection_type[: -(len(subcollection_type) + 1)] - def has_subcollections_of_type(self, other_collection_type): + def has_subcollections_of_type(self, other_collection_type) -> bool: """Take in another type (either flat string or another CollectionTypeDescription) and determine if this collection contains subcollections matching that type. 
@@ -65,18 +68,37 @@ def has_subcollections_of_type(self, other_collection_type):
         if hasattr(other_collection_type, "collection_type"):
             other_collection_type = other_collection_type.collection_type
         collection_type = self.collection_type
-        return collection_type.endswith(other_collection_type) and collection_type != other_collection_type
+        if collection_type == other_collection_type:
+            return False
+        if collection_type.endswith(other_collection_type):
+            return True
+        if other_collection_type == "paired_or_unpaired":
+            # this can be thought of as a subcollection of anything except a pair
+            # since it would match a pair exactly
+            return collection_type != "paired"
+        if other_collection_type == "single_datasets":
+            # effectively any collection has unpaired subcollections
+            return True
+        return False

     def is_subcollection_of_type(self, other_collection_type):
         if not hasattr(other_collection_type, "collection_type"):
             other_collection_type = self.collection_type_description_factory.for_collection_type(other_collection_type)
         return other_collection_type.has_subcollections_of_type(self)

-    def can_match_type(self, other_collection_type):
+    def can_match_type(self, other_collection_type) -> bool:
         if hasattr(other_collection_type, "collection_type"):
             other_collection_type = other_collection_type.collection_type
         collection_type = self.collection_type
-        return other_collection_type == collection_type
+        if other_collection_type == collection_type:
+            return True
+        elif other_collection_type == "paired" and collection_type == "paired_or_unpaired":
+            return True
+        elif other_collection_type == "paired_or_unpaired" and collection_type == "paired":
+            return True
+
+        # can we push this to the type registry somehow?
+        return False

     def subcollection_type_description(self):
         if not self.__has_subcollections:
diff --git a/lib/galaxy/model/dataset_collections/types/__init__.py b/lib/galaxy/model/dataset_collections/types/__init__.py
index c294f6957be6..30eee0489d58 100644
--- a/lib/galaxy/model/dataset_collections/types/__init__.py
+++ b/lib/galaxy/model/dataset_collections/types/__init__.py
@@ -21,3 +21,11 @@ def generate_elements(self, dataset_instances: dict, **kwds):
 class BaseDatasetCollectionType(DatasetCollectionType):
     def _validation_failed(self, message):
         raise exceptions.ObjectAttributeInvalidException(message)
+
+    def _ensure_dataset_with_identifier(self, dataset_instances: dict, name: str):
+        dataset_instance = dataset_instances.get(name)
+        if dataset_instance is None:
+            raise exceptions.ObjectAttributeInvalidException(
+                f"An element with the identifier {name} is required to create this collection type"
+            )
+        return dataset_instance
diff --git a/lib/galaxy/model/dataset_collections/types/paired.py b/lib/galaxy/model/dataset_collections/types/paired.py
index e774ab67aace..825283fb0243 100644
--- a/lib/galaxy/model/dataset_collections/types/paired.py
+++ b/lib/galaxy/model/dataset_collections/types/paired.py
@@ -1,3 +1,4 @@
+from galaxy.exceptions import RequestParameterInvalidException
 from galaxy.model import (
     DatasetCollectionElement,
     HistoryDatasetAssociation,
@@ -16,13 +17,19 @@ class PairedDatasetCollectionType(BaseDatasetCollectionType):
     collection_type = "paired"

     def generate_elements(self, dataset_instances, **kwds):
-        if forward_dataset := dataset_instances.get(FORWARD_IDENTIFIER):
+        num_datasets = len(dataset_instances)
+        if num_datasets != 2:
+            raise RequestParameterInvalidException(
+                "Incorrect number of datasets - exactly 2 datasets are required to create a paired collection"
+            )
+
+        if forward_dataset := self._ensure_dataset_with_identifier(dataset_instances, FORWARD_IDENTIFIER):
             left_association = DatasetCollectionElement(
                 element=forward_dataset,
                 element_identifier=FORWARD_IDENTIFIER,
             )
             yield left_association
-        if reverse_dataset := dataset_instances.get(REVERSE_IDENTIFIER):
+        if reverse_dataset := self._ensure_dataset_with_identifier(dataset_instances, REVERSE_IDENTIFIER):
             right_association = DatasetCollectionElement(
                 element=reverse_dataset,
                 element_identifier=REVERSE_IDENTIFIER,
diff --git a/lib/galaxy/model/dataset_collections/types/paired_or_unpaired.py b/lib/galaxy/model/dataset_collections/types/paired_or_unpaired.py
new file mode 100644
index 000000000000..8a8a6cd7e112
--- /dev/null
+++ b/lib/galaxy/model/dataset_collections/types/paired_or_unpaired.py
@@ -0,0 +1,46 @@
+from galaxy.exceptions import RequestParameterInvalidException
+from galaxy.model import (
+    DatasetCollectionElement,
+    HistoryDatasetAssociation,
+)
+from . import BaseDatasetCollectionType
+from .paired import (
+    FORWARD_IDENTIFIER,
+    REVERSE_IDENTIFIER,
+)
+
+SINGLETON_IDENTIFIER = "unpaired"
+
+
+class PairedOrUnpairedDatasetCollectionType(BaseDatasetCollectionType):
+    """A pair of datasets (forward/reverse) or a single unpaired dataset."""
+
+    collection_type = "paired_or_unpaired"
+
+    def generate_elements(self, dataset_instances, **kwds):
+        num_datasets = len(dataset_instances)
+        if num_datasets > 2 or num_datasets < 1:
+            raise RequestParameterInvalidException(
+                "Incorrect number of datasets - 1 or 2 datasets are required to create a paired_or_unpaired collection"
+            )
+
+        if num_datasets == 2:
+            if forward_dataset := self._ensure_dataset_with_identifier(dataset_instances, FORWARD_IDENTIFIER):
+                left_association = DatasetCollectionElement(
+                    element=forward_dataset,
+                    element_identifier=FORWARD_IDENTIFIER,
+                )
+                yield left_association
+            if reverse_dataset := self._ensure_dataset_with_identifier(dataset_instances, REVERSE_IDENTIFIER):
+                right_association = DatasetCollectionElement(
+                    element=reverse_dataset,
+                    element_identifier=REVERSE_IDENTIFIER,
+                )
+                yield right_association
+        else:
+            if single_dataset := self._ensure_dataset_with_identifier(dataset_instances, SINGLETON_IDENTIFIER):
+                single_association = DatasetCollectionElement(
+                    element=single_dataset,
+                    element_identifier=SINGLETON_IDENTIFIER,
+                )
+                yield single_association
diff --git a/lib/galaxy/schema/schema.py b/lib/galaxy/schema/schema.py
index 8c031a0fcfe1..010e7a78c278 100644
--- a/lib/galaxy/schema/schema.py
+++ b/lib/galaxy/schema/schema.py
@@ -33,6 +33,8 @@
 from typing_extensions import (
     Annotated,
     Literal,
+    NotRequired,
+    TypedDict,
 )

 from galaxy.schema import partial_model
diff --git a/lib/galaxy/tool_util/parameters/models.py b/lib/galaxy/tool_util/parameters/models.py
index d0e7e6bbcb8f..1c6eddfc403a 100644
--- a/lib/galaxy/tool_util/parameters/models.py
+++ b/lib/galaxy/tool_util/parameters/models.py
@@ -33,6 +33,7 @@
     StrictInt,
     StrictStr,
     Tag,
+    TypeAdapter,
     ValidationError,
 )
 from typing_extensions import (
@@ -329,6 +330,7 @@ def request_requires_value(self) -> bool:

 DataSrcT = Literal["hda", "ldda"]
 MultiDataSrcT = Literal["hda", "ldda", "hdca"]
+# TODO: rename CollectionStrT to CollectionSrcT at some point.
CollectionStrT = Literal["hdca"] TestCaseDataSrcT = Literal["File"] @@ -527,6 +529,78 @@ class DataCollectionRequestInternal(StrictModel): id: StrictInt +CollectionAdapterSrcT = Literal["CollectionAdapter"] + + +class AdaptedDataCollectionRequestBase(StrictModel): + src: CollectionAdapterSrcT + + +class AdaptedDataCollectionPromoteDatasetToCollectionRequest(AdaptedDataCollectionRequestBase): + adapter_type: Literal["PromoteDatasetToCollection"] + collection_type: Literal["list", "paired_or_unpaired"] + adapting: DataRequestHda + + +# calling this name and element_identifier to align with fetch API, etc... +class AdapterElementRequest(DataRequestHda): + name: str # element_identifier + + +class AdaptedDataCollectionPromoteDatasetsToCollectionRequest(AdaptedDataCollectionRequestBase): + adapter_type: Literal["PromoteDatasetsToCollection"] + # could allow list in here without changing much else I think but I'm trying to keep these tight in scope + collection_type: Literal["paired", "paired_or_unpaired"] + adapting: List[AdapterElementRequest] + + +AdaptedDataCollectionRequest = Annotated[ + Union[ + AdaptedDataCollectionPromoteDatasetToCollectionRequest, AdaptedDataCollectionPromoteDatasetsToCollectionRequest + ], + Field(discriminator="adapter_type"), +] +AdaptedDataCollectionRequestTypeAdapter = TypeAdapter(AdaptedDataCollectionRequest) + + +class DatasetCollectionElementReference(StrictModel): + src: Literal["dce"] + id: StrictInt + + +class AdaptedDataCollectionPromoteCollectionElementToCollectionRequestInternal(AdaptedDataCollectionRequestBase): + adapter_type: Literal["PromoteCollectionElementToCollection"] + adapting: DatasetCollectionElementReference + + +class AdaptedDataCollectionPromoteDatasetToCollectionRequestInternal(AdaptedDataCollectionRequestBase): + adapter_type: Literal["PromoteDatasetToCollection"] + collection_type: Literal["list", "paired_or_unpaired"] + adapting: DataRequestInternalHda + + +class AdapterElementRequestInternal(DataRequestInternalHda): + name: str # element_identifier + + +class AdaptedDataCollectionPromoteDatasetsToCollectionRequestInternal(AdaptedDataCollectionRequestBase): + adapter_type: Literal["PromoteDatasetsToCollection"] + # could allow list in here without changing much else I think but I'm trying to keep these tight in scope + collection_type: Literal["paired", "paired_or_unpaired"] + adapting: List[AdapterElementRequestInternal] + + +AdaptedDataCollectionRequestInternal = Annotated[ + Union[ + AdaptedDataCollectionPromoteCollectionElementToCollectionRequestInternal, + AdaptedDataCollectionPromoteDatasetToCollectionRequestInternal, + AdaptedDataCollectionPromoteDatasetsToCollectionRequestInternal, + ], + Field(discriminator="adapter_type"), +] +AdaptedDataCollectionRequestInternalTypeAdapter = TypeAdapter(AdaptedDataCollectionRequestInternal) + + class DataCollectionParameterModel(BaseGalaxyToolParameterModelDefinition): parameter_type: Literal["gx_data_collection"] = "gx_data_collection" collection_type: Optional[str] = None diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py index 1174db417b3b..08a6a0054ec4 100644 --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -3534,6 +3534,63 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history ) +class SplitPairedAndUnpairedTool(DatabaseOperationTool): + tool_type = "split_paired_and_unpaired" + require_terminal_states = False + require_dataset_ok = False + + def produce_outputs(self, trans, out_data, output_collections, incoming, 
history, **kwds): + has_collection = incoming["input"] + if hasattr(has_collection, "element_type"): + # It is a DCE + collection = has_collection.element_object + else: + # It is an HDCA + collection = has_collection.collection + + collection_type = collection.collection_type + assert collection_type in ["list", "list:paired", "list:paired_or_unpaired"] + + unpaired_dce_copies = {} + paired_dce_copies = {} + paired_datasets = [] + + def _handle_unpaired(dce): + element_identifier = dce.element_identifier + assert getattr(dce.element_object, "history_content_type", None) == "dataset" + copied_value = dce.element_object.copy(copy_tags=dce.element_object.tags, flush=False) + unpaired_dce_copies[element_identifier] = copied_value + + def _handle_paired(dce): + element_identifier = dce.element_identifier + copied_value = dce.element_object.copy(flush=False) + paired_dce_copies[element_identifier] = copied_value + paired_datasets.append(copied_value.elements[0].element_object) + paired_datasets.append(copied_value.elements[1].element_object) + + if collection_type == "list": + for element in collection.elements: + _handle_unpaired(element) + elif collection_type == "list:paired": + for element in collection.elements: + _handle_paired(element) + elif collection_type == "list:paired_or_unpaired": + for element in collection.elements: + if getattr(element.element_object, "history_content_type", None) == "dataset": + _handle_unpaired(element) + else: + _handle_paired(element) + + self._add_datasets_to_history(history, unpaired_dce_copies.values()) + self._add_datasets_to_history(history, paired_datasets) + output_collections.create_collection( + self.outputs["output_unpaired"], "output_unpaired", elements=unpaired_dce_copies, propagate_hda_tags=False + ) + output_collections.create_collection( + self.outputs["output_paired"], "output_paired", elements=paired_dce_copies, propagate_hda_tags=False + ) + + class ExtractDatasetCollectionTool(DatabaseOperationTool): tool_type = "extract_dataset" require_terminal_states = False diff --git a/lib/galaxy/tools/actions/__init__.py b/lib/galaxy/tools/actions/__init__.py index f7a2138795a3..df4f87ad009d 100644 --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -34,6 +34,7 @@ WorkflowRequestInputParameter, ) from galaxy.model.base import transaction +from galaxy.model.dataset_collections.adapters import CollectionAdapter from galaxy.model.dataset_collections.builder import CollectionBuilder from galaxy.model.dataset_collections.matching import MatchingCollections from galaxy.model.none_like import NoneDataset @@ -265,12 +266,16 @@ def process_dataset(data, formats=None): collection = None child_collection = False - if hasattr(value, "child_collection"): - # if we are mapping a collection over a tool, we only require the child_collection + if isinstance(value, CollectionAdapter): + # collection was created for this execution, use it as is + collection = value + elif hasattr(value, "child_collection"): + # if we are mapping a collection over a tool, so value is a DCE and + # we only require the child_collection child_collection = True collection = value.child_collection else: - # else the tool takes a collection as input so we need everything + # else the tool takes the collection as input so we need everything collection = value.collection action_tuples = collection.dataset_action_tuples @@ -935,6 +940,19 @@ def _record_inputs(self, trans, tool, job, incoming, inp_data, inp_dataset_colle job.add_input_dataset_collection(name, 
dataset_collection)
             elif isinstance(dataset_collection, model.DatasetCollectionElement):
                 job.add_input_dataset_collection_element(name, dataset_collection)
+            elif isinstance(dataset_collection, CollectionAdapter):
+                adapting = dataset_collection.adapting
+                # TODO: record adapter json in the association I think... -John
+                if isinstance(adapting, model.DatasetCollectionElement):
+                    job.add_input_dataset_collection_element(name, adapting)
+                elif isinstance(adapting, model.HistoryDatasetAssociation):
+                    job.add_input_dataset(name, dataset=adapting)
+                elif isinstance(adapting, list):
+                    for element in adapting:
+                        input_key = f"{name}|__adapter_part__|{element.element_identifier}"
+                        job.add_input_dataset(input_key, dataset=element.hda)
+                else:
+                    log.info(f"Unrecognized collection adapter target for input {name}; not recording it as a job input")

         # If this input collection is a reduction, we expanded it for dataset security, type
         # checking, and such, but the persisted input must be the original collection
diff --git a/lib/galaxy/tools/parameters/basic.py b/lib/galaxy/tools/parameters/basic.py
index 6b06711c9c1f..39a12c319b40 100644
--- a/lib/galaxy/tools/parameters/basic.py
+++ b/lib/galaxy/tools/parameters/basic.py
@@ -40,6 +40,12 @@
     LibraryDatasetDatasetAssociation,
 )
 from galaxy.model.dataset_collections import builder
+from galaxy.model.dataset_collections.adapters import (
+    CollectionAdapter,
+    recover_adapter,
+    TransientCollectionAdapterDatasetInstanceElement,
+    validate_collection_adapter_src_dict,
+)
 from galaxy.schema.fetch_data import FilesPayload
 from galaxy.tool_util.parameters.factory import get_color_value
 from galaxy.tool_util.parser import get_input_source as ensure_input_source
@@ -2013,6 +2019,21 @@ def src_id_to_item(
     HistoryDatasetCollectionAssociation,
     LibraryDatasetDatasetAssociation,
 ]:
+    adapter_model = None
+    if value["src"] == "CollectionAdapter":
+        adapter_model = validate_collection_adapter_src_dict(value)
+        adapting = adapter_model.adapting
+        if isinstance(adapting, list):
+            elements = []
+            for item in adapting:
+                element = TransientCollectionAdapterDatasetInstanceElement(
+                    item.name,
+                    src_id_to_item(sa_session, item.dict(), security),
+                )
+                elements.append(element)
+            return recover_adapter(elements, adapter_model)
+        else:
+            value = adapting.dict()
     src_to_class = {
         "hda": HistoryDatasetAssociation,
         "ldda": LibraryDatasetDatasetAssociation,
@@ -2027,6 +2048,8 @@ def src_id_to_item(
         raise ValueError(f"Unknown input source {value['src']} passed to job submission API.")
     if not item:
         raise ValueError("Invalid input id passed to job submission API.")
+    if adapter_model is not None:
+        item = recover_adapter(item, adapter_model)
     item.extra_params = {k: v for k, v in value.items() if k not in ("src", "id")}
     return item
@@ -2477,9 +2500,17 @@ def from_json(self, value, trans, other_values=None):
             # a DatasetCollectionElement instead of a
             # HistoryDatasetCollectionAssociation.
             rval = value
-        elif isinstance(value, MutableMapping) and "src" in value and "id" in value:
-            if value["src"] == "hdca":
-                rval = session.get(HistoryDatasetCollectionAssociation, trans.security.decode_id(value["id"]))
+        elif isinstance(value, CollectionAdapter):
+            # if this mapped over a paired_or_unpaired collection - this parameter
+            # will receive an HDA instead of an HDCA or DCE
+            rval = value
+        elif (
+            isinstance(value, MutableMapping)
+            and "src" in value
+            and ("id" in value or value["src"] == "CollectionAdapter")
+        ):
+            rval = src_id_to_item(sa_session=trans.sa_session, value=value, security=trans.security)
         elif isinstance(value, list):
             if len(value) > 0:
                 value = value[0]
@@ -2801,13 +2832,25 @@ def write_elements_to_collection(has_elements, collection_builder):

 def history_item_dict_to_python(value, app, name):
     if isinstance(value, MutableMapping) and "src" in value:
-        if value["src"] not in ("hda", "dce", "ldda", "hdca"):
+        if value["src"] not in ("hda", "dce", "ldda", "hdca", "CollectionAdapter"):
             raise ParameterValueError(f"Invalid value {value}", name)
         return src_id_to_item(sa_session=app.model.context, security=app.security, value=value)


 def history_item_to_json(value, app, use_security):
     src = None
+
+    # unwrap adapter
+    collection_adapter: Optional[CollectionAdapter] = None
+    if isinstance(value, CollectionAdapter):
+        collection_adapter = value
+        value = value.adapting
+        if isinstance(value, list):
+            # if we are not just adapting one thing... skip the rest of this
+            # and just serialize the stuff we know we want anyway. Perhaps all
+            # this should just be the only path through. The CollectionAdapter
+            # should know what to do with just use_security I think?
+            return collection_adapter.to_adapter_model(value).dict()
     if isinstance(value, MutableMapping) and "src" in value and "id" in value:
         return value
     elif isinstance(value, DatasetCollectionElement):
@@ -2823,4 +2866,7 @@ def history_item_to_json(value, app, use_security):
         src = "hda"
     if src is not None:
         object_id = cached_id(value)
-        return {"id": app.security.encode_id(object_id) if use_security else object_id, "src": src}
+        rval = {"id": app.security.encode_id(object_id) if use_security else object_id, "src": src}
+        if collection_adapter:
+            rval = collection_adapter.to_adapter_model(rval).dict()
+        return rval
diff --git a/lib/galaxy/tools/split_paired_and_unpaired.xml b/lib/galaxy/tools/split_paired_and_unpaired.xml
new file mode 100644
index 000000000000..b05db5207fac
--- /dev/null
+++ b/lib/galaxy/tools/split_paired_and_unpaired.xml
@@ -0,0 +1,132 @@
[The 132 lines of this new tool's XML did not survive extraction in this copy of the patch; only the EDAM annotation operation_2409 is recoverable. The tool wraps the split_paired_and_unpaired database operation defined in lib/galaxy/tools/__init__.py above, producing the output_unpaired and output_paired collections.]
diff --git a/lib/galaxy/tools/wrappers.py b/lib/galaxy/tools/wrappers.py
index 12d3d779cafc..7cefacbd4269 100644
--- a/lib/galaxy/tools/wrappers.py
+++ b/lib/galaxy/tools/wrappers.py
@@ -19,7 +19,10 @@
     Union,
 )

-from typing_extensions import TypeAlias
+from typing_extensions import (
+    Self,
+    TypeAlias,
+)

 from galaxy.model import (
     DatasetCollection,
@@ -489,6 +492,14 @@ def _path_or_uri(self) -> str:
     def file_name(self) -> str:
         return str(self)

+    @property
+    def has_single_item(self) -> bool:
+        return True
+
+    @property
+    def single_item(self) -> Self:
+        return self
+
     def __getattr__(self, key: Any) -> Any:
         if key in ("extra_files_path", "files_path"):
             if not self.compute_environment:
@@ -766,6 +777,14 @@ def serialize(
             include_collection_name=include_collection_name,
         )

+    @property
+    def has_single_item(self) -> bool:
+        return self.__input_supplied and len(self.__element_instance_list) == 1
+
+    @property
+    def single_item(self) -> Self:
+        return self[0]
+
     @property
     def is_input_supplied(self) -> bool:
         return self.__input_supplied
diff --git a/lib/galaxy_test/api/test_dataset_collections.py b/lib/galaxy_test/api/test_dataset_collections.py
index 372d693d4fd0..d2b3e416f5a2 100644
--- a/lib/galaxy_test/api/test_dataset_collections.py
+++ b/lib/galaxy_test/api/test_dataset_collections.py
@@ -101,6 +101,25 @@ def test_create_list_of_new_pairs(self):
         pair_1_element_1 = pair_elements[0]
         assert pair_1_element_1["element_index"] == 0

+    def test_create_paired_or_unpaired(self, history_id):
+        collection_name = "a singleton in a paired_or_unpaired collection"
+        contents = [
+            ("unpaired", "1\t2\t3"),
+        ]
+        single_identifier = self.dataset_collection_populator.list_identifiers(history_id, contents)
+        payload = dict(
+            name=collection_name,
+            instance_type="history",
+            history_id=history_id,
+            element_identifiers=single_identifier,
+            collection_type="paired_or_unpaired",
+        )
+        create_response = self._post("dataset_collections", payload, json=True)
+        dataset_collection = self._check_create_response(create_response)
+        assert dataset_collection["collection_type"] == "paired_or_unpaired"
+        returned_elements = dataset_collection["elements"]
+        assert len(returned_elements) == 1, dataset_collection
+
     def test_create_record(self, history_id):
         contents = [
             ("condition", "1\t2\t3"),
diff --git a/lib/galaxy_test/api/test_tool_execute.py b/lib/galaxy_test/api/test_tool_execute.py
index 95bf43e27921..f85afcc4642a 100644
--- a/lib/galaxy_test/api/test_tool_execute.py
+++ b/lib/galaxy_test/api/test_tool_execute.py
@@ -380,6 +380,104 @@ def test_map_over_collection(
     output_collection.assert_has_dataset_element("reverse").with_contents_stripped("456")


+@requires_tool_id("collection_paired_or_unpaired")
+def test_map_over_paired_or_unpaired_with_list_paired(target_history: TargetHistory, required_tool: RequiredTool):
+    hdca = target_history.with_example_list_of_pairs()
+    execute = required_tool.execute.with_inputs(
+        {"f1": {"batch": True, "values": [{"map_over_type": "paired", **hdca.src_dict}]}}
+    )
+    execute.assert_has_n_jobs(2).assert_creates_n_implicit_collections(1)
+    output_collection = execute.assert_creates_implicit_collection(0)
+    output_collection.assert_has_dataset_element("test0").with_contents_stripped("123\n456")
+    output_collection.assert_has_dataset_element("test1").with_contents_stripped("789\n0ab")
+
+
+@requires_tool_id("collection_paired_or_unpaired")
+def test_map_over_paired_or_unpaired_with_list(target_history: TargetHistory, required_tool: RequiredTool):
+    contents = [("foo", "text for foo element")]
+    hdca = target_history.with_list(contents)
+    execute = required_tool.execute.with_inputs(
+        {"f1": {"batch": True, "values": [{"map_over_type": "single_datasets", **hdca.src_dict}]}}
+    )
+    execute.assert_has_n_jobs(1).assert_creates_n_implicit_collections(1)
+    output_collection = execute.assert_creates_implicit_collection(0)
+    output_collection.assert_has_dataset_element("foo").with_contents_stripped("text for foo element")
+
+
+@requires_tool_id("collection_paired_or_unpaired")
+def test_map_over_paired_or_unpaired_with_list_of_lists(target_history: TargetHistory, required_tool: RequiredTool):
+    hdca = target_history.with_example_list_of_lists()
+    execute = required_tool.execute.with_inputs(
+        {"f1": {"batch": True, "values": [{"map_over_type": "single_datasets", **hdca.src_dict}]}}
+    )
+    execute.assert_has_n_jobs(3).assert_creates_n_implicit_collections(1)
+    output_collection = execute.assert_creates_implicit_collection(0)
+    assert output_collection.details["collection_type"] == "list:list"
+    as_dict_0 = output_collection.with_element_dict(0)
+    assert len(as_dict_0["object"]["elements"]) == 3
+
+
+@requires_tool_id("collection_paired_or_unpaired")
+def test_adapting_dataset_to_paired_or_unpaired(target_history: TargetHistory, required_tool: RequiredTool):
+    hda1 = target_history.with_dataset("1\t2\t3").src_dict
+    execution = required_tool.execute.with_inputs(
+        {
+            "f1": {
+                "src": "CollectionAdapter",
+                "adapter_type": "PromoteDatasetToCollection",
+                "collection_type": "paired_or_unpaired",
+                "adapting": hda1,
+            }
+        }
+    )
+    execution.assert_has_job(0).with_output("out1").with_contents_stripped("1\t2\t3")
+
+
+@requires_tool_id("cat_collection")
+def test_adapting_dataset_to_list(target_history: TargetHistory, required_tool: RequiredTool):
+    hda1 = target_history.with_dataset("1\t2\t3").src_dict
+    execution = required_tool.execute.with_inputs(
+        {
+            "input1": {
+                "src": "CollectionAdapter",
+                "adapter_type": "PromoteDatasetToCollection",
+                "collection_type": "list",
+                "adapting": hda1,
+            }
+        }
+    )
+    execution.assert_has_job(0).with_output("out_file1").with_contents_stripped("1\t2\t3")
+
+
+@requires_tool_id("collection_paired_test")
+def test_adapting_two_datasets_to_paired_collection(target_history: TargetHistory, required_tool: RequiredTool):
+    hda1 = target_history.with_dataset("1\t2\t3").src_dict
+    hda2 = target_history.with_dataset("4\t5\t6").src_dict
+    execution = required_tool.execute.with_inputs(
+        {
+            "f1": {
+                "src": "CollectionAdapter",
+                "adapter_type": "PromoteDatasetsToCollection",
+                "collection_type": "paired",
+                "adapting": [
+                    {"name": "forward", **hda1},
+                    {"name": "reverse", **hda2},
+                ],
+            }
+        }
+    )
+    execution.assert_has_job(0).with_output("out1").with_contents_stripped("1\t2\t3\n4\t5\t6")
+
+
+@requires_tool_id("gx_data")
+def test_map_over_data_param_with_list_of_lists(target_history: TargetHistory, required_tool: RequiredTool):
+    hdca = target_history.with_example_list_of_lists()
+    execute = required_tool.execute.with_inputs({"parameter": {"batch": True, "values": [hdca.src_dict]}})
+    execute.assert_has_n_jobs(3).assert_creates_n_implicit_collections(1)
+    execute.assert_creates_implicit_collection(0)
+
+
 @requires_tool_id("gx_repeat_boolean_min")
 def test_optional_repeats_with_mins_filled_id(target_history: TargetHistory, required_tool: RequiredTool):
     # we have a tool test for this but I wanted to verify it wasn't just the
diff --git a/lib/galaxy_test/base/populators.py b/lib/galaxy_test/base/populators.py
index ee32c27b822c..528156ca244e 100644
--- a/lib/galaxy_test/base/populators.py
+++ b/lib/galaxy_test/base/populators.py
@@ -3951,6 +3951,12 @@ def with_list(self, contents: Optional[ListContentsDescription] = None) -> "HasS
     def with_example_list_of_pairs(self) -> "HasSrcDict":
         return HasSrcDict("hdca", self._dataset_collection_populator.example_list_of_pairs(self._history_id))

+    def with_example_list_of_lists(self) -> "HasSrcDict":
+        return HasSrcDict(
+            "hdca",
+            self._dataset_collection_populator.create_list_of_list_in_history(self._history_id, wait=True).json()["id"],
+        )
+
     @classmethod
     def _fetch_response(clz, response: Response) -> "HasSrcDict":
         api_asserts.assert_status_code_is_ok(response)
diff --git a/test/functional/tools/collection_paired_or_unpaired.xml b/test/functional/tools/collection_paired_or_unpaired.xml
new file mode 100644
index 000000000000..6bf70e7735f8
--- /dev/null
+++ b/test/functional/tools/collection_paired_or_unpaired.xml
@@ -0,0 +1,51 @@
[Only the command block of this 51-line test tool survived extraction; the surrounding XML markup (tool, inputs, outputs, and tests elements) is not recoverable from this copy of the patch.]
+        #if $f1.has_single_item:
+            cat $f1.single_item >> $out1;
+            echo "Single item"
+        #else
+            cat $f1.forward $f1['reverse'] >> $out1;
+            echo "Paired items"
+        #end if
diff --git a/test/functional/tools/sample_tool_conf.xml b/test/functional/tools/sample_tool_conf.xml
index 477fe69bde34..49e414eeabd6 100644
--- a/test/functional/tools/sample_tool_conf.xml
+++ b/test/functional/tools/sample_tool_conf.xml
@@ -213,6 +213,7 @@
[The XML of this hunk was stripped during extraction; it registers the new collection_paired_or_unpaired.xml test tool alongside the existing collection tools.]
@@ -320,5 +321,5 @@
[Stripped as well; this hunk adjusts the trailing tool entries of the file.]
diff --git a/test/unit/data/dataset_collections/test_matching.py b/test/unit/data/dataset_collections/test_matching.py
index 3ea1747d38df..c6fdac01f750 100644
--- a/test/unit/data/dataset_collections/test_matching.py
+++ b/test/unit/data/dataset_collections/test_matching.py
@@ -48,6 +48,12 @@ def test_valid_collection_subcollection_matching():
     assert_can_match((nested_list, "paired"), flat_list)


+def test_paired_can_act_as_paired_or_unpaired():
+    paired = pair_instance()
+    optional_paired = paired_or_unpaired_pair_instance()
+    assert_can_match(paired, optional_paired)
+
+
 def assert_can_match(*items):
     to_match = build_collections_to_match(*items)
     matching.MatchingCollections.for_collections(to_match, TYPE_DESCRIPTION_FACTORY)
@@ -114,6 +120,44 @@ def list_paired_instance():
     )


+def list_of_paired_and_unpaired_instance():
+    return collection_instance(
+        collection_type="list:paired_or_unpaired",
+        elements=[
+            collection_element(
+                "el1",
+                collection(
+                    "paired_or_unpaired",
+                    [
+                        hda_element("forward"),
+                        hda_element("reverse"),
+                    ],
+                ),
+            ),
+            collection_element(
+                "el2",
+                collection(
+                    "paired_or_unpaired",
+                    [
+                        hda_element("unpaired"),
+                    ],
+                ),
+            ),
+        ],
+    )
+
+
+def paired_or_unpaired_pair_instance():
+    paired_collection_instance = collection_instance(
+        collection_type="paired_or_unpaired",
+        elements=[
+            hda_element("forward"),
+            hda_element("reverse"),
+        ],
+    )
+    return paired_collection_instance
+
+
 def list_instance(collection_type="list", elements=None, ids=None):
     if not elements:
         if ids is None:
diff --git a/test/unit/data/dataset_collections/test_structure.py b/test/unit/data/dataset_collections/test_structure.py
index 1c97d353a768..438c6c1c3690 100644
--- a/test/unit/data/dataset_collections/test_structure.py
+++ b/test/unit/data/dataset_collections/test_structure.py
@@ -2,6 +2,7 @@
 from galaxy.model.dataset_collections.type_description import CollectionTypeDescriptionFactory
 from .test_matching import (
     list_of_lists_instance,
+    list_of_paired_and_unpaired_instance,
     list_paired_instance,
     pair_instance,
 )
@@ -25,6 +26,7 @@ def test_get_structure_list_paired_over_paired():
     assert tree.children[0][0] == "data1"
     assert tree.children[0][1].is_leaf

+
 def test_get_structure_list_of_lists():
     list_of_lists_type_description = factory.for_collection_type("list:list")
     tree = get_structure(list_of_lists_instance(), list_of_lists_type_description)
@@ -41,3 +43,32 @@ def test_get_structure_list_of_lists_over_list():
     assert len(tree.children) == 2
     assert tree.children[0][0] == "outer1"
     assert tree.children[0][1].is_leaf
+
+
+def test_get_structure_list_paired_or_unpaired():
+    list_pair_or_unpaired_description = factory.for_collection_type("list:paired_or_unpaired")
+    tree = get_structure(list_of_paired_and_unpaired_instance(), list_pair_or_unpaired_description)
+    assert tree.collection_type_description.collection_type == "list:paired_or_unpaired"
+    assert len(tree.children) == 2
+    assert tree.children[0][0] == "el1"
+    assert not tree.children[0][1].is_leaf
+
+
+def test_get_structure_list_paired_or_unpaired_over_paired_or_unpaired():
+    list_pair_or_unpaired_description = factory.for_collection_type("list:paired_or_unpaired")
+    tree = get_structure(
+        list_of_paired_and_unpaired_instance(), list_pair_or_unpaired_description, "paired_or_unpaired"
+    )
+    assert tree.collection_type_description.collection_type == "list"
+    assert len(tree.children) == 2
+    assert tree.children[0][0] == "el1"
+    assert tree.children[0][1].is_leaf
+
+
+def test_get_structure_list_of_lists_over_single_datasets():
+    list_of_lists_type_description = factory.for_collection_type("list:list")
+    tree = get_structure(list_of_lists_instance(), list_of_lists_type_description, "single_datasets")
+    assert tree.collection_type_description.collection_type == "list:list"
+    assert len(tree.children) == 2
+    assert tree.children[0][0] == "outer1"
+    assert not tree.children[0][1].is_leaf
diff --git a/test/unit/data/dataset_collections/test_type_descriptions.py b/test/unit/data/dataset_collections/test_type_descriptions.py
index 11a1f7282a48..6d60d2269b02 100644
--- a/test/unit/data/dataset_collections/test_type_descriptions.py
+++ b/test/unit/data/dataset_collections/test_type_descriptions.py
@@ -1,8 +1,9 @@
 from galaxy.model.dataset_collections.type_description import CollectionTypeDescriptionFactory

+factory = CollectionTypeDescriptionFactory(None)
+

 def test_simple_descriptions():
-    factory = CollectionTypeDescriptionFactory(None)
     nested_type_description = factory.for_collection_type("list:paired")
     paired_type_description = factory.for_collection_type("paired")
     assert not nested_type_description.has_subcollections_of_type("list")
@@ -11,8 +12,23 @@ def test_simple_descriptions():
     assert nested_type_description.has_subcollections_of_type(paired_type_description)
     assert nested_type_description.has_subcollections()
     assert not paired_type_description.has_subcollections()
-    assert paired_type_description.rank_collection_type() == 'paired'
-    assert nested_type_description.rank_collection_type() == 'list'
-    assert nested_type_description.effective_collection_type(paired_type_description) == 'list'
-    assert nested_type_description.effective_collection_type_description(paired_type_description).collection_type == 'list'
-    assert nested_type_description.child_collection_type() == 'paired'
+    assert paired_type_description.rank_collection_type() == "paired"
+    assert nested_type_description.rank_collection_type() == "list"
+    assert nested_type_description.effective_collection_type(paired_type_description) == "list"
+    assert (
+        nested_type_description.effective_collection_type_description(paired_type_description).collection_type == "list"
+    )
+    assert nested_type_description.child_collection_type() == "paired"
+
+
+def test_paired_or_unpaired_handling():
+    list_type_description = factory.for_collection_type("list")
+    assert list_type_description.has_subcollections_of_type("paired_or_unpaired")
+    paired_type_description = factory.for_collection_type("paired")
+    assert not paired_type_description.has_subcollections_of_type("paired_or_unpaired")
+
+    nested_type_description = factory.for_collection_type("list:paired")
+    assert nested_type_description.has_subcollections_of_type("paired_or_unpaired")
+
+    nested_list_type_description = factory.for_collection_type("list:list")
+    assert nested_list_type_description.has_subcollections_of_type("paired_or_unpaired")
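
A compact restatement of the matching rules this patch establishes, for orientation. It only
re-exercises what test_type_descriptions.py and test_matching.py above already assert, so it
should run as-is against this branch:

from galaxy.model.dataset_collections.type_description import CollectionTypeDescriptionFactory

factory = CollectionTypeDescriptionFactory(None)
paired = factory.for_collection_type("paired")
flexible = factory.for_collection_type("paired_or_unpaired")

# a paired collection satisfies a paired_or_unpaired input, and vice versa
assert paired.can_match_type(flexible)
assert flexible.can_match_type(paired)

# a flat list maps over a paired_or_unpaired input one element at a time,
# but a bare pair matches such an input whole instead of being subdivided
assert factory.for_collection_type("list").has_subcollections_of_type("paired_or_unpaired")
assert not paired.has_subcollections_of_type("paired_or_unpaired")

# the pseudo-type single_datasets splits any collection down to its datasets
assert factory.for_collection_type("list:list").has_subcollections_of_type("single_datasets")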
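
Creating a paired_or_unpaired singleton directly through the dataset collections API looks
roughly like the sketch below, a companion to test_create_paired_or_unpaired above; the
history and dataset ids are placeholders:

# POST this body to /api/dataset_collections; the created collection reports
# collection_type "paired_or_unpaired" and one element named "unpaired"
# (a two-dataset version would use the "forward"/"reverse" identifiers instead).
payload = {
    "name": "a singleton in a paired_or_unpaired collection",
    "instance_type": "history",
    "history_id": "<history id>",
    "collection_type": "paired_or_unpaired",
    "element_identifiers": [
        {"name": "unpaired", "src": "hda", "id": "<hda id>"},
    ],
}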
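
Lastly, a minimal sketch of the two client-side payload shapes accepted by the new adapter
models in lib/galaxy/tool_util/parameters/models.py, mirroring the API tests above. It assumes
DataRequestHda carries the usual {"src": "hda", "id": ...} fields; the encoded ids below are
placeholders:

from galaxy.tool_util.parameters.models import AdaptedDataCollectionRequestTypeAdapter

# promote a single HDA so it can feed a paired_or_unpaired collection input
promote_single = {
    "src": "CollectionAdapter",
    "adapter_type": "PromoteDatasetToCollection",
    "collection_type": "paired_or_unpaired",
    "adapting": {"src": "hda", "id": "f2db41e1fa331b3e"},
}

# promote two HDAs into an ephemeral paired collection
promote_pair = {
    "src": "CollectionAdapter",
    "adapter_type": "PromoteDatasetsToCollection",
    "collection_type": "paired",
    "adapting": [
        {"name": "forward", "src": "hda", "id": "f2db41e1fa331b3e"},
        {"name": "reverse", "src": "hda", "id": "f597429621d6eb2b"},
    ],
}

for payload in (promote_single, promote_pair):
    # pydantic dispatches on the adapter_type discriminator declared above
    AdaptedDataCollectionRequestTypeAdapter.validate_python(payload)

The adapters deliberately stay tight in scope - one HDA, or a named forward/reverse pair -
because they never persist a new collection; they only dress datasets up as one for the
duration of a single tool request.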