diff --git a/lib/galaxy/model/dataset_collections/registry.py b/lib/galaxy/model/dataset_collections/registry.py
index bd148edafd2d..ed75294f68e7 100644
--- a/lib/galaxy/model/dataset_collections/registry.py
+++ b/lib/galaxy/model/dataset_collections/registry.py
@@ -2,6 +2,7 @@
from .types import (
list,
paired,
+ paired_or_unpaired,
record,
)
@@ -9,6 +10,7 @@
list.ListDatasetCollectionType,
paired.PairedDatasetCollectionType,
record.RecordDatasetCollectionType,
+ paired_or_unpaired.PairedOrUnpairedDatasetCollectionType,
]
diff --git a/lib/galaxy/model/dataset_collections/structure.py b/lib/galaxy/model/dataset_collections/structure.py
index 673585a8c87f..83de5c9a87a9 100644
--- a/lib/galaxy/model/dataset_collections/structure.py
+++ b/lib/galaxy/model/dataset_collections/structure.py
@@ -1,9 +1,10 @@
""" Module for reasoning about structure of and matching hierarchical collections of data.
"""
-import logging
+from typing import TYPE_CHECKING
-log = logging.getLogger(__name__)
+if TYPE_CHECKING:
+ from .type_description import CollectionTypeDescription
class Leaf:
@@ -149,7 +150,7 @@ def clone(self):
return Tree(cloned_children, self.collection_type_description)
def __str__(self):
- return f"Tree[collection_type={self.collection_type_description},children={','.join(f'{identifier_and_element[0]}={identifier_and_element[1]}' for identifier_and_element in self.children)}]"
+ return f"Tree[collection_type={self.collection_type_description},children=({','.join(f'{identifier_and_element[0]}={identifier_and_element[1]}' for identifier_and_element in self.children)})]"
def tool_output_to_structure(get_sliced_input_collection_structure, tool_output, collections_manager):
@@ -190,7 +191,9 @@ def dict_map(func, input_dict):
return {k: func(v) for k, v in input_dict.items()}
-def get_structure(dataset_collection_instance, collection_type_description, leaf_subcollection_type=None):
+def get_structure(
+ dataset_collection_instance, collection_type_description: "CollectionTypeDescription", leaf_subcollection_type=None
+):
if leaf_subcollection_type:
collection_type_description = collection_type_description.effective_collection_type_description(
leaf_subcollection_type
diff --git a/lib/galaxy/model/dataset_collections/subcollections.py b/lib/galaxy/model/dataset_collections/subcollections.py
index af6c2a397326..a47f8594ee16 100644
--- a/lib/galaxy/model/dataset_collections/subcollections.py
+++ b/lib/galaxy/model/dataset_collections/subcollections.py
@@ -1,4 +1,5 @@
from galaxy import exceptions
+from .adapters import PromoteCollectionElementToCollectionAdapter
def split_dataset_collection_instance(dataset_collection_instance, collection_type):
@@ -6,13 +7,27 @@ def split_dataset_collection_instance(dataset_collection_instance, collection_ty
return _split_dataset_collection(dataset_collection_instance.collection, collection_type)
+def _is_a_subcollection_type(this_collection_type: str, collection_type: str):
+    """Return True when ``collection_type`` can be split out of ``this_collection_type``."""
+    if collection_type == "single_datasets":
+        # can be a subcollection of anything effectively...
+        return True
+    is_proper_suffix = this_collection_type != collection_type and this_collection_type.endswith(collection_type)
+    return is_proper_suffix
+
+
def _split_dataset_collection(dataset_collection, collection_type):
this_collection_type = dataset_collection.collection_type
- if not this_collection_type.endswith(collection_type) or this_collection_type == collection_type:
+ is_this_collection_nested = ":" in this_collection_type
+ if not _is_a_subcollection_type(this_collection_type, collection_type):
raise exceptions.MessageException("Cannot split collection in desired fashion.")
split_elements = []
for element in dataset_collection.elements:
+ if not is_this_collection_nested and collection_type == "single_datasets":
+ split_elements.append(PromoteCollectionElementToCollectionAdapter(element))
+ continue
+
child_collection = element.child_collection
if child_collection is None:
raise exceptions.MessageException("Cannot split collection in desired fashion.")
diff --git a/lib/galaxy/model/dataset_collections/type_description.py b/lib/galaxy/model/dataset_collections/type_description.py
index 4120db4c6f94..87492f7c29cf 100644
--- a/lib/galaxy/model/dataset_collections/type_description.py
+++ b/lib/galaxy/model/dataset_collections/type_description.py
@@ -51,9 +51,12 @@ def effective_collection_type(self, subcollection_type):
if not self.has_subcollections_of_type(subcollection_type):
raise ValueError(f"Cannot compute effective subcollection type of {subcollection_type} over {self}")
+ if subcollection_type == "single_datasets":
+ return self.collection_type
+
return self.collection_type[: -(len(subcollection_type) + 1)]
- def has_subcollections_of_type(self, other_collection_type):
+ def has_subcollections_of_type(self, other_collection_type) -> bool:
"""Take in another type (either flat string or another
CollectionTypeDescription) and determine if this collection contains
subcollections matching that type.
@@ -65,18 +68,37 @@ def has_subcollections_of_type(self, other_collection_type):
if hasattr(other_collection_type, "collection_type"):
other_collection_type = other_collection_type.collection_type
collection_type = self.collection_type
- return collection_type.endswith(other_collection_type) and collection_type != other_collection_type
+ if collection_type == other_collection_type:
+ return False
+ if collection_type.endswith(other_collection_type):
+ return True
+ if other_collection_type == "paired_or_unpaired":
+ # this can be thought of as a subcollection of anything except a pair
+ # since it would match a pair exactly
+ return collection_type != "paired"
+ if other_collection_type == "single_datasets":
+ # effectively any collection has unpaired subcollections
+ return True
+ return False
def is_subcollection_of_type(self, other_collection_type):
if not hasattr(other_collection_type, "collection_type"):
other_collection_type = self.collection_type_description_factory.for_collection_type(other_collection_type)
return other_collection_type.has_subcollections_of_type(self)
-    def can_match_type(self, other_collection_type):
+    def can_match_type(self, other_collection_type) -> bool:
         if hasattr(other_collection_type, "collection_type"):
             other_collection_type = other_collection_type.collection_type
         collection_type = self.collection_type
-        return other_collection_type == collection_type
+        if other_collection_type == collection_type:
+            return True
+
+        # paired and paired_or_unpaired collections can stand in for one another.
+        # can we push this to the type registry somehow?
+        interchangeable = (("paired", "paired_or_unpaired"), ("paired_or_unpaired", "paired"))
+        if (other_collection_type, collection_type) in interchangeable:
+            return True
+        return False
def subcollection_type_description(self):
if not self.__has_subcollections:
diff --git a/lib/galaxy/model/dataset_collections/types/__init__.py b/lib/galaxy/model/dataset_collections/types/__init__.py
index c294f6957be6..30eee0489d58 100644
--- a/lib/galaxy/model/dataset_collections/types/__init__.py
+++ b/lib/galaxy/model/dataset_collections/types/__init__.py
@@ -21,3 +21,11 @@ def generate_elements(self, dataset_instances: dict, **kwds):
class BaseDatasetCollectionType(DatasetCollectionType):
def _validation_failed(self, message):
raise exceptions.ObjectAttributeInvalidException(message)
+
+    def _ensure_dataset_with_identifier(self, dataset_instances: dict, name: str):
+        """Return the dataset registered under ``name`` or raise if it is absent."""
+        found = dataset_instances.get(name)
+        if found is not None:
+            return found
+        message = f"An element with the identifier {name} is required to create this collection type"
+        raise exceptions.ObjectAttributeInvalidException(message)
diff --git a/lib/galaxy/model/dataset_collections/types/paired.py b/lib/galaxy/model/dataset_collections/types/paired.py
index e774ab67aace..825283fb0243 100644
--- a/lib/galaxy/model/dataset_collections/types/paired.py
+++ b/lib/galaxy/model/dataset_collections/types/paired.py
@@ -1,3 +1,4 @@
+from galaxy.exceptions import RequestParameterInvalidException
from galaxy.model import (
DatasetCollectionElement,
HistoryDatasetAssociation,
@@ -16,13 +17,19 @@ class PairedDatasetCollectionType(BaseDatasetCollectionType):
     collection_type = "paired"
     def generate_elements(self, dataset_instances, **kwds):
-        if forward_dataset := dataset_instances.get(FORWARD_IDENTIFIER):
+        num_datasets = len(dataset_instances)
+        if num_datasets != 2:
+            raise RequestParameterInvalidException(
+                "Incorrect number of datasets - 2 datasets exactly are required to create a paired collection"
+            )
+        # both identifiers are mandatory - the helper raises if either one is missing
+        if forward_dataset := self._ensure_dataset_with_identifier(dataset_instances, FORWARD_IDENTIFIER):
             left_association = DatasetCollectionElement(
                 element=forward_dataset,
                 element_identifier=FORWARD_IDENTIFIER,
             )
             yield left_association
-        if reverse_dataset := dataset_instances.get(REVERSE_IDENTIFIER):
+        if reverse_dataset := self._ensure_dataset_with_identifier(dataset_instances, REVERSE_IDENTIFIER):
             right_association = DatasetCollectionElement(
                 element=reverse_dataset,
                 element_identifier=REVERSE_IDENTIFIER,
diff --git a/lib/galaxy/model/dataset_collections/types/paired_or_unpaired.py b/lib/galaxy/model/dataset_collections/types/paired_or_unpaired.py
new file mode 100644
index 000000000000..8a8a6cd7e112
--- /dev/null
+++ b/lib/galaxy/model/dataset_collections/types/paired_or_unpaired.py
@@ -0,0 +1,46 @@
+from galaxy.exceptions import RequestParameterInvalidException
+from galaxy.model import (
+ DatasetCollectionElement,
+ HistoryDatasetAssociation,
+)
+from . import BaseDatasetCollectionType
+from .paired import (
+ FORWARD_IDENTIFIER,
+ REVERSE_IDENTIFIER,
+)
+
+SINGLETON_IDENTIFIER = "unpaired"
+
+
+class PairedOrUnpairedDatasetCollectionType(BaseDatasetCollectionType):
+    """Collection holding either a forward/reverse pair or one unpaired dataset."""
+
+    collection_type = "paired_or_unpaired"
+
+    def generate_elements(self, dataset_instances, **kwds):
+        """Yield one element per dataset in ``dataset_instances``.
+
+        Two datasets must be keyed by the forward and reverse identifiers and a
+        single dataset by the unpaired identifier; a missing identifier is
+        reported by ``_ensure_dataset_with_identifier``.
+
+        :raises RequestParameterInvalidException: unless 1 or 2 datasets are supplied.
+        """
+        num_datasets = len(dataset_instances)
+        if num_datasets > 2 or num_datasets < 1:
+            raise RequestParameterInvalidException(
+                "Incorrect number of datasets - 1 or 2 datasets is required to create a paired_or_unpaired collection"
+            )
+
+        if num_datasets == 2:
+            identifiers = (FORWARD_IDENTIFIER, REVERSE_IDENTIFIER)
+        else:
+            identifiers = (SINGLETON_IDENTIFIER,)
+        for identifier in identifiers:
+            # the helper raises when the identifier is absent; the truthiness
+            # guard mirrors the checks used by the paired collection type
+            if dataset := self._ensure_dataset_with_identifier(dataset_instances, identifier):
+                yield DatasetCollectionElement(
+                    element=dataset,
+                    element_identifier=identifier,
+                )
diff --git a/lib/galaxy/schema/schema.py b/lib/galaxy/schema/schema.py
index 8c031a0fcfe1..010e7a78c278 100644
--- a/lib/galaxy/schema/schema.py
+++ b/lib/galaxy/schema/schema.py
@@ -33,6 +33,8 @@
from typing_extensions import (
Annotated,
Literal,
+ NotRequired,
+ TypedDict,
)
from galaxy.schema import partial_model
diff --git a/lib/galaxy/tool_util/parameters/models.py b/lib/galaxy/tool_util/parameters/models.py
index d0e7e6bbcb8f..1c6eddfc403a 100644
--- a/lib/galaxy/tool_util/parameters/models.py
+++ b/lib/galaxy/tool_util/parameters/models.py
@@ -33,6 +33,7 @@
StrictInt,
StrictStr,
Tag,
+ TypeAdapter,
ValidationError,
)
from typing_extensions import (
@@ -329,6 +330,7 @@ def request_requires_value(self) -> bool:
DataSrcT = Literal["hda", "ldda"]
MultiDataSrcT = Literal["hda", "ldda", "hdca"]
+# @jmchilton you meant CollectionSrcT - fix that at some point please.
CollectionStrT = Literal["hdca"]
TestCaseDataSrcT = Literal["File"]
@@ -527,6 +529,78 @@ class DataCollectionRequestInternal(StrictModel):
id: StrictInt
+CollectionAdapterSrcT = Literal["CollectionAdapter"]
+
+
+class AdaptedDataCollectionRequestBase(StrictModel):
+ src: CollectionAdapterSrcT
+
+
+class AdaptedDataCollectionPromoteDatasetToCollectionRequest(AdaptedDataCollectionRequestBase):
+ adapter_type: Literal["PromoteDatasetToCollection"]
+ collection_type: Literal["list", "paired_or_unpaired"]
+ adapting: DataRequestHda
+
+
+# the identifier field is called ``name`` (not ``element_identifier``) to align with the fetch API, etc...
+class AdapterElementRequest(DataRequestHda):
+ name: str # element_identifier
+
+
+class AdaptedDataCollectionPromoteDatasetsToCollectionRequest(AdaptedDataCollectionRequestBase):
+ adapter_type: Literal["PromoteDatasetsToCollection"]
+ # could allow list in here without changing much else I think but I'm trying to keep these tight in scope
+ collection_type: Literal["paired", "paired_or_unpaired"]
+ adapting: List[AdapterElementRequest]
+
+
+AdaptedDataCollectionRequest = Annotated[
+ Union[
+ AdaptedDataCollectionPromoteDatasetToCollectionRequest, AdaptedDataCollectionPromoteDatasetsToCollectionRequest
+ ],
+ Field(discriminator="adapter_type"),
+]
+AdaptedDataCollectionRequestTypeAdapter = TypeAdapter(AdaptedDataCollectionRequest)
+
+
+class DatasetCollectionElementReference(StrictModel):
+ src: Literal["dce"]
+ id: StrictInt
+
+
+class AdaptedDataCollectionPromoteCollectionElementToCollectionRequestInternal(AdaptedDataCollectionRequestBase):
+ adapter_type: Literal["PromoteCollectionElementToCollection"]
+ adapting: DatasetCollectionElementReference
+
+
+class AdaptedDataCollectionPromoteDatasetToCollectionRequestInternal(AdaptedDataCollectionRequestBase):
+ adapter_type: Literal["PromoteDatasetToCollection"]
+ collection_type: Literal["list", "paired_or_unpaired"]
+ adapting: DataRequestInternalHda
+
+
+class AdapterElementRequestInternal(DataRequestInternalHda):
+ name: str # element_identifier
+
+
+class AdaptedDataCollectionPromoteDatasetsToCollectionRequestInternal(AdaptedDataCollectionRequestBase):
+ adapter_type: Literal["PromoteDatasetsToCollection"]
+ # could allow list in here without changing much else I think but I'm trying to keep these tight in scope
+ collection_type: Literal["paired", "paired_or_unpaired"]
+ adapting: List[AdapterElementRequestInternal]
+
+
+AdaptedDataCollectionRequestInternal = Annotated[
+ Union[
+ AdaptedDataCollectionPromoteCollectionElementToCollectionRequestInternal,
+ AdaptedDataCollectionPromoteDatasetToCollectionRequestInternal,
+ AdaptedDataCollectionPromoteDatasetsToCollectionRequestInternal,
+ ],
+ Field(discriminator="adapter_type"),
+]
+AdaptedDataCollectionRequestInternalTypeAdapter = TypeAdapter(AdaptedDataCollectionRequestInternal)
+
+
class DataCollectionParameterModel(BaseGalaxyToolParameterModelDefinition):
parameter_type: Literal["gx_data_collection"] = "gx_data_collection"
collection_type: Optional[str] = None
diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py
index 1174db417b3b..08a6a0054ec4 100644
--- a/lib/galaxy/tools/__init__.py
+++ b/lib/galaxy/tools/__init__.py
@@ -3534,6 +3534,63 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history
)
+class SplitPairedAndUnpairedTool(DatabaseOperationTool):
+    """Split a list-like input collection into unpaired and paired output collections."""
+
+    tool_type = "split_paired_and_unpaired"
+    require_terminal_states = False
+    require_dataset_ok = False
+
+    def produce_outputs(self, trans, out_data, output_collections, incoming, history, **kwds):
+        has_collection = incoming["input"]
+        # a DCE exposes its collection as element_object, an HDCA as collection
+        if hasattr(has_collection, "element_type"):
+            collection = has_collection.element_object
+        else:
+            collection = has_collection.collection
+
+        collection_type = collection.collection_type
+        assert collection_type in ["list", "list:paired", "list:paired_or_unpaired"]
+
+        unpaired_dce_copies = {}
+        paired_dce_copies = {}
+        paired_datasets = []
+
+        def _is_dataset(dce):
+            return getattr(dce.element_object, "history_content_type", None) == "dataset"
+
+        def _handle_unpaired(dce):
+            element_identifier = dce.element_identifier
+            assert _is_dataset(dce)
+            dataset = dce.element_object
+            unpaired_dce_copies[element_identifier] = dataset.copy(copy_tags=dataset.tags, flush=False)
+
+        def _handle_paired(dce):
+            element_identifier = dce.element_identifier
+            pair_copy = dce.element_object.copy(flush=False)
+            paired_dce_copies[element_identifier] = pair_copy
+            paired_datasets.extend(pair_copy.elements[index].element_object for index in (0, 1))
+
+        if collection_type == "list":
+            for element in collection.elements:
+                _handle_unpaired(element)
+        elif collection_type == "list:paired":
+            for element in collection.elements:
+                _handle_paired(element)
+        elif collection_type == "list:paired_or_unpaired":
+            for element in collection.elements:
+                handler = _handle_unpaired if _is_dataset(element) else _handle_paired
+                handler(element)
+
+        self._add_datasets_to_history(history, unpaired_dce_copies.values())
+        self._add_datasets_to_history(history, paired_datasets)
+        # both outputs are created even when one of them ends up empty
+        for output_name, copies in (("output_unpaired", unpaired_dce_copies), ("output_paired", paired_dce_copies)):
+            output_collections.create_collection(
+                self.outputs[output_name], output_name, elements=copies, propagate_hda_tags=False
+            )
+
+
class ExtractDatasetCollectionTool(DatabaseOperationTool):
tool_type = "extract_dataset"
require_terminal_states = False
diff --git a/lib/galaxy/tools/actions/__init__.py b/lib/galaxy/tools/actions/__init__.py
index f7a2138795a3..df4f87ad009d 100644
--- a/lib/galaxy/tools/actions/__init__.py
+++ b/lib/galaxy/tools/actions/__init__.py
@@ -34,6 +34,7 @@
WorkflowRequestInputParameter,
)
from galaxy.model.base import transaction
+from galaxy.model.dataset_collections.adapters import CollectionAdapter
from galaxy.model.dataset_collections.builder import CollectionBuilder
from galaxy.model.dataset_collections.matching import MatchingCollections
from galaxy.model.none_like import NoneDataset
@@ -265,12 +266,16 @@ def process_dataset(data, formats=None):
collection = None
child_collection = False
- if hasattr(value, "child_collection"):
- # if we are mapping a collection over a tool, we only require the child_collection
+ if isinstance(value, CollectionAdapter):
+ # collection was created for this execution, use it as is
+ collection = value
+ elif hasattr(value, "child_collection"):
+ # we are mapping a collection over a tool, so value is a DCE and
+ # we only require the child_collection
child_collection = True
collection = value.child_collection
else:
- # else the tool takes a collection as input so we need everything
+ # else the tool takes the collection as input so we need everything
collection = value.collection
action_tuples = collection.dataset_action_tuples
@@ -935,6 +940,19 @@ def _record_inputs(self, trans, tool, job, incoming, inp_data, inp_dataset_colle
job.add_input_dataset_collection(name, dataset_collection)
elif isinstance(dataset_collection, model.DatasetCollectionElement):
job.add_input_dataset_collection_element(name, dataset_collection)
+ elif isinstance(dataset_collection, CollectionAdapter):
+ adapting = dataset_collection.adapting
+ # TODO: record adapter json in the association I think... -John
+ if isinstance(adapting, model.DatasetCollectionElement):
+ job.add_input_dataset_collection_element(name, adapting)
+ elif isinstance(adapting, model.HistoryDatasetAssociation):
+ job.add_input_dataset(name, dataset=adapting)
+ elif isinstance(adapting, list):
+ for element in adapting:
+ input_key = f"{name}|__adapter_part__|{element.element_identifier}"
+ job.add_input_dataset(input_key, dataset=element.hda)
+ else:
+ log.info(f"not recording something as a collection in here... for name {name}")
# If this an input collection is a reduction, we expanded it for dataset security, type
# checking, and such, but the persisted input must be the original collection
diff --git a/lib/galaxy/tools/parameters/basic.py b/lib/galaxy/tools/parameters/basic.py
index 6b06711c9c1f..39a12c319b40 100644
--- a/lib/galaxy/tools/parameters/basic.py
+++ b/lib/galaxy/tools/parameters/basic.py
@@ -40,6 +40,12 @@
LibraryDatasetDatasetAssociation,
)
from galaxy.model.dataset_collections import builder
+from galaxy.model.dataset_collections.adapters import (
+ CollectionAdapter,
+ recover_adapter,
+ TransientCollectionAdapterDatasetInstanceElement,
+ validate_collection_adapter_src_dict,
+)
from galaxy.schema.fetch_data import FilesPayload
from galaxy.tool_util.parameters.factory import get_color_value
from galaxy.tool_util.parser import get_input_source as ensure_input_source
@@ -2013,6 +2019,21 @@ def src_id_to_item(
HistoryDatasetCollectionAssociation,
LibraryDatasetDatasetAssociation,
]:
+ adapter_model = None
+ if value["src"] == "CollectionAdapter":
+ adapter_model = validate_collection_adapter_src_dict(value)
+ adapting = adapter_model.adapting
+ if isinstance(adapting, list):
+ elements = []
+ for item in adapting:
+ element = TransientCollectionAdapterDatasetInstanceElement(
+ item.name,
+ src_id_to_item(sa_session, item.dict(), security),
+ )
+ elements.append(element)
+ return recover_adapter(elements, adapter_model)
+ else:
+ value = adapting.dict()
src_to_class = {
"hda": HistoryDatasetAssociation,
"ldda": LibraryDatasetDatasetAssociation,
@@ -2027,6 +2048,8 @@ def src_id_to_item(
raise ValueError(f"Unknown input source {value['src']} passed to job submission API.")
if not item:
raise ValueError("Invalid input id passed to job submission API.")
+ if adapter_model is not None:
+ item = recover_adapter(item, adapter_model)
item.extra_params = {k: v for k, v in value.items() if k not in ("src", "id")}
return item
@@ -2477,9 +2500,17 @@ def from_json(self, value, trans, other_values=None):
             # a DatasetCollectionElement instead of a
             # HistoryDatasetCollectionAssociation.
             rval = value
-        elif isinstance(value, MutableMapping) and "src" in value and "id" in value:
-            if value["src"] == "hdca":
-                rval = session.get(HistoryDatasetCollectionAssociation, trans.security.decode_id(value["id"]))
+        elif isinstance(value, CollectionAdapter):
+            log.debug("Collection parameter received an ephemeral collection adapter")
+            # if this mapped over a paired_or_unpaired collection - this parameter
+            # will receive an HDA instead of HDCA or DCE
+            rval = value
+        elif (
+            isinstance(value, MutableMapping)
+            and "src" in value
+            and ("id" in value or value["src"] == "CollectionAdapter")
+        ):
+            rval = src_id_to_item(sa_session=trans.sa_session, value=value, security=trans.security)
         elif isinstance(value, list):
             if len(value) > 0:
                 value = value[0]
@@ -2801,13 +2832,25 @@ def write_elements_to_collection(has_elements, collection_builder):
def history_item_dict_to_python(value, app, name):
if isinstance(value, MutableMapping) and "src" in value:
- if value["src"] not in ("hda", "dce", "ldda", "hdca"):
+ if value["src"] not in ("hda", "dce", "ldda", "hdca", "CollectionAdapter"):
raise ParameterValueError(f"Invalid value {value}", name)
return src_id_to_item(sa_session=app.model.context, security=app.security, value=value)
def history_item_to_json(value, app, use_security):
src = None
+
+ # unwrap adapter
+ collection_adapter: Optional[CollectionAdapter] = None
+ if isinstance(value, CollectionAdapter):
+ collection_adapter = value
+ value = value.adapting
+ if isinstance(value, list):
+ # if we are not just adapting one thing... skip the rest of this
+ # and just serialize the stuff we know we want anyway. Perhaps all
+ # this should just be the only path through. The CollectionAdapter
+ # should know what to do with just use_security I think?
+ return collection_adapter.to_adapter_model(value).dict()
if isinstance(value, MutableMapping) and "src" in value and "id" in value:
return value
elif isinstance(value, DatasetCollectionElement):
@@ -2823,4 +2866,7 @@ def history_item_to_json(value, app, use_security):
src = "hda"
if src is not None:
object_id = cached_id(value)
- return {"id": app.security.encode_id(object_id) if use_security else object_id, "src": src}
+ rval = {"id": app.security.encode_id(object_id) if use_security else object_id, "src": src}
+ if collection_adapter:
+ rval = collection_adapter.to_adapter_model(rval).dict()
+ return rval
diff --git a/lib/galaxy/tools/split_paired_and_unpaired.xml b/lib/galaxy/tools/split_paired_and_unpaired.xml
new file mode 100644
index 000000000000..b05db5207fac
--- /dev/null
+++ b/lib/galaxy/tools/split_paired_and_unpaired.xml
@@ -0,0 +1,132 @@
+
+
+
+
+
+ operation_2409
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/lib/galaxy/tools/wrappers.py b/lib/galaxy/tools/wrappers.py
index 12d3d779cafc..7cefacbd4269 100644
--- a/lib/galaxy/tools/wrappers.py
+++ b/lib/galaxy/tools/wrappers.py
@@ -19,7 +19,10 @@
Union,
)
-from typing_extensions import TypeAlias
+from typing_extensions import (
+ Self,
+ TypeAlias,
+)
from galaxy.model import (
DatasetCollection,
@@ -489,6 +492,14 @@ def _path_or_uri(self) -> str:
def file_name(self) -> str:
return str(self)
+ @property
+ def has_single_item(self) -> bool:
+ return True
+
+ @property
+ def single_item(self) -> Self:
+ return self
+
def __getattr__(self, key: Any) -> Any:
if key in ("extra_files_path", "files_path"):
if not self.compute_environment:
@@ -766,6 +777,14 @@ def serialize(
include_collection_name=include_collection_name,
)
+ @property
+ def has_single_item(self) -> bool:
+ return self.__input_supplied and len(self.__element_instance_list) == 1
+
+ @property
+ def single_item(self) -> Self:
+ return self[0]
+
@property
def is_input_supplied(self) -> bool:
return self.__input_supplied
diff --git a/lib/galaxy_test/api/test_dataset_collections.py b/lib/galaxy_test/api/test_dataset_collections.py
index 372d693d4fd0..d2b3e416f5a2 100644
--- a/lib/galaxy_test/api/test_dataset_collections.py
+++ b/lib/galaxy_test/api/test_dataset_collections.py
@@ -101,6 +101,25 @@ def test_create_list_of_new_pairs(self):
pair_1_element_1 = pair_elements[0]
assert pair_1_element_1["element_index"] == 0
+    def test_create_paired_or_unpaired(self, history_id):
+        """A paired_or_unpaired collection can be created from a single "unpaired" dataset."""
+        collection_name = "a singleton in a paired_or_unpaired collection"
+        # "unpaired" is the element identifier expected for the lone dataset
+        contents = [("unpaired", "1\t2\t3")]
+        single_identifier = self.dataset_collection_populator.list_identifiers(history_id, contents)
+        payload = dict(
+            name=collection_name,
+            instance_type="history",
+            history_id=history_id,
+            element_identifiers=single_identifier,
+            collection_type="paired_or_unpaired",
+        )
+        create_response = self._post("dataset_collections", payload, json=True)
+        dataset_collection = self._check_create_response(create_response)
+        assert dataset_collection["collection_type"] == "paired_or_unpaired"
+        returned_elements = dataset_collection["elements"]
+        assert len(returned_elements) == 1, dataset_collection
+
def test_create_record(self, history_id):
contents = [
("condition", "1\t2\t3"),
diff --git a/lib/galaxy_test/api/test_tool_execute.py b/lib/galaxy_test/api/test_tool_execute.py
index 95bf43e27921..f85afcc4642a 100644
--- a/lib/galaxy_test/api/test_tool_execute.py
+++ b/lib/galaxy_test/api/test_tool_execute.py
@@ -380,6 +380,104 @@ def test_map_over_collection(
output_collection.assert_has_dataset_element("reverse").with_contents_stripped("456")
+@requires_tool_id("collection_paired_or_unpaired")
+def test_map_over_paired_or_unpaired_with_list_paired(target_history: TargetHistory, required_tool: RequiredTool):
+ hdca = target_history.with_example_list_of_pairs()
+ execute = required_tool.execute.with_inputs(
+ {"f1": {"batch": True, "values": [{"map_over_type": "paired", **hdca.src_dict}]}}
+ )
+ execute.assert_has_n_jobs(2).assert_creates_n_implicit_collections(1)
+ output_collection = execute.assert_creates_implicit_collection(0)
+ output_collection.assert_has_dataset_element("test0").with_contents_stripped("123\n456")
+ output_collection.assert_has_dataset_element("test1").with_contents_stripped("789\n0ab")
+
+
+@requires_tool_id("collection_paired_or_unpaired")
+def test_map_over_paired_or_unpaired_with_list(target_history: TargetHistory, required_tool: RequiredTool):
+ contents = [("foo", "text for foo element")]
+ hdca = target_history.with_list(contents)
+ execute = required_tool.execute.with_inputs(
+ {"f1": {"batch": True, "values": [{"map_over_type": "single_datasets", **hdca.src_dict}]}}
+ )
+ execute.assert_has_n_jobs(1).assert_creates_n_implicit_collections(1)
+ output_collection = execute.assert_creates_implicit_collection(0)
+ output_collection.assert_has_dataset_element("foo").with_contents_stripped("text for foo element")
+
+
+@requires_tool_id("collection_paired_or_unpaired")
+def test_map_over_paired_or_unpaired_with_list_of_lists(target_history: TargetHistory, required_tool: RequiredTool):
+    hdca = target_history.with_example_list_of_lists()
+    execute = required_tool.execute.with_inputs(
+        {"f1": {"batch": True, "values": [{"map_over_type": "single_datasets", **hdca.src_dict}]}}
+    )
+    execute.assert_has_n_jobs(3).assert_creates_n_implicit_collections(1)
+    output_collection = execute.assert_creates_implicit_collection(0)
+    # mapping single datasets over a list:list keeps the nested structure
+    assert output_collection.details["collection_type"] == "list:list"
+    as_dict_0 = output_collection.with_element_dict(0)
+    assert len(as_dict_0["object"]["elements"]) == 3
+
+
+@requires_tool_id("collection_paired_or_unpaired")
+def test_adapting_dataset_to_paired_or_unpaired(target_history: TargetHistory, required_tool: RequiredTool):
+ hda1 = target_history.with_dataset("1\t2\t3").src_dict
+ execution = required_tool.execute.with_inputs(
+ {
+ "f1": {
+ "src": "CollectionAdapter",
+ "adapter_type": "PromoteDatasetToCollection",
+ "collection_type": "paired_or_unpaired",
+ "adapting": hda1,
+ }
+ }
+ )
+ execution.assert_has_job(0).with_output("out1").with_contents_stripped("1\t2\t3")
+
+
+@requires_tool_id("cat_collection")
+def test_adapting_dataset_to_list(target_history: TargetHistory, required_tool: RequiredTool):
+ hda1 = target_history.with_dataset("1\t2\t3").src_dict
+ execution = required_tool.execute.with_inputs(
+ {
+ "input1": {
+ "src": "CollectionAdapter",
+ "adapter_type": "PromoteDatasetToCollection",
+ "collection_type": "list",
+ "adapting": hda1,
+ }
+ }
+ )
+ execution.assert_has_job(0).with_output("out_file1").with_contents_stripped("1\t2\t3")
+
+
+@requires_tool_id("collection_paired_test")
+def test_adapting_two_datasets_to_paired_collection(target_history: TargetHistory, required_tool: RequiredTool):
+ hda1 = target_history.with_dataset("1\t2\t3").src_dict
+ hda2 = target_history.with_dataset("4\t5\t6").src_dict
+ execution = required_tool.execute.with_inputs(
+ {
+ "f1": {
+ "src": "CollectionAdapter",
+ "adapter_type": "PromoteDatasetsToCollection",
+ "collection_type": "paired",
+ "adapting": [
+ {"name": "forward", **hda1},
+ {"name": "reverse", **hda2},
+ ],
+ }
+ }
+ )
+ execution.assert_has_job(0).with_output("out1").with_contents_stripped("1\t2\t3\n4\t5\t6")
+
+
+@requires_tool_id("gx_data")
+def test_map_over_data_param_with_list_of_lists(target_history: TargetHistory, required_tool: RequiredTool):
+ hdca = target_history.with_example_list_of_lists()
+ execute = required_tool.execute.with_inputs({"parameter": {"batch": True, "values": [hdca.src_dict]}})
+ execute.assert_has_n_jobs(3).assert_creates_n_implicit_collections(1)
+ execute.assert_creates_implicit_collection(0)
+
+
@requires_tool_id("gx_repeat_boolean_min")
def test_optional_repeats_with_mins_filled_id(target_history: TargetHistory, required_tool: RequiredTool):
# we have a tool test for this but I wanted to verify it wasn't just the
diff --git a/lib/galaxy_test/base/populators.py b/lib/galaxy_test/base/populators.py
index ee32c27b822c..528156ca244e 100644
--- a/lib/galaxy_test/base/populators.py
+++ b/lib/galaxy_test/base/populators.py
@@ -3951,6 +3951,12 @@ def with_list(self, contents: Optional[ListContentsDescription] = None) -> "HasS
def with_example_list_of_pairs(self) -> "HasSrcDict":
return HasSrcDict("hdca", self._dataset_collection_populator.example_list_of_pairs(self._history_id))
+    def with_example_list_of_lists(self) -> "HasSrcDict":
+        """Create an example list-of-lists collection in this history and wrap it as an ``hdca`` source.
+
+        The creation request is issued with ``wait=True``. The response status is
+        checked before the body is decoded so that a failed request surfaces as a
+        clear status assertion rather than a ``KeyError`` on the missing ``"id"``.
+
+        Returns:
+            A ``HasSrcDict`` of source type ``"hdca"`` holding the new collection's id.
+        """
+        response = self._dataset_collection_populator.create_list_of_list_in_history(self._history_id, wait=True)
+        # Same status check the sibling ``_fetch_response`` helper applies to its response.
+        api_asserts.assert_status_code_is_ok(response)
+        return HasSrcDict("hdca", response.json()["id"])
+
@classmethod
def _fetch_response(clz, response: Response) -> "HasSrcDict":
api_asserts.assert_status_code_is_ok(response)
diff --git a/test/functional/tools/collection_paired_or_unpaired.xml b/test/functional/tools/collection_paired_or_unpaired.xml
new file mode 100644
index 000000000000..6bf70e7735f8
--- /dev/null
+++ b/test/functional/tools/collection_paired_or_unpaired.xml
@@ -0,0 +1,51 @@
+
+
+ ## Branch on whether the $f1 collection reports a single item.
+ #if $f1.has_single_item:
+ ## Single-item case: append that item's contents to $out1 and print a marker line.
+ cat $f1.single_item >> $out1;
+ echo "Single item"
+ #else
+ ## Otherwise: append the forward then the reverse element to $out1 and print a marker line.
+ cat $f1.forward $f1['reverse'] >> $out1;
+ echo "Paired items"
+ #end if
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/test/functional/tools/sample_tool_conf.xml b/test/functional/tools/sample_tool_conf.xml
index 477fe69bde34..49e414eeabd6 100644
--- a/test/functional/tools/sample_tool_conf.xml
+++ b/test/functional/tools/sample_tool_conf.xml
@@ -213,6 +213,7 @@
+
@@ -320,5 +321,5 @@
-
+
diff --git a/test/unit/data/dataset_collections/test_matching.py b/test/unit/data/dataset_collections/test_matching.py
index 3ea1747d38df..c6fdac01f750 100644
--- a/test/unit/data/dataset_collections/test_matching.py
+++ b/test/unit/data/dataset_collections/test_matching.py
@@ -48,6 +48,12 @@ def test_valid_collection_subcollection_matching():
assert_can_match((nested_list, "paired"), flat_list)
+def test_paired_can_act_as_paired_or_unpaired():
+    """The ``pair_instance()`` fixture must match a ``paired_or_unpaired`` forward/reverse collection."""
+    assert_can_match(pair_instance(), paired_or_unpaired_pair_instance())
+
+
def assert_can_match(*items):
to_match = build_collections_to_match(*items)
matching.MatchingCollections.for_collections(to_match, TYPE_DESCRIPTION_FACTORY)
@@ -114,6 +120,44 @@ def list_paired_instance():
)
+def list_of_paired_and_unpaired_instance():
+    """Build a ``list:paired_or_unpaired`` fixture mixing a paired and an unpaired element.
+
+    ``el1`` wraps a ``paired_or_unpaired`` collection with ``forward`` and
+    ``reverse`` datasets; ``el2`` wraps one with a single ``unpaired`` dataset.
+    """
+    paired_element = collection_element(
+        "el1",
+        collection("paired_or_unpaired", [hda_element("forward"), hda_element("reverse")]),
+    )
+    unpaired_element = collection_element(
+        "el2",
+        collection("paired_or_unpaired", [hda_element("unpaired")]),
+    )
+    return collection_instance(
+        collection_type="list:paired_or_unpaired",
+        elements=[paired_element, unpaired_element],
+    )
+
+
+def paired_or_unpaired_pair_instance():
+    """Build a ``paired_or_unpaired`` collection instance holding ``forward`` and ``reverse`` datasets."""
+    members = [hda_element("forward"), hda_element("reverse")]
+    return collection_instance(collection_type="paired_or_unpaired", elements=members)
+
+
def list_instance(collection_type="list", elements=None, ids=None):
if not elements:
if ids is None:
diff --git a/test/unit/data/dataset_collections/test_structure.py b/test/unit/data/dataset_collections/test_structure.py
index 1c97d353a768..438c6c1c3690 100644
--- a/test/unit/data/dataset_collections/test_structure.py
+++ b/test/unit/data/dataset_collections/test_structure.py
@@ -2,6 +2,7 @@
from galaxy.model.dataset_collections.type_description import CollectionTypeDescriptionFactory
from .test_matching import (
list_of_lists_instance,
+ list_of_paired_and_unpaired_instance,
list_paired_instance,
pair_instance,
)
@@ -25,6 +26,7 @@ def test_get_structure_list_paired_over_paired():
assert tree.children[0][0] == "data1"
assert tree.children[0][1].is_leaf
+
def test_get_structure_list_of_lists():
list_of_lists_type_description = factory.for_collection_type("list:list")
tree = get_structure(list_of_lists_instance(), list_of_lists_type_description)
@@ -41,3 +43,32 @@ def test_get_structure_list_of_lists_over_list():
assert len(tree.children) == 2
assert tree.children[0][0] == "outer1"
assert tree.children[0][1].is_leaf
+
+
+def test_get_structure_list_paired_or_unpaired():
+    """Without a leaf subcollection type, the full ``list:paired_or_unpaired`` tree is kept.
+
+    The tree has two children and the first one (``el1``) is not a leaf.
+    """
+    type_description = factory.for_collection_type("list:paired_or_unpaired")
+    structure = get_structure(list_of_paired_and_unpaired_instance(), type_description)
+    assert structure.collection_type_description.collection_type == "list:paired_or_unpaired"
+    assert len(structure.children) == 2
+    first_child = structure.children[0]
+    assert first_child[0] == "el1"
+    assert not first_child[1].is_leaf
+
+
+def test_get_structure_list_paired_or_unpaired_over_paired_or_unpaired():
+    """With ``paired_or_unpaired`` as the leaf subcollection type, the structure collapses to ``list``.
+
+    The tree still has two children, and the first one (``el1``) is now a leaf.
+    """
+    type_description = factory.for_collection_type("list:paired_or_unpaired")
+    fixture = list_of_paired_and_unpaired_instance()
+    structure = get_structure(fixture, type_description, "paired_or_unpaired")
+    assert structure.collection_type_description.collection_type == "list"
+    assert len(structure.children) == 2
+    first_child = structure.children[0]
+    assert first_child[0] == "el1"
+    assert first_child[1].is_leaf
+
+
+def test_get_structure_list_of_lists_over_single_datasests():
+    """With ``single_datasets`` as the leaf subcollection type, a ``list:list`` keeps its full structure.
+
+    The tree has two children and the first one (``outer1``) is not a leaf.
+    """
+    type_description = factory.for_collection_type("list:list")
+    structure = get_structure(list_of_lists_instance(), type_description, "single_datasets")
+    assert structure.collection_type_description.collection_type == "list:list"
+    assert len(structure.children) == 2
+    first_child = structure.children[0]
+    assert first_child[0] == "outer1"
+    assert not first_child[1].is_leaf
diff --git a/test/unit/data/dataset_collections/test_type_descriptions.py b/test/unit/data/dataset_collections/test_type_descriptions.py
index 11a1f7282a48..6d60d2269b02 100644
--- a/test/unit/data/dataset_collections/test_type_descriptions.py
+++ b/test/unit/data/dataset_collections/test_type_descriptions.py
@@ -1,8 +1,9 @@
from galaxy.model.dataset_collections.type_description import CollectionTypeDescriptionFactory
+factory = CollectionTypeDescriptionFactory(None)
+
def test_simple_descriptions():
- factory = CollectionTypeDescriptionFactory(None)
nested_type_description = factory.for_collection_type("list:paired")
paired_type_description = factory.for_collection_type("paired")
assert not nested_type_description.has_subcollections_of_type("list")
@@ -11,8 +12,23 @@ def test_simple_descriptions():
assert nested_type_description.has_subcollections_of_type(paired_type_description)
assert nested_type_description.has_subcollections()
assert not paired_type_description.has_subcollections()
- assert paired_type_description.rank_collection_type() == 'paired'
- assert nested_type_description.rank_collection_type() == 'list'
- assert nested_type_description.effective_collection_type(paired_type_description) == 'list'
- assert nested_type_description.effective_collection_type_description(paired_type_description).collection_type == 'list'
- assert nested_type_description.child_collection_type() == 'paired'
+ assert paired_type_description.rank_collection_type() == "paired"
+ assert nested_type_description.rank_collection_type() == "list"
+ assert nested_type_description.effective_collection_type(paired_type_description) == "list"
+ assert (
+ nested_type_description.effective_collection_type_description(paired_type_description).collection_type == "list"
+ )
+ assert nested_type_description.child_collection_type() == "paired"
+
+
+def test_paired_or_unpaired_handling():
+    """Check which collection types report ``paired_or_unpaired`` subcollections.
+
+    ``list``, ``list:paired`` and ``list:list`` are expected to report them;
+    a plain ``paired`` collection is not.
+    """
+    expectations = (
+        ("list", True),
+        ("paired", False),
+        ("list:paired", True),
+        ("list:list", True),
+    )
+    for collection_type, expected in expectations:
+        description = factory.for_collection_type(collection_type)
+        # Compare truthiness only, matching a bare ``assert x`` / ``assert not x``.
+        assert bool(description.has_subcollections_of_type("paired_or_unpaired")) is expected