Skip to content

Commit

Permalink
Implement paired_or_unpaired collections...
Browse files Browse the repository at this point in the history
  • Loading branch information
jmchilton committed Dec 19, 2024
1 parent d99bb78 commit 296bf34
Show file tree
Hide file tree
Showing 11 changed files with 326 additions and 3 deletions.
2 changes: 2 additions & 0 deletions lib/galaxy/model/dataset_collections/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@
from .types import (
list,
paired,
paired_or_unpaired,
record,
)

# Dataset collection type plugin classes, one per module imported from
# ``.types`` above. Note that ``list`` here is that imported module, not the
# builtin of the same name.
# NOTE(review): presumably consumed by the registry defined further down in
# this module - the consumer is outside this view, confirm there.
PLUGIN_CLASSES = [
    list.ListDatasetCollectionType,
    paired.PairedDatasetCollectionType,
    record.RecordDatasetCollectionType,
    paired_or_unpaired.PairedOrUnpairedDatasetCollectionType,
]


Expand Down
6 changes: 6 additions & 0 deletions lib/galaxy/model/dataset_collections/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,9 @@ def generate_elements(self, dataset_instances: dict, **kwds):
class BaseDatasetCollectionType(DatasetCollectionType):
    """Base class providing validation helpers for collection type plugins."""

    def _validation_failed(self, message):
        """Report invalid collection input.

        :param message: human readable description of what is wrong.
        :raises exceptions.ObjectAttributeInvalidException: always.
        """
        raise exceptions.ObjectAttributeInvalidException(message)

    def _ensure_dataset_with_identifier(self, dataset_instances: dict, name: str):
        """Return the entry of ``dataset_instances`` stored under ``name``.

        :param dataset_instances: mapping of element identifier to dataset.
        :param name: element identifier that must be present.
        :returns: the value found under ``name``.
        :raises exceptions.ObjectAttributeInvalidException: when ``name`` is
            absent or mapped to ``None``.
        """
        dataset_instance = dataset_instances.get(name)
        if dataset_instance is None:
            # Go through the class's own failure hook rather than raising
            # directly, so every validation error leaves from one place.
            self._validation_failed(
                f"An element with the identifier {name} is required to create this collection type"
            )
        return dataset_instance
9 changes: 7 additions & 2 deletions lib/galaxy/model/dataset_collections/types/paired.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from galaxy.exceptions import RequestParameterInvalidException
from galaxy.model import (
DatasetCollectionElement,
HistoryDatasetAssociation,
Expand All @@ -16,13 +17,17 @@ class PairedDatasetCollectionType(BaseDatasetCollectionType):
collection_type = "paired"

def generate_elements(self, dataset_instances, **kwds):
if forward_dataset := dataset_instances.get(FORWARD_IDENTIFIER):
num_datasets = len(dataset_instances)
if num_datasets != 2:
raise RequestParameterInvalidException(f"Incorrect number of datasets - 2 datasets exactly are required to create a single_or_paired collection")

if forward_dataset := self._ensure_dataset_with_identifier(dataset_instances, FORWARD_IDENTIFIER):
left_association = DatasetCollectionElement(
element=forward_dataset,
element_identifier=FORWARD_IDENTIFIER,
)
yield left_association
if reverse_dataset := dataset_instances.get(REVERSE_IDENTIFIER):
if reverse_dataset := self._ensure_dataset_with_identifier(dataset_instances, REVERSE_IDENTIFIER):
right_association = DatasetCollectionElement(
element=reverse_dataset,
element_identifier=REVERSE_IDENTIFIER,
Expand Down
45 changes: 45 additions & 0 deletions lib/galaxy/model/dataset_collections/types/paired_or_unpaired.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from galaxy.exceptions import RequestParameterInvalidException
from galaxy.model import (
DatasetCollectionElement,
HistoryDatasetAssociation,
)
from . import BaseDatasetCollectionType
from .paired import (
FORWARD_IDENTIFIER,
REVERSE_IDENTIFIER,
)

# Element identifier required for the lone dataset when a paired_or_unpaired
# collection is created from a single dataset (see ``generate_elements``).
SINGLETON_IDENTIFIER = "unpaired"


class PairedOrUnpairedDatasetCollectionType(BaseDatasetCollectionType):
    """Collection type holding either a forward/reverse pair or one unpaired dataset.

    With two datasets the ``FORWARD_IDENTIFIER`` and ``REVERSE_IDENTIFIER``
    identifiers are required; with one dataset ``SINGLETON_IDENTIFIER`` is
    required.
    """

    collection_type = "paired_or_unpaired"

    def generate_elements(self, dataset_instances, **kwds):
        """Yield one ``DatasetCollectionElement`` per supplied dataset.

        Being a generator, none of the validation below runs until the result
        is first iterated.

        :param dataset_instances: mapping of element identifier to dataset.
        :param kwds: accepted for interface compatibility; not used here.
        :raises RequestParameterInvalidException: when the number of datasets
            is not 1 or 2.
        :raises ObjectAttributeInvalidException: (from
            ``_ensure_dataset_with_identifier``) when a required identifier
            is missing.
        """
        num_datasets = len(dataset_instances)
        if num_datasets not in (1, 2):
            raise RequestParameterInvalidException(
                "Incorrect number of datasets - 1 or 2 datasets is required to create a paired_or_unpaired collection"
            )

        if num_datasets == 2:
            required_identifiers = (FORWARD_IDENTIFIER, REVERSE_IDENTIFIER)
        else:
            required_identifiers = (SINGLETON_IDENTIFIER,)

        # Resolve every required identifier before yielding anything, so a
        # missing element fails up front instead of after part of the
        # collection has already been handed to the caller.
        resolved = [
            (identifier, self._ensure_dataset_with_identifier(dataset_instances, identifier))
            for identifier in required_identifiers
        ]
        for identifier, dataset in resolved:
            yield DatasetCollectionElement(
                element=dataset,
                element_identifier=identifier,
            )
2 changes: 2 additions & 0 deletions lib/galaxy/schema/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
from typing_extensions import (
Annotated,
Literal,
NotRequired,
TypedDict,
)

from galaxy.schema import partial_model
Expand Down
56 changes: 56 additions & 0 deletions lib/galaxy/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3534,6 +3534,62 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history
)


class SplitPairedAndUnpairedTool(DatabaseOperationTool):
    """Collection operation that separates unpaired datasets from paired elements.

    Accepts a ``list``, ``list:paired`` or ``list:paired_or_unpaired``
    collection and fills two output collections, ``output_unpaired`` and
    ``output_paired``, with copies of the matching elements.
    """

    tool_type = "split_paired_and_unpaired"
    require_terminal_states = False
    require_dataset_ok = False

    def produce_outputs(self, trans, out_data, output_collections, incoming, history, **kwds):
        """Copy the input's elements into the unpaired and paired outputs.

        The input is read from ``incoming["input"]``. Copies are keyed by the
        original element identifier; copied datasets (including the first two
        members of each copied pair) are added to ``history`` before both
        output collections are created.
        """
        source = incoming["input"]
        # An object exposing ``element_type`` is treated as a DCE wrapping the
        # collection; anything else is treated as an HDCA.
        collection = source.element_object if hasattr(source, "element_type") else source.collection

        collection_type = collection.collection_type
        assert collection_type in ["list", "list:paired", "list:paired_or_unpaired"]

        unpaired_copies = {}
        paired_copies = {}
        paired_members = []

        def _copy_unpaired(dce):
            identifier = dce.element_identifier
            assert getattr(dce.element_object, "history_content_type", None) == "dataset"
            original = dce.element_object
            unpaired_copies[identifier] = original.copy(copy_tags=original.tags, flush=False)

        def _copy_paired(dce):
            identifier = dce.element_identifier
            pair_copy = dce.element_object.copy(flush=False)
            paired_copies[identifier] = pair_copy
            # Collect the first two members of the copied pair, in order.
            paired_members.extend(pair_copy.elements[index].element_object for index in (0, 1))

        def _copy_by_content(dce):
            # Mixed collections are routed per element: datasets are
            # unpaired, everything else is handled as a pair.
            if getattr(dce.element_object, "history_content_type", None) == "dataset":
                _copy_unpaired(dce)
            else:
                _copy_paired(dce)

        handlers = {
            "list": _copy_unpaired,
            "list:paired": _copy_paired,
            "list:paired_or_unpaired": _copy_by_content,
        }
        handler = handlers.get(collection_type)
        if handler is not None:
            for element in collection.elements:
                handler(element)

        self._add_datasets_to_history(history, unpaired_copies.values())
        self._add_datasets_to_history(history, paired_members)
        output_collections.create_collection(
            self.outputs["output_unpaired"], "output_unpaired", elements=unpaired_copies, propagate_hda_tags=False
        )
        output_collections.create_collection(
            self.outputs["output_paired"], "output_paired", elements=paired_copies, propagate_hda_tags=False
        )

class ExtractDatasetCollectionTool(DatabaseOperationTool):
tool_type = "extract_dataset"
require_terminal_states = False
Expand Down
132 changes: 132 additions & 0 deletions lib/galaxy/tools/split_paired_and_unpaired.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
<tool id="__SPLIT_PAIRED_AND_UNPAIRED__"
      name="Split Paired and Unpaired"
      version="1.0.0"
      tool_type="split_paired_and_unpaired">
    <description></description>
    <type class="SplitPairedAndUnpairedTool" module="galaxy.tools" />
    <action module="galaxy.tools.actions.model_operations"
            class="ModelOperationToolAction"/>
    <edam_operations>
        <edam_operation>operation_2409</edam_operation>
    </edam_operations>
    <inputs>
        <param name="input" type="data_collection" label="Input Collection" collection_type="list:paired,list,list:paired_or_unpaired" />
    </inputs>
    <outputs>
        <collection name="output_unpaired" format_source="input" type="list" label="${on_string} (unpaired)" >
        </collection>
        <collection name="output_paired" format_source="input" type="list:paired" label="${on_string} (paired)" >
        </collection>
    </outputs>
    <tests>
        <!-- A plain list: everything lands in the unpaired output. -->
        <test>
            <param name="input">
                <collection type="list">
                    <element name="el1" value="simple_line.txt" />
                    <element name="el2" value="simple_line_alternative.txt" />
                </collection>
            </param>
            <output_collection name="output_unpaired" type="list" count="2">
                <element name="el1" ftype="txt">
                    <assert_contents>
                        <has_line line="This is a line of text." />
                    </assert_contents>
                </element>
                <element name="el2" ftype="txt">
                    <assert_contents>
                        <has_line line="This is a different line of text." />
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="output_paired" type="list:paired" count="0">
            </output_collection>
        </test>
        <!-- A list of pairs: everything lands in the paired output. -->
        <test>
            <param name="input">
                <collection type="list:paired">
                    <element name="el1">
                        <collection type="paired">
                            <element name="forward" value="simple_line.txt" />
                            <element name="reverse" value="simple_line_alternative.txt" />
                        </collection>
                    </element>
                </collection>
            </param>
            <output_collection name="output_unpaired" type="list" count="0">
            </output_collection>
            <output_collection name="output_paired" type="list:paired" count="1">
                <element name="el1">
                    <element name="forward">
                        <assert_contents>
                            <has_line line="This is a line of text." />
                        </assert_contents>
                    </element>
                    <element name="reverse">
                        <assert_contents>
                            <has_line line="This is a different line of text." />
                        </assert_contents>
                    </element>
                </element>
            </output_collection>
        </test>
        <!-- A mixed list: elements are split between both outputs. -->
        <test>
            <param name="input">
                <collection type="list:paired_or_unpaired">
                    <element name="el1">
                        <collection type="paired">
                            <element name="forward" value="simple_line.txt" />
                            <element name="reverse" value="simple_line_alternative.txt" />
                        </collection>
                    </element>
                    <element name="el2" value="simple_line.txt">
                    </element>
                    <element name="el3" value="simple_line_alternative.txt">
                    </element>
                </collection>
            </param>
            <output_collection name="output_unpaired" type="list" count="2">
                <element name="el2" ftype="txt">
                    <assert_contents>
                        <has_line line="This is a line of text." />
                    </assert_contents>
                </element>
                <element name="el3" ftype="txt">
                    <assert_contents>
                        <has_line line="This is a different line of text." />
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="output_paired" type="list:paired" count="1">
                <element name="el1">
                    <element name="forward">
                        <assert_contents>
                            <has_line line="This is a line of text." />
                        </assert_contents>
                    </element>
                    <element name="reverse">
                        <assert_contents>
                            <has_line line="This is a different line of text." />
                        </assert_contents>
                    </element>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[

========
Synopsis
========

Split a collection into one collection of its unpaired datasets and one collection of its paired datasets.

===========
Description
===========

This tool takes a ``list``, ``list:paired`` or ``list:paired_or_unpaired`` collection and produces two new collections:

- a ``list`` holding every unpaired dataset of the input, and
- a ``list:paired`` holding every paired element of the input.

Element identifiers are preserved. An output collection is empty when the input contains no elements of that kind.

.. class:: infomark

This tool will create new history datasets for your collection but your quota usage will not increase.

    ]]></help>
</tool>
4 changes: 4 additions & 0 deletions lib/galaxy/tools/wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,10 @@ def serialize(
include_collection_name=include_collection_name,
)

@property
def has_single_item(self) -> bool:
    """Tell whether an input was supplied and holds exactly one element instance.

    When no input was supplied, the falsy ``__input_supplied`` value itself is
    returned, matching short-circuit ``and`` semantics.
    """
    supplied = self.__input_supplied
    if not supplied:
        return supplied
    return len(self.__element_instance_list) == 1

@property
def is_input_supplied(self) -> bool:
    """Return the stored ``__input_supplied`` flag of this wrapper."""
    return self.__input_supplied
Expand Down
19 changes: 19 additions & 0 deletions lib/galaxy_test/api/test_dataset_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,25 @@ def test_create_list_of_new_pairs(self):
pair_1_element_1 = pair_elements[0]
assert pair_1_element_1["element_index"] == 0

def test_create_paried_or_unpaired(self, history_id):
    """A lone ``unpaired`` element can be posted as a paired_or_unpaired collection."""
    element_identifiers = self.dataset_collection_populator.list_identifiers(
        history_id,
        [("unpaired", "1\t2\t3")],
    )
    payload = {
        "name": "a singleton in a paired_or_unpaired collection",
        "instance_type": "history",
        "history_id": history_id,
        "element_identifiers": element_identifiers,
        "collection_type": "paired_or_unpaired",
    }
    response = self._post("dataset_collections", payload, json=True)
    created = self._check_create_response(response)
    assert created["collection_type"] == "paired_or_unpaired"
    # Exactly one element is expected back for the singleton case.
    elements = created["elements"]
    assert len(elements) == 1, created

def test_create_record(self, history_id):
contents = [
("condition", "1\t2\t3"),
Expand Down
51 changes: 51 additions & 0 deletions test/functional/tools/collection_paired_or_unpaired.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
<tool id="collection_paired_or_unpaired" name="collection_paired_or_unpaired" version="0.1.0">
    <!-- Test tool: concatenates the members of a paired_or_unpaired
         collection and reports on stdout which shape it received. -->
    <command>
        #if $f1.has_single_item:
            cat $f1.unpaired >> $out1;
            echo "Single item"
        #else
            cat $f1.forward $f1['reverse'] >> $out1;
            echo "Paired items"
        #end if
    </command>
    <inputs>
        <param name="f1" type="data_collection" collection_type="paired_or_unpaired" label="Input" />
    </inputs>
    <outputs>
        <data format="txt" name="out1" />
    </outputs>
    <tests>
        <test>
            <param name="f1">
                <collection type="paired_or_unpaired" name="collection name">
                    <element name="forward" value="simple_line.txt" />
                    <element name="reverse" value="simple_line_alternative.txt" />
                </collection>
            </param>
            <output name="out1">
                <assert_contents>
                    <has_line line="This is a line of text." />
                    <has_line line="This is a different line of text." />
                </assert_contents>
            </output>
            <assert_stdout>
                <has_line line="Paired items" />
            </assert_stdout>
        </test>
        <test>
            <param name="f1">
                <collection type="paired_or_unpaired" name="collection name">
                    <!-- The single element must be named "unpaired": that is
                         the identifier the collection type requires and the
                         one the command reads via $f1.unpaired. -->
                    <element name="unpaired" value="simple_line.txt" />
                </collection>
            </param>
            <output name="out1">
                <assert_contents>
                    <has_line line="This is a line of text." />
                </assert_contents>
            </output>
            <assert_stdout>
                <has_line line="Single item" />
            </assert_stdout>
        </test>
    </tests>
</tool>
Loading

0 comments on commit 296bf34

Please sign in to comment.