Skip to content

Commit

Permalink
Merge pull request #16662 from lldelisle/synchronize_collections
Browse files Browse the repository at this point in the history
Add harmonize collections tool (or whatever other name)
  • Loading branch information
jdavcs authored Feb 26, 2024
2 parents 37450c0 + b7982ea commit 4cc9a7a
Show file tree
Hide file tree
Showing 4 changed files with 271 additions and 0 deletions.
1 change: 1 addition & 0 deletions lib/galaxy/config/sample/tool_conf.xml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
<tool file="${model_tools_path}/relabel_from_file.xml" />
<tool file="${model_tools_path}/filter_from_file.xml" />
<tool file="${model_tools_path}/sort_collection_list.xml" />
<tool file="${model_tools_path}/harmonize_two_collections_list.xml" />
<tool file="${model_tools_path}/tag_collection_from_file.xml" />
<tool file="${model_tools_path}/apply_rules.xml" />
<tool file="${model_tools_path}/build_list.xml" />
Expand Down
56 changes: 56 additions & 0 deletions lib/galaxy/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3601,6 +3601,62 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history
)


class HarmonizeTool(DatabaseOperationTool):
    """Reduce two collections to the element identifiers they have in common.

    Both outputs contain exactly the identifiers present in ``input1`` and in
    ``input2``, listed in the order in which they appear in ``input1``.
    """

    tool_type = "harmonize_list"
    # NOTE(review): presumably these flags allow the tool to run on inputs that
    # are not yet in a terminal / ok state -- confirm against the base class.
    require_terminal_states = False
    require_dataset_ok = False

    def produce_outputs(self, trans, out_data, output_collections, incoming, history, **kwds):
        """Create the ``output1`` and ``output2`` collections.

        :param incoming: tool state; ``incoming["input1"]`` and
            ``incoming["input2"]`` must expose ``.collection.elements``.
        :param output_collections: receives one ``create_collection`` call per
            output, ``output1`` first and ``output2`` second.
        :param history: passed to ``_add_datasets_to_history`` together with
            the copied element objects.

        ``trans``, ``out_data`` and ``kwds`` are accepted for interface
        compatibility and are not used here.

        If the inputs share no identifier, both outputs are created as empty
        collections; no exception is raised.
        """
        # Get the 2 input collections
        hdca1 = incoming["input1"]
        hdca2 = incoming["input2"]
        # Get the elements of both collections
        elements1 = hdca1.collection.elements
        elements2 = hdca2.collection.elements
        # Index the elements of each collection by identifier
        elements1_by_identifier = {element.element_identifier: element for element in elements1}
        elements2_by_identifier = {element.element_identifier: element for element in elements2}
        # Identifiers present in both collections, in the order of input1
        shared_identifiers = [
            element.element_identifier
            for element in elements1
            if element.element_identifier in elements2_by_identifier
        ]

        harmonized_sources = (
            ("output1", elements1_by_identifier),
            ("output2", elements2_by_identifier),
        )
        for output_name, elements_by_identifier in harmonized_sources:
            # Copy the shared elements of this input, in the harmonized order
            new_elements = {}
            for identifier in shared_identifiers:
                dce_object = elements_by_identifier[identifier].element_object
                if getattr(dce_object, "history_content_type", None) == "dataset":
                    # Datasets keep their tags on the copy
                    copied_object = dce_object.copy(copy_tags=dce_object.tags, flush=False)
                else:
                    copied_object = dce_object.copy(flush=False)
                new_elements[identifier] = copied_object
            # Nothing to add to the history when no identifier is shared
            if new_elements:
                self._add_datasets_to_history(history, new_elements.values())
            # Look the output definition up by name: each output declares its own
            # type/format source, so output2 must not reuse the first definition.
            output_collections.create_collection(
                self.outputs[output_name], output_name, elements=new_elements, propagate_hda_tags=False
            )


class RelabelFromFileTool(DatabaseOperationTool):
tool_type = "relabel_from_file"

Expand Down
213 changes: 213 additions & 0 deletions lib/galaxy/tools/harmonize_two_collections_list.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
<tool id="__HARMONIZELISTS__"
name="Harmonize two collections"
version="1.0.0"
tool_type="harmonize_list">
<description></description>
<type class="HarmonizeTool" module="galaxy.tools" />
<action module="galaxy.tools.actions.model_operations"
class="ModelOperationToolAction"/>
<edam_operations>
<edam_operation>operation_0335</edam_operation>
</edam_operations>
<inputs>
<param type="data_collection" collection_type="list,list:paired" name="input1" label="Input Collection with good order" />
<param type="data_collection" collection_type="list,list:paired" name="input2" label="Input Collection to order" />
</inputs>
<outputs>
<collection name="output1" format_source="input1" type_source="input1" label="${input1.name} (harmonized with ${input2.name})" />
<collection name="output2" format_source="input2" type_source="input2" label="${input2.name} (harmonized with ${input1.name})" />
</outputs>
<tests>
<test>
<!-- test that we can reorder a collection-->
<param name="input1">
<collection type="list">
<element name="element_1" value="simple_line_alternative.txt" />
<element name="element_2" value="simple_line.txt" />
</collection>
</param>
<param name="input2">
<collection type="list">
<element name="element_2" value="simple_line_alternative.txt" />
<element name="element_1" value="simple_line.txt" />
</collection>
</param>
<output_collection name="output1" type="list" count="2">
<element name="element_1">
<assert_contents>
<has_text_matching expression="^This is a different line of text.\n$" />
</assert_contents>
</element>
<element name="element_2">
<assert_contents>
<has_text_matching expression="^This is a line of text.\n$" />
</assert_contents>
</element>
</output_collection>
<output_collection name="output2" type="list" count="2">
<element name="element_1">
<assert_contents>
<has_text_matching expression="^This is a line of text.\n$" />
</assert_contents>
</element>
<element name="element_2">
<assert_contents>
<has_text_matching expression="^This is a different line of text.\n$" />
</assert_contents>
</element>
</output_collection>
</test>
<!-- test that we can filter collection1-->
<test>
<param name="input1">
<collection type="list">
<element name="element_1" value="simple_line_alternative.txt" />
<element name="element_2" value="simple_line.txt" />
</collection>
</param>
<param name="input2">
<collection type="list">
<element name="element_1" value="simple_line.txt" />
</collection>
</param>
<output_collection name="output1" type="list" count="1">
<element name="element_1">
<assert_contents>
<has_text_matching expression="^This is a different line of text.\n$" />
</assert_contents>
</element>
</output_collection>
<output_collection name="output2" type="list" count="1">
<element name="element_1">
<assert_contents>
<has_text_matching expression="^This is a line of text.\n$" />
</assert_contents>
</element>
</output_collection>
</test>
<!-- test that we can filter collection2-->
<test>
<param name="input1">
<collection type="list">
<element name="element_2" value="simple_line.txt" />
</collection>
</param>
<param name="input2">
<collection type="list">
<element name="element_2" value="simple_line_alternative.txt" />
<element name="element_1" value="simple_line.txt" />
</collection>
</param>
<output_collection name="output1" type="list" count="1">
<element name="element_2">
<assert_contents>
<has_text_matching expression="^This is a line of text.\n$" />
</assert_contents>
</element>
</output_collection>
<output_collection name="output2" type="list" count="1">
<element name="element_2">
<assert_contents>
<has_text_matching expression="^This is a different line of text.\n$" />
</assert_contents>
</element>
</output_collection>
</test>
<!-- test that we can filter both collections-->
<test>
<param name="input1">
<collection type="list">
<element name="element_1" value="simple_line_alternative.txt" />
<element name="element_2" value="simple_line.txt" />
<element name="element_3" value="simple_line.txt" />
</collection>
</param>
<param name="input2">
<collection type="list">
<element name="element_0" value="simple_line_alternative.txt" />
<element name="element_2" value="simple_line_alternative.txt" />
<element name="element_1" value="simple_line.txt" />
</collection>
</param>
<output_collection name="output1" type="list" count="2">
<element name="element_1">
<assert_contents>
<has_text_matching expression="^This is a different line of text.\n$" />
</assert_contents>
</element>
<element name="element_2">
<assert_contents>
<has_text_matching expression="^This is a line of text.\n$" />
</assert_contents>
</element>
</output_collection>
<output_collection name="output2" type="list" count="2">
<element name="element_1">
<assert_contents>
<has_text_matching expression="^This is a line of text.\n$" />
</assert_contents>
</element>
<element name="element_2">
<assert_contents>
<has_text_matching expression="^This is a different line of text.\n$" />
</assert_contents>
</element>
</output_collection>
</test>
<!-- test that we can end with 2 empty collections -->
<test>
<param name="input1">
<collection type="list">
<element name="element_1" value="simple_line_alternative.txt" />
<element name="element_2" value="simple_line.txt" />
</collection>
</param>
<param name="input2">
<collection type="list">
<element name="element_21" value="simple_line_alternative.txt" />
<element name="element_11" value="simple_line.txt" />
</collection>
</param>
<output_collection name="output1" type="list" count="0"/>
<output_collection name="output2" type="list" count="0"/>
</test>
</tests>
<help><![CDATA[
========
Synopsis
========
Harmonize 2 collections: the inputs are 2 collections and the outputs are 2 collections with:
- The same identifiers (identifiers present in only one of the two inputs are removed)
- Identifiers in the same order (the order of the first input collection)
=======
Example
=======
If the inputs are::
Collection1: [Horse123]
[Donkey543]
[Mule176]
Collection2: [Horse]
[Mule176]
[Donkey543]
The tool will output::
Collection1: [Donkey543]
[Mule176]
Collection2: [Donkey543]
[Mule176]
-------
.. class:: infomark
This tool will create new history datasets from your collections but your quota usage will not increase.
]]></help>
</tool>
1 change: 1 addition & 0 deletions test/functional/tools/sample_tool_conf.xml
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,7 @@
<tool file="${model_tools_path}/filter_empty_collection.xml" />
<tool file="${model_tools_path}/flatten_collection.xml" />
<tool file="${model_tools_path}/sort_collection_list.xml" />
<tool file="${model_tools_path}/harmonize_two_collections_list.xml" />
<tool file="${model_tools_path}/merge_collection.xml" />
<tool file="${model_tools_path}/relabel_from_file.xml" />
<tool file="${model_tools_path}/filter_from_file.xml" />
Expand Down

0 comments on commit 4cc9a7a

Please sign in to comment.