Skip to content

Commit

Permalink
Merge pull request #17294 from lldelisle/filter_paused
Browse files Browse the repository at this point in the history
[24.0] Add ``__KEEP_SUCCESS_DATASETS__``
  • Loading branch information
mvdbeek authored Mar 5, 2024
2 parents e9e26b9 + d8dc597 commit b7c14c6
Show file tree
Hide file tree
Showing 4 changed files with 206 additions and 0 deletions.
19 changes: 19 additions & 0 deletions lib/galaxy/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3204,13 +3204,16 @@ class DatabaseOperationTool(Tool):
require_terminal_states = True
require_dataset_ok = True
tool_type_local = True
require_terminal_or_paused_states = False

@property
def valid_input_states(self):
    """Return the dataset states this tool accepts for its inputs.

    The ``require_*`` class attributes are consulted in priority order:

    * ``require_dataset_ok`` -> only ``OK``
    * ``require_terminal_states`` -> ``model.Dataset.terminal_states``
    * ``require_terminal_or_paused_states`` -> the terminal states plus
      ``PAUSED``
    * otherwise -> ``model.Dataset.valid_input_states``
    """
    if self.require_dataset_ok:
        return (model.Dataset.states.OK,)
    elif self.require_terminal_states:
        return model.Dataset.terminal_states
    elif self.require_terminal_or_paused_states:
        # ``a or b`` is a boolean operator: it evaluates to ``a`` whenever
        # ``a`` is truthy, so it can never add PAUSED to a non-empty
        # collection of terminal states. Build the union explicitly.
        return (*model.Dataset.terminal_states, model.Dataset.states.PAUSED)
    else:
        return model.Dataset.valid_input_states

Expand Down Expand Up @@ -3503,6 +3506,22 @@ def element_is_valid(element: model.DatasetCollectionElement):
return element.element_object.is_ok


class KeepSuccessDatasetsTool(FilterDatasetsTool):
    """Filter tool that keeps only the collection elements whose dataset is ok.

    Neither ``require_dataset_ok`` nor ``require_terminal_states`` is set;
    ``require_terminal_or_paused_states`` is enabled instead.
    """

    tool_type = "keep_success_datasets_collection"
    require_terminal_states = False
    require_dataset_ok = False
    require_terminal_or_paused_states = True

    @staticmethod
    def element_is_valid(element: model.DatasetCollectionElement):
        """Return whether ``element`` should be kept in the output collection.

        Raises ``ToolInputsNotReadyException`` when the element's dataset is
        in a non-ready state other than ``PAUSED``; otherwise the result is
        the dataset's ``is_ok`` flag.
        """
        dataset = element.element_object
        # PAUSED is tolerated (the element is judged by ``is_ok`` below);
        # every other non-ready state means the input is still pending.
        is_paused = dataset.state == model.Dataset.states.PAUSED
        if not is_paused and dataset.state in model.Dataset.non_ready_states:
            raise ToolInputsNotReadyException("An input dataset is pending.")
        return dataset.is_ok


class FilterEmptyDatasetsTool(FilterDatasetsTool):
tool_type = "filter_empty_datasets_collection"
require_dataset_ok = False
Expand Down
64 changes: 64 additions & 0 deletions lib/galaxy/tools/keep_success_collection.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
<tool id="__KEEP_SUCCESS_DATASETS__"
name="Keep success"
version="1.0.0"
tool_type="keep_success_datasets_collection">
<description></description>
<type class="KeepSuccessDatasetsTool" module="galaxy.tools" />
<action module="galaxy.tools.actions.model_operations"
class="ModelOperationToolAction"/>
<edam_operations>
<edam_operation>operation_3695</edam_operation>
</edam_operations>
<inputs>
<param type="data_collection" collection_type="list,list:paired" name="input" label="Input Collection" />
</inputs>
<outputs>
<collection name="output" format_source="input" type_source="input" label="${on_string} (only successed datasets)" >
</collection>
</outputs>
<tests>
<!-- The test framework has no way of creating a collection with
unsuccessful elements, so the best we can do is verify identity on
an okay collection.
-->
<test>
<param name="input">
<collection type="list">
<element name="e1" value="simple_line.txt" />
</collection>
</param>
<output_collection name="output" type="list">
<element name="e1">
<assert_contents>
<has_text_matching expression="^This is a line of text.\n$" />
</assert_contents>
</element>
</output_collection>
</test>
</tests>
<help><![CDATA[
========
Synopsis
========
Keep only the datasets that are in the success (green) state from a collection.
===========
Description
===========
This tool takes a dataset collection and keeps only the datasets in the success (green) state. This is useful for continuing a multi-sample analysis when one or more of the samples fail or are in a paused state.
.. image:: ${static_path}/images/tools/collection_ops/keep_success.svg
:width: 500
:alt: Keep success datasets
-----
.. class:: infomark
This tool will create new history datasets from your collection but your quota usage will not increase.
]]></help>
</tool>
122 changes: 122 additions & 0 deletions lib/galaxy_test/api/test_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -3255,6 +3255,128 @@ def filter_jobs_by_tool(tool_id):
# Follow proves one job was filtered out of the result of cat1
assert len(filter_jobs_by_tool("cat1")) == 1, jobs

def test_keep_success_mapping_error(self):
    """Check ``__KEEP_SUCCESS_DATASETS__`` when mapping over a two-element list.

    The workflow maps ``exit_code_from_file`` over the input list, passes
    the result through ``__KEEP_SUCCESS_DATASETS__`` and then maps ``cat1``
    over the filtered collection. The job counts per tool are asserted.
    """
    workflow_yaml = """
class: GalaxyWorkflow
inputs:
input_c: collection
steps:
mixed_collection:
tool_id: exit_code_from_file
in:
input: input_c
filtered_collection:
tool_id: "__KEEP_SUCCESS_DATASETS__"
in:
input: mixed_collection/out_file1
cat:
tool_id: cat1
in:
input1: filtered_collection/output
"""
    workflow_inputs = """
input_c:
collection_type: list
elements:
- identifier: i1
content: "0"
- identifier: i2
content: "1"
"""
    with self.dataset_populator.test_history() as history_id:
        summary = self._run_workflow(
            workflow_yaml,
            test_data=workflow_inputs,
            history_id=history_id,
            wait=True,
            assert_ok=False,
        )
        jobs = summary.jobs

        def count_jobs(tool_id):
            return sum(1 for job in summary.jobs if job["tool_id"] == tool_id)

        # One exit_code_from_file job per input element and a single filter
        # job. Only one cat1 job proves that one element was filtered out of
        # the exit_code_from_file output before cat1 was mapped over it.
        expected_job_counts = {
            "exit_code_from_file": 2,
            "__KEEP_SUCCESS_DATASETS__": 1,
            "cat1": 1,
        }
        for tool_id, expected in expected_job_counts.items():
            assert count_jobs(tool_id) == expected, jobs

def test_keep_success_mapping_paused(self):
    """Check ``__KEEP_SUCCESS_DATASETS__`` on a collection with a paused element.

    The workflow maps ``exit_code_from_file`` over a three-element list,
    maps ``cat1`` over that output and finally filters the ``cat1`` output
    with ``__KEEP_SUCCESS_DATASETS__``. The test asserts that the ``cat1``
    collection holds two ``ok`` elements plus a ``paused`` one, and that the
    filtered collection ends up with two elements.
    """
    workflow_yaml = """
class: GalaxyWorkflow
inputs:
input_c: collection
steps:
mixed_collection:
tool_id: exit_code_from_file
in:
input: input_c
cat:
tool_id: cat1
in:
input1: mixed_collection/out_file1
filtered_collection:
tool_id: "__KEEP_SUCCESS_DATASETS__"
in:
input: cat/out_file1
"""
    workflow_inputs = """
input_c:
collection_type: list
elements:
- identifier: i1
content: "0"
- identifier: i2
content: "1"
- identifier: i3
content: "0"
"""
    with self.dataset_populator.test_history() as history_id:
        summary = self._run_workflow(
            workflow_yaml,
            test_data=workflow_inputs,
            history_id=history_id,
            wait=True,
            assert_ok=False,
        )
        jobs = summary.jobs

        def count_jobs(tool_id):
            return sum(1 for job in summary.jobs if job["tool_id"] == tool_id)

        # The invocation (with step details) exposes the output collections.
        invocation = self.workflow_populator.get_invocation(summary.invocation_id, step_details=True)

        def output_collection(step_index, output_name):
            # Resolve a step's output collection id, then fetch its details
            # without requiring every element to be ok.
            collection_id = invocation["steps"][step_index]["output_collections"][output_name]["id"]
            return self.dataset_populator.get_history_collection_details(
                history_id, content_id=collection_id, assert_ok=False
            )

        # Three exit_code_from_file jobs, producing a three-element collection.
        assert count_jobs("exit_code_from_file") == 3, jobs
        mixed_collection = output_collection(1, "out_file1")
        assert mixed_collection["element_count"] == 3, mixed_collection

        # All three cat1 jobs have been "scheduled" ...
        assert count_jobs("cat1") == 3, jobs
        # ... two of their outputs are 'ok' and the other is 'paused'.
        cat_collection = output_collection(2, "out_file1")
        assert cat_collection["element_count"] == 3, cat_collection
        cat1_states = [element["object"]["state"] for element in cat_collection["elements"]]
        assert "paused" in cat1_states, jobs
        assert cat1_states.count("ok") == 2, cat_collection

        # The filter tool ran once and its output keeps 2 elements.
        assert count_jobs("__KEEP_SUCCESS_DATASETS__") == 1, jobs
        output_filtered = output_collection(3, "output")
        assert output_filtered["element_count"] == 2, output_filtered

def test_workflow_request(self):
workflow = self.workflow_populator.load_workflow(name="test_for_queue")
workflow_request, history_id, workflow_id = self._setup_workflow_run(workflow)
Expand Down
1 change: 1 addition & 0 deletions test/functional/tools/sample_tool_conf.xml
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@
<tool file="${model_tools_path}/unzip_collection.xml" />
<tool file="${model_tools_path}/zip_collection.xml" />
<tool file="${model_tools_path}/filter_failed_collection.xml" />
<tool file="${model_tools_path}/keep_success_collection.xml" />
<tool file="${model_tools_path}/filter_empty_collection.xml" />
<tool file="${model_tools_path}/flatten_collection.xml" />
<tool file="${model_tools_path}/sort_collection_list.xml" />
Expand Down

0 comments on commit b7c14c6

Please sign in to comment.