Allow selecting subset of collection by group tag #5457

Merged: 9 commits, Aug 7, 2018
100 changes: 97 additions & 3 deletions lib/galaxy/tools/parameters/basic.py
@@ -848,9 +848,9 @@ def get_legal_values(self, trans, other_values):
else:
return self.legal_values

def from_json(self, value, trans, other_values={}):
def from_json(self, value, trans, other_values={}, require_legal_value=True):
legal_values = self.get_legal_values(trans, other_values)
if not legal_values and is_runtime_context(trans, other_values):
if (not legal_values or not require_legal_value) and is_runtime_context(trans, other_values):
if self.multiple:
# While it is generally allowed that a select value can be '',
# we do not allow this to be the case in a dynamically
@@ -888,7 +888,7 @@ def from_json(self, value, trans, other_values={}):
return []
else:
raise ValueError("No option was selected for %s but input is not optional." % self.name)
if value not in legal_values:
if value not in legal_values and require_legal_value:
raise ValueError("An invalid option was selected for %s, %r, please verify." % (self.name, value))
return value
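
The new require_legal_value keyword only relaxes validation for callers that opt in: a value outside legal_values is passed through instead of raising. A minimal sketch of the relaxed check (check_select_value is a hypothetical standalone helper, not part of the PR):

def check_select_value(value, legal_values, require_legal_value=True):
    # Mirror of the condition above: only reject values outside legal_values
    # when the caller insists on them being legal.
    if value not in legal_values and require_legal_value:
        raise ValueError("An invalid option was selected, %r, please verify." % (value,))
    return value

check_select_value("condition:treated", legal_values=set(), require_legal_value=False)  # returned unchanged
# check_select_value("condition:treated", legal_values=set())  # would raise ValueError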

@@ -1028,6 +1028,99 @@ def _get_dbkey_names(self, trans=None):
return self.tool.app.genome_builds.get_genome_build_names(trans=trans)


class SelectTagParameter(SelectToolParameter):
"""
Select set that is composed of a set of tags available for an input.
"""
def __init__(self, tool, input_source):
input_source = ensure_input_source(input_source)
SelectToolParameter.__init__(self, tool, input_source)
self.tool = tool
self.tag_key = input_source.get("group", False)
self.optional = input_source.get("optional", False)
self.multiple = input_source.get("multiple", False)
self.accept_default = input_source.get_bool("accept_default", False)
if self.accept_default:
self.optional = True
self.data_ref = input_source.get("data_ref", None)
self.ref_input = None
# Legacy style default value specification...
self.default_value = input_source.get("default_value", None)
if self.default_value is None:
# Newer style... more in line with other parameters.
self.default_value = input_source.get("value", None)
self.is_dynamic = True

def from_json(self, value, trans, other_values={}):
if self.multiple:
tag_list = []
# split on newline and ,
if isinstance(value, list) or isinstance(value, string_types):
if not isinstance(value, list):
value = value.split('\n')
for tag_str in value:
for tag in str(tag_str).split(','):
tag = tag.strip()
if tag:
tag_list.append(tag)
value = tag_list
else:
if not value:
value = None
# We skip requiring legal values -- this is similar to optional, but allows only subset of datasets to be positive
# TODO: May not actually be required for (nested) collection input ?
return super(SelectTagParameter, self).from_json(value, trans, other_values, require_legal_value=False)
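
For multiple selection the value may arrive as a list or as a single string; the branch above splits strings on newlines and commas and strips whitespace. Roughly, as a standalone sketch (parse_tag_input is a hypothetical helper used only for illustration):

def parse_tag_input(value):
    # Accept a list of strings or one string; strings are split on newlines,
    # each piece is then split on commas and stripped.
    if not isinstance(value, list):
        value = value.split('\n')
    tags = []
    for tag_str in value:
        for tag in str(tag_str).split(','):
            tag = tag.strip()
            if tag:
                tags.append(tag)
    return tags

parse_tag_input("condition:treated,type:single")  # ['condition:treated', 'type:single']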

def get_tag_list(self, other_values):
"""
Generate a select list containing the tags of the associated dataset (if found).
"""
# Get the value of the associated data reference (a dataset)
history_items = other_values.get(self.data_ref, None)
# Check if a dataset is selected
if not history_items:
return []
tags = set()
Review comment (PR author): So in practice we just make this a list, right?

Reply (member): I think that one is fine? I think we just drop the `and tag not in tag_list` at https://github.com/galaxyproject/galaxy/pull/5457/files#diff-9baf995401cfeb779edf8731ebaf0d2dR1064.

Reply (PR author): Oh right, I think it was a bit too late for me yesterday ...
for history_item in util.listify(history_items):
if hasattr(history_item, 'dataset_instances'):
for dataset in history_item.dataset_instances:
for tag in dataset.tags:
if tag.user_tname == 'group':
tags.add(tag.user_value)
else:
for tag in history_item.tags:
if tag.user_tname == 'group':
tags.add(tag.user_value)
return list(tags)
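
get_tag_list works for both collections (anything exposing dataset_instances) and single datasets, keeping only tags whose name is 'group'. A rough illustration with stand-in objects (Tag and Dataset here are mock types for the sketch, not Galaxy models):

from collections import namedtuple

Tag = namedtuple('Tag', ['user_tname', 'user_value'])
Dataset = namedtuple('Dataset', ['tags'])

d1 = Dataset(tags=[Tag('group', 'condition:treated'), Tag('name', 'sample1')])
d2 = Dataset(tags=[Tag('group', 'condition:untreated'), Tag('group', 'type:single')])

tags = {t.user_value for d in (d1, d2) for t in d.tags if t.user_tname == 'group'}
# {'condition:treated', 'condition:untreated', 'type:single'}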

def get_options(self, trans, other_values):
"""
Show tags
"""
options = []
for tag in self.get_tag_list(other_values):
options.append(('Tags: ' + tag, tag, False))
return options
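
Each entry appears to follow the (label, value, selected) triple used for select options, with the tag value doubling as the option value; for two group tags this would yield roughly:

options = [('Tags: ' + tag, tag, False) for tag in ['condition:treated', 'type:single']]
# [('Tags: condition:treated', 'condition:treated', False),
#  ('Tags: type:single', 'type:single', False)]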

def get_initial_value(self, trans, other_values):
if self.default_value is not None:
return self.default_value
return SelectToolParameter.get_initial_value(self, trans, other_values)

def get_legal_values(self, trans, other_values):
if self.data_ref not in other_values:
raise ValueError("Value for associated data reference not found (data_ref).")
return set(self.get_tag_list(other_values))

def get_dependencies(self):
return [self.data_ref]

def to_dict(self, trans, other_values={}):
d = super(SelectTagParameter, self).to_dict(trans, other_values=other_values)
d['data_ref'] = self.data_ref
return d


class ColumnListParameter(SelectToolParameter):
"""
Select list that consists of either the total number of columns or only
@@ -2212,6 +2305,7 @@ def to_text(self, value):
genomebuild=GenomeBuildParameter,
select=SelectToolParameter,
color=ColorToolParameter,
group_tag=SelectTagParameter,
data_column=ColumnListParameter,
hidden=HiddenToolParameter,
hidden_data=HiddenDataToolParameter,
26 changes: 25 additions & 1 deletion lib/galaxy/tools/wrappers.py
@@ -2,7 +2,7 @@
import os
import tempfile

from six import string_types
from six import string_types, text_type
from six.moves import shlex_quote

from galaxy import exceptions
@@ -171,6 +171,11 @@ def __add__(self, x):
def __getattr__(self, key):
return getattr(self.input, key)

def __iter__(self):
if not self.input.multiple:
raise Exception("Tried to iterate over a non-multiple parameter.")
return self.value.__iter__()
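
This is what lets a Cheetah template loop over a multiple-valued parameter, as the group_tag test tools below do with "#for $group in $groups". A hedged sketch of the behaviour with stand-in objects (FakeInput and FakeWrapper are illustrations, not Galaxy classes):

class FakeInput(object):
    multiple = True

class FakeWrapper(object):
    def __init__(self, input, value):
        self.input, self.value = input, value

    def __iter__(self):
        # Same guard as above: only multiple-valued parameters are iterable.
        if not self.input.multiple:
            raise Exception("Tried to iterate over a non-multiple parameter.")
        return self.value.__iter__()

list(FakeWrapper(FakeInput(), ['condition:treated', 'type:single']))
# ['condition:treated', 'type:single']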


class DatasetFilenameWrapper(ToolParameterValueWrapper):
"""
@@ -233,6 +238,11 @@ def __init__(self, dataset, datatypes_registry=None, tool=None, name=None, datas
self.unsanitized = dataset
self.dataset = wrap_with_safe_string(dataset, no_wrap_classes=ToolParameterValueWrapper)
self.metadata = self.MetadataWrapper(dataset.metadata)
if hasattr(dataset, 'tags'):
self.groups = {tag.user_value.lower() for tag in dataset.tags if tag.user_tname == 'group'}
else:
# May be a 'FakeDatasetAssociation'
self.groups = set()
self.datatypes_registry = datatypes_registry
self.false_path = getattr(dataset_path, "false_path", None)
self.false_extra_files_path = getattr(dataset_path, "false_extra_files_path", None)
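
The new groups attribute exposes the dataset's 'group' tag values as a lowercase set, so a template can test membership (note the values are lowercased here, unlike in SelectTagParameter.get_tag_list). A small sketch of the comprehension with a stand-in tag object (MockTag is not a Galaxy class):

class MockTag(object):
    def __init__(self, user_tname, user_value):
        self.user_tname, self.user_value = user_tname, user_value

tags = [MockTag('group', 'condition:Treated'), MockTag('name', 'sample1')]
groups = {tag.user_value.lower() for tag in tags if tag.user_tname == 'group'}
assert groups == {'condition:treated'}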
@@ -363,6 +373,9 @@ class DatasetCollectionWrapper(ToolParameterValueWrapper, HasDatasets):
def __init__(self, job_working_directory, has_collection, dataset_paths=[], **kwargs):
super(DatasetCollectionWrapper, self).__init__()
self.job_working_directory = job_working_directory
self._dataset_elements_cache = {}
self.dataset_paths = dataset_paths
self.kwargs = kwargs

if has_collection is None:
self.__input_supplied = False
@@ -381,6 +394,7 @@ def __init__(self, job_working_directory, has_collection, dataset_paths=[], **kw
else:
collection = has_collection
self.name = None
self.collection = collection

elements = collection.elements
element_instances = odict.odict()
@@ -401,6 +415,16 @@ def __init__(self, job_working_directory, has_collection, dataset_paths=[], **kw
self.__element_instances = element_instances
self.__element_instance_list = element_instance_list

def get_datasets_for_group(self, group):
group = text_type(group).lower()
if not self._dataset_elements_cache.get(group):
wrappers = []
for element in self.collection.dataset_elements:
if any([t for t in element.dataset_instance.tags if t.user_tname.lower() == 'group' and t.value.lower() == group]):
wrappers.append(self._dataset_wrapper(element.element_object, self.dataset_paths, identifier=element.element_identifier, **self.kwargs))
self._dataset_elements_cache[group] = wrappers
return self._dataset_elements_cache[group]
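
get_datasets_for_group keeps the collection elements whose dataset carries a 'group' tag equal (case-insensitively) to the requested value, wraps them, and caches the result per group. The selection predicate, pulled out into a rough standalone form (elements_for_group is a hypothetical helper for illustration):

def elements_for_group(dataset_elements, group):
    group = group.lower()
    return [element for element in dataset_elements
            if any(t.user_tname.lower() == 'group' and t.value.lower() == group
                   for t in element.dataset_instance.tags)]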

def keys(self):
if not self.__input_supplied:
return []
14 changes: 10 additions & 4 deletions lib/galaxy/tools/xsd/galaxy.xsd
@@ -2099,6 +2099,11 @@ $attribute_list:checked,truevalue,falsevalue:5

A dataset from the current history. Multiple types might be used for the param form.

#### ``group_tag``

$attribute_list:multiple,data_ref:5


##### Examples

The following will find all "coordinate interval files" contained within the
@@ -2356,10 +2361,10 @@
as a comma separated list.
<xs:annotation>
<xs:documentation xml:lang="en"><![CDATA[

Only valid if ``type`` attribute value is ``select`` or ``data_column``. Used
with select lists whose options are dynamically generated based on certain
metadata attributes of the dataset upon which this parameter depends (usually
but not always the tool's input dataset).
Only valid if ``type`` attribute value is ``select``, ``data_column``, or
``group_tag``. Used with select lists whose options are dynamically generated
based on certain metadata attributes of the dataset or collection upon which
this parameter depends (usually but not always the tool's input dataset).

]]></xs:documentation>
</xs:annotation>
@@ -2509,6 +2514,7 @@
allow access to Python code to generate options for a select list. See
<xs:enumeration value="file"/>
<xs:enumeration value="data"/>
<xs:enumeration value="drill_down"/>
<xs:enumeration value="group_tag"/>
<xs:enumeration value="data_collection"/>
</xs:restriction>
</xs:simpleType>
65 changes: 65 additions & 0 deletions test/api/test_tools.py
@@ -1707,6 +1707,71 @@ def __tool_ids(self):
tool_ids = [_["id"] for _ in tools]
return tool_ids

def test_group_tag_selection(self):
with self.dataset_populator.test_history() as history_id:
input_hdca_id = self.__build_group_list(history_id)
inputs = {
"input1": {"src": "hdca", "id": input_hdca_id},
"group": "condition:treated",
}
self.dataset_populator.wait_for_history(history_id, assert_ok=True)
response = self._run("collection_cat_group_tag", history_id, inputs, assert_ok=True)
outputs = response["outputs"]
self.assertEquals(len(outputs), 1)
output = outputs[0]
output_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output)
self.assertEquals(output_content.strip(), "123\n456")

def test_group_tag_selection_multiple(self):
with self.dataset_populator.test_history() as history_id:
input_hdca_id = self.__build_group_list(history_id)
inputs = {
"input1": {"src": "hdca", "id": input_hdca_id},
"groups": "condition:treated,type:single",
}
self.dataset_populator.wait_for_history(history_id, assert_ok=True)
response = self._run("collection_cat_group_tag_multiple", history_id, inputs, assert_ok=True)
outputs = response["outputs"]
self.assertEquals(len(outputs), 1)
output = outputs[0]
output_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output)
self.assertEquals(output_content.strip(), "123\n456\n456\n0ab")
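
Given the tags assigned in __build_group_list below, "condition:treated" selects test0 (123) and test1 (456) while "type:single" selects test1 (456) and test3 (0ab), so 456 appears twice in the concatenation. A quick check of that arithmetic:

treated = ["123", "456"]  # datasets tagged group:condition:treated (test0, test1)
single = ["456", "0ab"]   # datasets tagged group:type:single (test1, test3)
assert "\n".join(treated + single) == "123\n456\n456\n0ab"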

def __build_group_list(self, history_id):
response = self.dataset_collection_populator.upload_collection(history_id, "list", elements=[
{
"name": "test0",
"src": "pasted",
"paste_content": "123\n",
"ext": "txt",
"tags": ["group:type:paired-end", "group:condition:treated"],
},
{
"name": "test1",
"src": "pasted",
"paste_content": "456\n",
"ext": "txt",
"tags": ["group:type:single", "group:condition:treated"],
},
{
"name": "test2",
"src": "pasted",
"paste_content": "789\n",
"ext": "txt",
"tags": ["group:type:paired-end", "group:condition:untreated"],
},
{
"name": "test3",
"src": "pasted",
"paste_content": "0ab\n",
"ext": "txt",
"tags": ["group:type:single", "group:condition:untreated"],
}
])
self._assert_status_code_is(response, 200)
hdca_list_id = response.json()["outputs"][0]["id"]
return hdca_list_id
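
The "group:<name>:<value>" strings here rely on the text before the first colon becoming the tag name and the remainder its value, so "group:condition:treated" is what the group_tag parameter later offers as "condition:treated". A hedged sketch of that split (assuming the first-colon convention, consistent with the tag matching shown above):

def split_tag(tag_str):
    # Assumed convention: tag name before the first ':', value is the rest.
    name, _, value = tag_str.partition(':')
    return name, value

assert split_tag('group:condition:treated') == ('group', 'condition:treated')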

def __build_nested_list(self, history_id):
response = self.dataset_collection_populator.upload_collection(history_id, "list:paired", elements=[
{
21 changes: 21 additions & 0 deletions test/functional/tools/collection_cat_group_tag.xml
@@ -0,0 +1,21 @@
<tool id="collection_cat_group_tag" name="Concatenate multiple datasets (based on group tag)">
<description>tail-to-head</description>
<command>
cat
#for $file in $input1.get_datasets_for_group($group):
'$file'
#end for
> '$out_file1'
</command>
<inputs>
<param name="input1" type="data_collection" collection_type="list" label="Concatenate Dataset" multiple="true" />
<param name="group" type="group_tag" data_ref="input1" />
</inputs>
<outputs>
<data name="out_file1" format="input" metadata_source="input1"/>
</outputs>
<tests>
</tests>
<help>
</help>
</tool>
23 changes: 23 additions & 0 deletions test/functional/tools/collection_cat_group_tag_multiple.xml
@@ -0,0 +1,23 @@
<tool id="collection_cat_group_tag_multiple" name="Concatenate multiple datasets (based on group tags)">
<description>tail-to-head</description>
<command>
cat
#for $group in $groups:
#for $file in $input1.get_datasets_for_group($group):
'$file'
#end for
#end for
> '$out_file1'
</command>
<inputs>
<param name="input1" type="data_collection" collection_type="list" label="Concatenate Dataset" multiple="true" />
<param name="groups" type="group_tag" data_ref="input1" multiple="true" />
</inputs>
<outputs>
<data name="out_file1" format="input" metadata_source="input1"/>
</outputs>
<tests>
</tests>
<help>
</help>
</tool>
2 changes: 2 additions & 0 deletions test/functional/tools/samples_tool_conf.xml
@@ -131,6 +131,8 @@
<tool file="collection_type_source_map_over.xml" />
<tool file="collection_creates_list_fail.xml" />
<tool file="collection_creates_dynamic_nested_fail.xml" />
<tool file="collection_cat_group_tag.xml" />
<tool file="collection_cat_group_tag_multiple.xml" />
<tool file="cheetah_casting.xml" />

<tool file="cheetah_problem_unbound_var.xml" />
1 change: 1 addition & 0 deletions test/unit/test_objectstore.py
@@ -224,6 +224,7 @@ class MockDataset(object):
def __init__(self, id):
self.id = id
self.object_store_id = None
self.tags = []


# Poor man's mocking. Need to get a real mocking library as real Galaxy development
1 change: 1 addition & 0 deletions test/unit/tools/test_wrappers.py
@@ -168,6 +168,7 @@ def __init__(self):
self.file_name = MOCK_DATASET_PATH
self.extra_files_path = MOCK_DATASET_EXTRA_FILES_PATH
self.ext = MOCK_DATASET_EXT
self.tags = []


class MockTool(object):