Allow selecting subset of collection by group tag #5457

Merged: 9 commits, Aug 7, 2018
100 changes: 97 additions & 3 deletions lib/galaxy/tools/parameters/basic.py
@@ -848,9 +848,9 @@ def get_legal_values(self, trans, other_values):
else:
return self.legal_values

def from_json(self, value, trans, other_values={}):
def from_json(self, value, trans, other_values={}, require_legal_value=True):
legal_values = self.get_legal_values(trans, other_values)
if not legal_values and is_runtime_context(trans, other_values):
if (not legal_values or not require_legal_value) and is_runtime_context(trans, other_values):
if self.multiple:
# While it is generally allowed that a select value can be '',
# we do not allow this to be the case in a dynamically
@@ -888,7 +888,7 @@ def from_json(self, value, trans, other_values={}):
return []
else:
raise ValueError("No option was selected for %s but input is not optional." % self.name)
if value not in legal_values:
if value not in legal_values and require_legal_value:
raise ValueError("An invalid option was selected for %s, %r, please verify." % (self.name, value))
return value
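
The new require_legal_value keyword only relaxes validation for callers that opt in: a value outside legal_values is passed through instead of raising. A minimal sketch of the relaxed check (check_select_value is a hypothetical standalone helper, not part of the PR):

def check_select_value(value, legal_values, require_legal_value=True):
    # Mirror of the condition above: only reject values outside legal_values
    # when the caller insists on them being legal.
    if value not in legal_values and require_legal_value:
        raise ValueError("An invalid option was selected, %r, please verify." % (value,))
    return value

check_select_value("condition:treated", legal_values=set(), require_legal_value=False)  # returned unchanged
# check_select_value("condition:treated", legal_values=set())  # would raise ValueError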

@@ -1028,6 +1028,99 @@ def _get_dbkey_names(self, trans=None):
return self.tool.app.genome_builds.get_genome_build_names(trans=trans)


class SelectTagParameter(SelectToolParameter):
"""
Select set that is composed of a set of tags available for an input.
"""
def __init__(self, tool, input_source):
input_source = ensure_input_source(input_source)
SelectToolParameter.__init__(self, tool, input_source)
self.tool = tool
self.tag_key = input_source.get("group", False)
self.optional = input_source.get("optional", False)
self.multiple = input_source.get("multiple", False)
self.accept_default = input_source.get_bool("accept_default", False)
if self.accept_default:
self.optional = True
self.data_ref = input_source.get("data_ref", None)
self.ref_input = None
# Legacy style default value specification...
self.default_value = input_source.get("default_value", None)
if self.default_value is None:
# Newer style... more in line with other parameters.
self.default_value = input_source.get("value", None)
self.is_dynamic = True

def from_json(self, value, trans, other_values={}):
if self.multiple:
tag_list = []
# split on newline and ,
if isinstance(value, list) or isinstance(value, string_types):
if not isinstance(value, list):
value = value.split('\n')
for tag_str in value:
for tag in str(tag_str).split(','):
tag = tag.strip()
if tag:
tag_list.append(tag)
value = tag_list
else:
if not value:
value = None
# We skip requiring legal values -- this is similar to optional, but allows only subset of datasets to be positive
# TODO: May not actually be required for (nested) collection input ?
return super(SelectTagParameter, self).from_json(value, trans, other_values, require_legal_value=False)
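
For multiple selection the value may arrive as a list or as a single string; the branch above splits strings on newlines and commas and strips whitespace. Roughly, as a standalone sketch (parse_tag_input is a hypothetical helper used only for illustration):

def parse_tag_input(value):
    # Accept a list of strings or one string; strings are split on newlines,
    # each piece is then split on commas and stripped.
    if not isinstance(value, list):
        value = value.split('\n')
    tags = []
    for tag_str in value:
        for tag in str(tag_str).split(','):
            tag = tag.strip()
            if tag:
                tags.append(tag)
    return tags

parse_tag_input("condition:treated,type:single")  # ['condition:treated', 'type:single']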

def get_tag_list(self, other_values):
"""
Generate a select list containing the tags of the associated dataset (if found).
"""
# Get the value of the associated data reference (a dataset)
history_items = other_values.get(self.data_ref, None)
# Check if a dataset is selected
if not history_items:
return []
tags = set()
Review comment (PR author): So in practice we just make this a list, right?

Reply (member): I think that one is fine? I think we just drop the `and tag not in tag_list` at https://github.com/galaxyproject/galaxy/pull/5457/files#diff-9baf995401cfeb779edf8731ebaf0d2dR1064.

Reply (PR author): Oh right, I think it was a bit too late for me yesterday ...
for history_item in util.listify(history_items):
if hasattr(history_item, 'dataset_instances'):
for dataset in history_item.dataset_instances:
for tag in dataset.tags:
if tag.user_tname == 'group':
tags.add(tag.user_value)
else:
for tag in history_item.tags:
if tag.user_tname == 'group':
tags.add(tag.user_value)
return list(tags)
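
get_tag_list works for both collections (anything exposing dataset_instances) and single datasets, keeping only tags whose name is 'group'. A rough illustration with stand-in objects (Tag and Dataset here are mock types for the sketch, not Galaxy models):

from collections import namedtuple

Tag = namedtuple('Tag', ['user_tname', 'user_value'])
Dataset = namedtuple('Dataset', ['tags'])

d1 = Dataset(tags=[Tag('group', 'condition:treated'), Tag('name', 'sample1')])
d2 = Dataset(tags=[Tag('group', 'condition:untreated'), Tag('group', 'type:single')])

tags = {t.user_value for d in (d1, d2) for t in d.tags if t.user_tname == 'group'}
# {'condition:treated', 'condition:untreated', 'type:single'}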

def get_options(self, trans, other_values):
"""
Show tags
"""
options = []
for tag in self.get_tag_list(other_values):
options.append(('Tags: ' + tag, tag, False))
return options
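
Each entry appears to follow the (label, value, selected) triple used for select options, with the tag value doubling as the option value; for two group tags this would yield roughly:

options = [('Tags: ' + tag, tag, False) for tag in ['condition:treated', 'type:single']]
# [('Tags: condition:treated', 'condition:treated', False),
#  ('Tags: type:single', 'type:single', False)]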

def get_initial_value(self, trans, other_values):
if self.default_value is not None:
return self.default_value
return SelectToolParameter.get_initial_value(self, trans, other_values)

def get_legal_values(self, trans, other_values):
if self.data_ref not in other_values:
raise ValueError("Value for associated data reference not found (data_ref).")
return set(self.get_tag_list(other_values))

def get_dependencies(self):
return [self.data_ref]

def to_dict(self, trans, other_values={}):
d = super(SelectTagParameter, self).to_dict(trans, other_values=other_values)
d['data_ref'] = self.data_ref
return d


class ColumnListParameter(SelectToolParameter):
"""
Select list that consists of either the total number of columns or only
@@ -2212,6 +2305,7 @@ def to_text(self, value):
genomebuild=GenomeBuildParameter,
select=SelectToolParameter,
color=ColorToolParameter,
group_tag=SelectTagParameter,
data_column=ColumnListParameter,
hidden=HiddenToolParameter,
hidden_data=HiddenDataToolParameter,
26 changes: 25 additions & 1 deletion lib/galaxy/tools/wrappers.py
@@ -2,7 +2,7 @@
import os
import tempfile

from six import string_types
from six import string_types, text_type
from six.moves import shlex_quote

from galaxy import exceptions
@@ -171,6 +171,11 @@ def __add__(self, x):
def __getattr__(self, key):
return getattr(self.input, key)

def __iter__(self):
if not self.input.multiple:
raise Exception("Tried to iterate over a non-multiple parameter.")
return self.value.__iter__()
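
This is what lets a Cheetah template loop over a multiple-valued parameter, as the group_tag test tools below do with "#for $group in $groups". A hedged sketch of the behaviour with stand-in objects (FakeInput and FakeWrapper are illustrations, not Galaxy classes):

class FakeInput(object):
    multiple = True

class FakeWrapper(object):
    def __init__(self, input, value):
        self.input, self.value = input, value

    def __iter__(self):
        # Same guard as above: only multiple-valued parameters are iterable.
        if not self.input.multiple:
            raise Exception("Tried to iterate over a non-multiple parameter.")
        return self.value.__iter__()

list(FakeWrapper(FakeInput(), ['condition:treated', 'type:single']))
# ['condition:treated', 'type:single']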


class DatasetFilenameWrapper(ToolParameterValueWrapper):
"""
@@ -233,6 +238,11 @@ def __init__(self, dataset, datatypes_registry=None, tool=None, name=None, datas
self.unsanitized = dataset
self.dataset = wrap_with_safe_string(dataset, no_wrap_classes=ToolParameterValueWrapper)
self.metadata = self.MetadataWrapper(dataset.metadata)
if hasattr(dataset, 'tags'):
self.groups = {tag.user_value.lower() for tag in dataset.tags if tag.user_tname == 'group'}
else:
# May be a 'FakeDatasetAssociation'
self.groups = set()
self.datatypes_registry = datatypes_registry
self.false_path = getattr(dataset_path, "false_path", None)
self.false_extra_files_path = getattr(dataset_path, "false_extra_files_path", None)
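
The new groups attribute exposes the dataset's 'group' tag values as a lowercase set, so a template can test membership (note the values are lowercased here, unlike in SelectTagParameter.get_tag_list). A small sketch of the comprehension with a stand-in tag object (MockTag is not a Galaxy class):

class MockTag(object):
    def __init__(self, user_tname, user_value):
        self.user_tname, self.user_value = user_tname, user_value

tags = [MockTag('group', 'condition:Treated'), MockTag('name', 'sample1')]
groups = {tag.user_value.lower() for tag in tags if tag.user_tname == 'group'}
assert groups == {'condition:treated'}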
@@ -363,6 +373,9 @@ class DatasetCollectionWrapper(ToolParameterValueWrapper, HasDatasets):
def __init__(self, job_working_directory, has_collection, dataset_paths=[], **kwargs):
super(DatasetCollectionWrapper, self).__init__()
self.job_working_directory = job_working_directory
self._dataset_elements_cache = {}
self.dataset_paths = dataset_paths
self.kwargs = kwargs

if has_collection is None:
self.__input_supplied = False
@@ -381,6 +394,7 @@ def __init__(self, job_working_directory, has_collection, dataset_paths=[], **kw
else:
collection = has_collection
self.name = None
self.collection = collection

elements = collection.elements
element_instances = odict.odict()
@@ -401,6 +415,16 @@ def __init__(self, job_working_directory, has_collection, dataset_paths=[], **kw
self.__element_instances = element_instances
self.__element_instance_list = element_instance_list

def get_datasets_for_group(self, group):
group = text_type(group).lower()
if not self._dataset_elements_cache.get(group):
wrappers = []
for element in self.collection.dataset_elements:
if any([t for t in element.dataset_instance.tags if t.user_tname.lower() == 'group' and t.value.lower() == group]):
wrappers.append(self._dataset_wrapper(element.element_object, self.dataset_paths, identifier=element.element_identifier, **self.kwargs))
self._dataset_elements_cache[group] = wrappers
return self._dataset_elements_cache[group]
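
get_datasets_for_group keeps the collection elements whose dataset carries a 'group' tag equal (case-insensitively) to the requested value, wraps them, and caches the result per group. The selection predicate, pulled out into a rough standalone form (elements_for_group is a hypothetical helper for illustration):

def elements_for_group(dataset_elements, group):
    group = group.lower()
    return [element for element in dataset_elements
            if any(t.user_tname.lower() == 'group' and t.value.lower() == group
                   for t in element.dataset_instance.tags)]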

def keys(self):
if not self.__input_supplied:
return []
14 changes: 10 additions & 4 deletions lib/galaxy/tools/xsd/galaxy.xsd
@@ -2099,6 +2099,11 @@ $attribute_list:checked,truevalue,falsevalue:5

A dataset from the current history. Multiple types might be used for the param form.

#### ``group_tag``

$attribute_list:multiple,data_ref:5


##### Examples

The following will find all "coordinate interval files" contained within the
@@ -2356,10 +2361,10 @@
as a comma separated list.
<xs:annotation>
<xs:documentation xml:lang="en"><![CDATA[

Only valid if ``type`` attribute value is ``select`` or ``data_column``. Used
with select lists whose options are dynamically generated based on certain
metadata attributes of the dataset upon which this parameter depends (usually
but not always the tool's input dataset).
Only valid if ``type`` attribute value is ``select``, ``data_column``, or
``group_tag``. Used with select lists whose options are dynamically generated
based on certain metadata attributes of the dataset or collection upon which
this parameter depends (usually but not always the tool's input dataset).

]]></xs:documentation>
</xs:annotation>
@@ -2509,6 +2514,7 @@
allow access to Python code to generate options for a select list. See
<xs:enumeration value="file"/>
<xs:enumeration value="data"/>
<xs:enumeration value="drill_down"/>
<xs:enumeration value="group_tag"/>
<xs:enumeration value="data_collection"/>
</xs:restriction>
</xs:simpleType>
65 changes: 65 additions & 0 deletions test/api/test_tools.py
@@ -1707,6 +1707,71 @@ def __tool_ids(self):
tool_ids = [_["id"] for _ in tools]
return tool_ids

def test_group_tag_selection(self):
with self.dataset_populator.test_history() as history_id:
input_hdca_id = self.__build_group_list(history_id)
inputs = {
"input1": {"src": "hdca", "id": input_hdca_id},
"group": "condition:treated",
}
self.dataset_populator.wait_for_history(history_id, assert_ok=True)
response = self._run("collection_cat_group_tag", history_id, inputs, assert_ok=True)
outputs = response["outputs"]
self.assertEquals(len(outputs), 1)
output = outputs[0]
output_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output)
self.assertEquals(output_content.strip(), "123\n456")

def test_group_tag_selection_multiple(self):
with self.dataset_populator.test_history() as history_id:
input_hdca_id = self.__build_group_list(history_id)
inputs = {
"input1": {"src": "hdca", "id": input_hdca_id},
"groups": "condition:treated,type:single",
}
self.dataset_populator.wait_for_history(history_id, assert_ok=True)
response = self._run("collection_cat_group_tag_multiple", history_id, inputs, assert_ok=True)
outputs = response["outputs"]
self.assertEquals(len(outputs), 1)
output = outputs[0]
output_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=output)
self.assertEquals(output_content.strip(), "123\n456\n456\n0ab")
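
Given the tags assigned in __build_group_list below, "condition:treated" selects test0 (123) and test1 (456) while "type:single" selects test1 (456) and test3 (0ab), so 456 appears twice in the concatenation. A quick check of that arithmetic:

treated = ["123", "456"]  # datasets tagged group:condition:treated (test0, test1)
single = ["456", "0ab"]   # datasets tagged group:type:single (test1, test3)
assert "\n".join(treated + single) == "123\n456\n456\n0ab"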

def __build_group_list(self, history_id):
response = self.dataset_collection_populator.upload_collection(history_id, "list", elements=[
{
"name": "test0",
"src": "pasted",
"paste_content": "123\n",
"ext": "txt",
"tags": ["group:type:paired-end", "group:condition:treated"],
},
{
"name": "test1",
"src": "pasted",
"paste_content": "456\n",
"ext": "txt",
"tags": ["group:type:single", "group:condition:treated"],
},
{
"name": "test2",
"src": "pasted",
"paste_content": "789\n",
"ext": "txt",
"tags": ["group:type:paired-end", "group:condition:untreated"],
},
{
"name": "test3",
"src": "pasted",
"paste_content": "0ab\n",
"ext": "txt",
"tags": ["group:type:single", "group:condition:untreated"],
}
])
self._assert_status_code_is(response, 200)
hdca_list_id = response.json()["outputs"][0]["id"]
return hdca_list_id
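
The "group:<name>:<value>" strings here rely on the text before the first colon becoming the tag name and the remainder its value, so "group:condition:treated" is what the group_tag parameter later offers as "condition:treated". A hedged sketch of that split (assuming the first-colon convention, consistent with the tag matching shown above):

def split_tag(tag_str):
    # Assumed convention: tag name before the first ':', value is the rest.
    name, _, value = tag_str.partition(':')
    return name, value

assert split_tag('group:condition:treated') == ('group', 'condition:treated')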

def __build_nested_list(self, history_id):
response = self.dataset_collection_populator.upload_collection(history_id, "list:paired", elements=[
{
21 changes: 21 additions & 0 deletions test/functional/tools/collection_cat_group_tag.xml
@@ -0,0 +1,21 @@
<tool id="collection_cat_group_tag" name="Concatenate multiple datasets (based on group tag)">
<description>tail-to-head</description>
<command>
cat
#for $file in $input1.get_datasets_for_group($group):
'$file'
#end for
> '$out_file1'
</command>
<inputs>
<param name="input1" type="data_collection" collection_type="list" label="Concatenate Dataset" multiple="true" />
<param name="group" type="group_tag" data_ref="input1" />
</inputs>
<outputs>
<data name="out_file1" format="input" metadata_source="input1"/>
</outputs>
<tests>
</tests>
<help>
</help>
</tool>
23 changes: 23 additions & 0 deletions test/functional/tools/collection_cat_group_tag_multiple.xml
@@ -0,0 +1,23 @@
<tool id="collection_cat_group_tag_multiple" name="Concatenate multiple datasets (based on group tags)">
<description>tail-to-head</description>
<command>
cat
#for $group in $groups:
#for $file in $input1.get_datasets_for_group($group):
'$file'
#end for
#end for
> '$out_file1'
</command>
<inputs>
<param name="input1" type="data_collection" collection_type="list" label="Concatenate Dataset" multiple="true" />
<param name="groups" type="group_tag" data_ref="input1" multiple="true" />
</inputs>
<outputs>
<data name="out_file1" format="input" metadata_source="input1"/>
</outputs>
<tests>
</tests>
<help>
</help>
</tool>
2 changes: 2 additions & 0 deletions test/functional/tools/samples_tool_conf.xml
@@ -131,6 +131,8 @@
<tool file="collection_type_source_map_over.xml" />
<tool file="collection_creates_list_fail.xml" />
<tool file="collection_creates_dynamic_nested_fail.xml" />
<tool file="collection_cat_group_tag.xml" />
<tool file="collection_cat_group_tag_multiple.xml" />
<tool file="cheetah_casting.xml" />

<tool file="cheetah_problem_unbound_var.xml" />
1 change: 1 addition & 0 deletions test/unit/test_objectstore.py
@@ -224,6 +224,7 @@ class MockDataset(object):
def __init__(self, id):
self.id = id
self.object_store_id = None
self.tags = []


# Poor man's mocking. Need to get a real mocking library as real Galaxy development
1 change: 1 addition & 0 deletions test/unit/tools/test_wrappers.py
@@ -168,6 +168,7 @@ def __init__(self):
self.file_name = MOCK_DATASET_PATH
self.extra_files_path = MOCK_DATASET_EXTRA_FILES_PATH
self.ext = MOCK_DATASET_EXT
self.tags = []


class MockTool(object):