Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tool that adds/sets tags for collection elements from a file #5462

Merged
merged 6 commits into from
Feb 12, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions config/tool_conf.xml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
<tool file="${model_tools_path}/relabel_from_file.xml" />
<tool file="${model_tools_path}/filter_from_file.xml" />
<tool file="${model_tools_path}/sort_collection_list.xml" />
<tool file="${model_tools_path}/tag_collection_from_file.xml" />
</section>
<section id="liftOver" name="Lift-Over">
<tool file="extract/liftOver_wrapper.xml" />
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/managers/taggable.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def _tag_str_gen(item):
def _tags_to_strings(item):
if not hasattr(item, 'tags'):
return None
return list(_tag_str_gen(item))
return sorted(list(_tag_str_gen(item)))


def _tags_from_strings(item, tag_handler, new_tags_list, user=None):
Expand Down
2 changes: 2 additions & 0 deletions lib/galaxy/managers/tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ def apply_item_tag(self, user, item, name, value=None):
item_tag_assoc.user_tname = name
item_tag_assoc.user_value = value
item_tag_assoc.value = lc_value
# Need to flush to get an ID. We need an ID to apply multiple tags with the same tname to an object.
self.sa_session.flush()
return item_tag_assoc

def apply_item_tags(self, user, item, tags_str):
Expand Down
60 changes: 60 additions & 0 deletions lib/galaxy/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from galaxy.datatypes.metadata import JobExternalOutputMetadataWrapper
from galaxy.managers import histories
from galaxy.managers.jobs import JobSearch
from galaxy.managers.tags import GalaxyTagManager
from galaxy.queue_worker import send_control_task
from galaxy.tools.actions import DefaultToolAction
from galaxy.tools.actions.data_manager import DataManagerToolAction
Expand Down Expand Up @@ -2605,6 +2606,65 @@ def add_copied_value_to_new_elements(new_label, dce_object):
)


class TagFromFileTool(DatabaseOperationTool):
    """Database operation tool that tags copies of collection elements from a tabular file.

    The first column of the file is an element identifier, the remaining
    columns are tags. The ``how`` parameter selects whether the listed tags are
    added to the existing tags (``add``), removed from them (``remove``) or
    applied on their own (``set``).
    """
    tool_type = 'tag_from_file'

    def produce_outputs(self, trans, out_data, output_collections, incoming, history, **kwds):
        """Copy every element of the input collection, tag the copies and create the output collection.

        :param trans: transaction; ``trans.app.model.context`` is handed to the tag manager.
        :param out_data: not used by this tool.
        :param output_collections: object whose ``create_collection`` is called with the tagged copies.
        :param incoming: tool parameters; reads ``input`` (the collection),
            ``how`` (``add``/``set``/``remove``) and ``tags`` (the tag file dataset).
        :param history: history receiving the copies; ``history.user`` owns the new tags.
        """
        hdca = incoming["input"]
        how = incoming['how']
        new_tags_dataset_assoc = incoming["tags"]
        new_elements = odict()
        tags_manager = GalaxyTagManager(trans.app.model.context)

        def existing_tags(item):
            # Tags currently attached to ``item`` as a set of strings (empty strings dropped).
            return {tag for tag in tags_manager.get_tags_str(item.tags).split(',') if tag}

        def add_copied_value_to_new_elements(new_tags_dict, dce):
            if getattr(dce.element_object, "history_content_type", None) == "dataset":
                copied_value = dce.element_object.copy()
                # copy should never be visible, since part of a collection
                copied_value.visible = False
                history.add_dataset(copied_value, copied_value, set_hid=False)
                new_tags = new_tags_dict.get(dce.element_identifier)
                if new_tags:
                    if how in ('add', 'remove') and dce.element_object.tags:
                        # We need get the original tags and update them with the new tags
                        old_tags = existing_tags(dce.element_object)
                        if how == 'add':
                            old_tags.update(set(new_tags))
                        elif how == 'remove':
                            old_tags = old_tags - set(new_tags)
                        new_tags = old_tags
                    tags_manager.add_tags_from_list(user=history.user, item=copied_value, new_tags_list=new_tags)
            else:
                # We have a collection, and we copy the elements so that we don't manipulate the original tags
                copied_value = dce.element_object.copy(element_destination=history)
                for new_element, old_element in zip(copied_value.dataset_elements, dce.element_object.dataset_elements):
                    # TODO: This should be eliminated, but collections created by the collection builder
                    # don't set `visible` to `False` if you don't hide the original elements.
                    new_element.element_object.visible = False
                    new_tags = new_tags_dict.get(new_element.element_identifier)
                    if how in ('add', 'remove'):
                        old_tags = existing_tags(old_element.element_object)
                        if new_tags:
                            if how == 'add':
                                old_tags.update(set(new_tags))
                            elif how == 'remove':
                                old_tags = old_tags - set(new_tags)
                        new_tags = old_tags
                    # In 'set' mode an element without a row in the tag file has no tag list;
                    # skip it, as the dataset branch does, instead of passing None along.
                    if new_tags is not None:
                        tags_manager.add_tags_from_list(user=history.user, item=new_element.element_object, new_tags_list=new_tags)
            new_elements[dce.element_identifier] = copied_value

        new_tags_path = new_tags_dataset_assoc.file_name
        # Close the file handle deterministically; the size hint caps how much is read.
        with open(new_tags_path, "r") as new_tags_fh:
            tag_lines = new_tags_fh.readlines(1024 * 1000000)
        # We have a tabular file, where the first column is an existing element identifier,
        # and the remaining columns represent new tags.
        source_new_tags = (line.strip().split('\t') for line in tag_lines)
        new_tags_dict = {item[0]: item[1:] for item in source_new_tags}
        for dce in hdca.collection.elements:
            add_copied_value_to_new_elements(new_tags_dict, dce)
        output_collections.create_collection(
            next(iter(self.outputs.values())), "output", elements=new_elements
        )


class FilterFromFileTool(DatabaseOperationTool):
tool_type = 'filter_from_file'

Expand Down
91 changes: 91 additions & 0 deletions lib/galaxy/tools/tag_collection_from_file.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
<tool id="__TAG_FROM_FILE__"
      name="Tag elements from file"
      version="1.0.0"
      tool_type="tag_from_file">
    <description>from contents of a file</description>
    <type class="TagFromFileTool" module="galaxy.tools" />
    <action module="galaxy.tools.actions.model_operations"
            class="ModelOperationToolAction"/>
    <inputs>
        <param type="data_collection" name="input" label="Input Collection"/>
        <!-- The help text describes the tabular tag file, so it is attached to this parameter. -->
        <param type="data" name="tags" format="tabular" label="Tag collection elements according to this file" help="A tabular file indicating how to tag collection elements."/>
        <param name="how" type="select" label="How should the tags be updated">
            <option value="add">New tags will be added, existing tags will be kept</option>
            <option value="set">New tags will be added, existing tags will be removed</option>
            <option value="remove">The tags listed will be removed</option>
        </param>
    </inputs>
    <outputs>
        <collection name="output" format_source="input" type_source="input" label="${on_string} (Tagged)" >
        </collection>
    </outputs>
    <tests>
        <!-- Flat list: tags from the file are added to each matching element. -->
        <test>
            <param name="input">
                <collection type="list">
                    <element name="forward" value="simple_line.txt" />
                    <element name="reverse" value="simple_line_alternative.txt" />
                </collection>
            </param>
            <param name="tags" value="new_tags_1.txt" ftype="txt" />
            <param name="how" value="add"/>
            <output_collection name="output" type="list">
                <element name="forward">
                    <assert_contents>
                        <has_text_matching expression="^This is a line of text.\n$" />
                    </assert_contents>
                    <metadata name="tags" value="alias:f,alias:r1,orientation:forward" />
                </element>
                <element name="reverse">
                    <assert_contents>
                        <has_text_matching expression="^This is a different line of text.\n$" />
                    </assert_contents>
                    <metadata name="tags" value="alias:r,alias:r2,orientation:reverse" />
                </element>
            </output_collection>
        </test>
        <!-- Nested list:paired: tags are matched against the inner element identifiers. -->
        <test>
            <param name="input">
                <collection type="list:paired">
                    <element name="i1">
                        <collection type="paired">
                            <element name="forward" value="simple_line.txt" />
                            <element name="reverse" value="simple_line_alternative.txt" />
                        </collection>
                    </element>
                </collection>
            </param>
            <param name="how" value="set"/>
            <param name="tags" value="new_tags_1.txt" ftype="txt" />
            <output_collection name="output" type="list:paired">
                <element name="i1">
                    <element name="forward">
                        <assert_contents>
                            <has_text_matching expression="^This is a line of text.\n$" />
                        </assert_contents>
                        <metadata name="tags" value="alias:f,alias:r1,orientation:forward" />
                    </element>
                    <element name="reverse">
                        <assert_contents>
                            <has_text_matching expression="^This is a different line of text.\n$" />
                        </assert_contents>
                        <metadata name="tags" value="alias:r,alias:r2,orientation:reverse" />
                    </element>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[

.. class:: infomark

This tool will take an input collection and a tabular file,
where the first column indicates an element identifier and the
remaining columns contain the new tags. This file may contain
fewer entries than elements in the collection.
In that case only matching list identifiers will be tagged.

This tool will create new history datasets from your collection
but your quota usage will not increase.
    ]]></help>
</tool>
2 changes: 2 additions & 0 deletions test-data/new_tags_1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
forward orientation:forward alias:r1 alias:f
reverse orientation:reverse alias:r2 alias:r
4 changes: 2 additions & 2 deletions test/base/interactor.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,12 @@ def _verify_metadata(self, history_id, hid, attributes):
"""Check dataset metadata.

ftype on output maps to `file_ext` on the hda's API description, `name`, `info`,
and `dbkey` all map to the API description directly. Other metadata attributes
`dbkey` and `tags` all map to the API description directly. Other metadata attributes
are assumed to be datatype-specific and mapped with a prefix of `metadata_`.
"""
metadata = attributes.get('metadata', {}).copy()
for key, value in metadata.copy().items():
if key not in ['name', 'info']:
if key not in ['name', 'info', 'tags']:
new_key = "metadata_%s" % key
metadata[new_key] = metadata[key]
del metadata[key]
Expand Down
1 change: 1 addition & 0 deletions test/functional/tools/samples_tool_conf.xml
Original file line number Diff line number Diff line change
Expand Up @@ -183,5 +183,6 @@
<tool file="${model_tools_path}/merge_collection.xml" />
<tool file="${model_tools_path}/relabel_from_file.xml" />
<tool file="${model_tools_path}/filter_from_file.xml" />
<tool file="${model_tools_path}/tag_collection_from_file.xml" />

</toolbox>