From b114f3f7b9d483509e7d684aa8be54e769fddbf7 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Mon, 5 Feb 2018 10:05:19 +0100 Subject: [PATCH 1/6] Add tool that tags collection elements from a file This tags collection elements (but not collections). --- config/tool_conf.xml.sample | 1 + lib/galaxy/tools/__init__.py | 54 ++++++++++++ lib/galaxy/tools/tag_collection_from_file.xml | 85 +++++++++++++++++++ test-data/new_tags_1.txt | 2 + 4 files changed, 142 insertions(+) create mode 100644 lib/galaxy/tools/tag_collection_from_file.xml create mode 100644 test-data/new_tags_1.txt diff --git a/config/tool_conf.xml.sample b/config/tool_conf.xml.sample index ae8bc9b96685..581bca8e16d2 100644 --- a/config/tool_conf.xml.sample +++ b/config/tool_conf.xml.sample @@ -36,6 +36,7 @@ + diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py index 928b2bfee69b..cc1500b8c876 100755 --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -28,6 +28,7 @@ from galaxy.datatypes.metadata import JobExternalOutputMetadataWrapper from galaxy.managers import histories from galaxy.managers.jobs import JobSearch +from galaxy.managers.tags import GalaxyTagManager from galaxy.queue_worker import send_control_task from galaxy.tools.actions import DefaultToolAction from galaxy.tools.actions.data_manager import DataManagerToolAction @@ -2605,6 +2606,59 @@ def add_copied_value_to_new_elements(new_label, dce_object): ) +class TagFromFileTool(DatabaseOperationTool): + tool_type = 'tag_from_file' + + def produce_outputs(self, trans, out_data, output_collections, incoming, history, **kwds): + hdca = incoming["input"] + set_tags = incoming['set_tags'] + new_tags_dataset_assoc = incoming["tags"] + new_elements = odict() + tags_manager = GalaxyTagManager(trans.app.model.context) + + def add_copied_value_to_new_elements(new_tags_dict, dce): + if getattr(dce.element_object, "history_content_type", None) == "dataset": + copied_value = dce.element_object.copy() + # copy should 
never be visible, since part of a collection + copied_value.visible = False + history.add_dataset(copied_value, copied_value, set_hid=False) + new_tags = new_tags_dict.get(dce.element_identifier) + if new_tags: + if not set_tags and dce.element_object.tags: + # We need get the original tags and update them with the new tags + old_tags = set(tag for tag in tags_manager.get_tags_str(dce.element_object.tags).split(',') if tag) + old_tags.update(set(new_tags)) + new_tags = old_tags + tags_manager.add_tags_from_list(user=history.user, item=copied_value, new_tags_list=new_tags) + else: + # We have a collection, and we copy the elements so that we don't manipulate the original tags + copied_value = dce.element_object.copy(element_destination=history) + for new_element, old_element in zip(copied_value.dataset_elements, dce.element_object.dataset_elements): + # TODO: This should be eliminated, but collections created by the collection builder + # don't set `visible` to `False` if you don't hide the original elements. + new_element.element_object.visible = False + new_tags = new_tags_dict.get(new_element.element_identifier) + if not set_tags: + old_tags = set(tag for tag in tags_manager.get_tags_str(old_element.element_object.tags).split(',') if tag) + if new_tags: + old_tags.update(set(new_tags)) + new_tags = old_tags + tags_manager.add_tags_from_list(user=history.user, item=new_element.element_object, new_tags_list=new_tags) + new_elements[dce.element_identifier] = copied_value + + new_tags_path = new_tags_dataset_assoc.file_name + new_tags = open(new_tags_path, "r").readlines(1024 * 1000000) + # We have a tabular file, where the first column is an existing element identifier, + # and the remaining columns represent new tags. 
+ source_new_tags = (line.strip().split('\t') for line in new_tags) + new_tags_dict = {item[0]: item[1:] for item in source_new_tags} + for i, dce in enumerate(hdca.collection.elements): + add_copied_value_to_new_elements(new_tags_dict, dce) + output_collections.create_collection( + next(iter(self.outputs.values())), "output", elements=new_elements + ) + + class FilterFromFileTool(DatabaseOperationTool): tool_type = 'filter_from_file' diff --git a/lib/galaxy/tools/tag_collection_from_file.xml b/lib/galaxy/tools/tag_collection_from_file.xml new file mode 100644 index 000000000000..497b16fd003b --- /dev/null +++ b/lib/galaxy/tools/tag_collection_from_file.xml @@ -0,0 +1,85 @@ + + from contents of a file + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/test-data/new_tags_1.txt b/test-data/new_tags_1.txt new file mode 100644 index 000000000000..2296bf5f7a77 --- /dev/null +++ b/test-data/new_tags_1.txt @@ -0,0 +1,2 @@ +forward orientation:forward alias:r1 +reverse orientation:reverse alias:r2 From b2be7b51814a4354307e8f0a0df0c8885ef73bb0 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Mon, 5 Feb 2018 18:22:19 +0100 Subject: [PATCH 2/6] Sort tags when serializing them --- lib/galaxy/managers/taggable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/managers/taggable.py b/lib/galaxy/managers/taggable.py index edee585460ed..dc4508abefac 100644 --- a/lib/galaxy/managers/taggable.py +++ b/lib/galaxy/managers/taggable.py @@ -24,7 +24,7 @@ def _tag_str_gen(item): def _tags_to_strings(item): if not hasattr(item, 'tags'): return None - return list(_tag_str_gen(item)) + return sorted(list(_tag_str_gen(item))) def _tags_from_strings(item, tag_handler, new_tags_list, user=None): From f9664a0863b24ff738aefcbe520f033e5f53e76d Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Mon, 5 Feb 2018 18:22:45 +0100 Subject: [PATCH 3/6] Make presence of tags testable --- 
test/base/interactor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/base/interactor.py b/test/base/interactor.py index 9ea5ed0318c2..7bf1c0d16a0f 100644 --- a/test/base/interactor.py +++ b/test/base/interactor.py @@ -107,12 +107,12 @@ def _verify_metadata(self, history_id, hid, attributes): """Check dataset metadata. ftype on output maps to `file_ext` on the hda's API description, `name`, `info`, - and `dbkey` all map to the API description directly. Other metadata attributes + `dbkey` and `tags` all map to the API description directly. Other metadata attributes are assumed to be datatype-specific and mapped with a prefix of `metadata_`. """ metadata = attributes.get('metadata', {}).copy() for key, value in metadata.copy().items(): - if key not in ['name', 'info']: + if key not in ['name', 'info', 'tags']: new_key = "metadata_%s" % key metadata[new_key] = metadata[key] del metadata[key] From fd373501c78dee268844e82fa1a56a108d34cf23 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 6 Feb 2018 15:28:13 +0100 Subject: [PATCH 4/6] Add tag from file tool to samples_tool_conf.xml --- test/functional/tools/samples_tool_conf.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/test/functional/tools/samples_tool_conf.xml b/test/functional/tools/samples_tool_conf.xml index 930ee0c35027..7548b2df3850 100644 --- a/test/functional/tools/samples_tool_conf.xml +++ b/test/functional/tools/samples_tool_conf.xml @@ -183,5 +183,6 @@ + From e306a447c080f8d252a9a7fe49f4d36f6a8de4a9 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 6 Feb 2018 16:56:48 +0100 Subject: [PATCH 5/6] Switch tag tool to use select and add remove mode --- config/tool_conf.xml.sample | 2 +- lib/galaxy/tools/__init__.py | 16 ++++-- lib/galaxy/tools/tag_collection_from_file.xml | 52 +++++++++++-------- 3 files changed, 41 insertions(+), 29 deletions(-) diff --git a/config/tool_conf.xml.sample b/config/tool_conf.xml.sample index 581bca8e16d2..890a6f1a183e 100644 --- 
a/config/tool_conf.xml.sample +++ b/config/tool_conf.xml.sample @@ -36,10 +36,10 @@ - +
diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py index cc1500b8c876..db471241b614 100755 --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -2611,7 +2611,7 @@ class TagFromFileTool(DatabaseOperationTool): def produce_outputs(self, trans, out_data, output_collections, incoming, history, **kwds): hdca = incoming["input"] - set_tags = incoming['set_tags'] + how = incoming['how'] new_tags_dataset_assoc = incoming["tags"] new_elements = odict() tags_manager = GalaxyTagManager(trans.app.model.context) @@ -2624,10 +2624,13 @@ def add_copied_value_to_new_elements(new_tags_dict, dce): history.add_dataset(copied_value, copied_value, set_hid=False) new_tags = new_tags_dict.get(dce.element_identifier) if new_tags: - if not set_tags and dce.element_object.tags: + if how in ('add', 'remove') and dce.element_object.tags: # We need get the original tags and update them with the new tags old_tags = set(tag for tag in tags_manager.get_tags_str(dce.element_object.tags).split(',') if tag) - old_tags.update(set(new_tags)) + if how == 'add': + old_tags.update(set(new_tags)) + elif how == 'remove': + old_tags = old_tags - set(new_tags) new_tags = old_tags tags_manager.add_tags_from_list(user=history.user, item=copied_value, new_tags_list=new_tags) else: @@ -2638,10 +2641,13 @@ def add_copied_value_to_new_elements(new_tags_dict, dce): # don't set `visible` to `False` if you don't hide the original elements. 
new_element.element_object.visible = False new_tags = new_tags_dict.get(new_element.element_identifier) - if not set_tags: + if how in ('add', 'remove'): old_tags = set(tag for tag in tags_manager.get_tags_str(old_element.element_object.tags).split(',') if tag) if new_tags: - old_tags.update(set(new_tags)) + if how == 'add': + old_tags.update(set(new_tags)) + elif how == 'remove': + old_tags = old_tags - set(new_tags) new_tags = old_tags tags_manager.add_tags_from_list(user=history.user, item=new_element.element_object, new_tags_list=new_tags) new_elements[dce.element_identifier] = copied_value diff --git a/lib/galaxy/tools/tag_collection_from_file.xml b/lib/galaxy/tools/tag_collection_from_file.xml index 497b16fd003b..3666a386c415 100644 --- a/lib/galaxy/tools/tag_collection_from_file.xml +++ b/lib/galaxy/tools/tag_collection_from_file.xml @@ -8,8 +8,12 @@ class="ModelOperationToolAction"/> - - + + + + + + @@ -24,18 +28,19 @@ + - - - - + + + + - - - - + + + + @@ -50,22 +55,23 @@ + - - - - - - - - - - - - + + + + + + + + + + + + + - From d14bae73f72cd454ef41bb1815cf1dfc9256db5d Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Wed, 7 Feb 2018 10:46:39 +0100 Subject: [PATCH 6/6] Flush once per tag Not sure this is the right thing to do (maybe there is a better way to get an ID?), but it avoids ``` IntegrityError: (psycopg2.IntegrityError) duplicate key value violates unique constraint "tag_name_key" DETAIL: Key (name)=(group) already exists. 
[SQL: 'INSERT INTO tag (type, parent_id, name) VALUES (%(type)s, %(parent_id)s, %(name)s) RETURNING tag.id'] [parameters: {'parent_id': None, 'type': 0, 'name': u'group'}] galaxy.tools.execute WARNING 2018-02-06 15:22:38,808 [p:2122,w:1,m:0] [uWSGIWorker1Core1] There was a failure executing a job for tool [__TAG_FROM_FILE__] - Error executing tool: (psycopg2.IntegrityError) duplicate key value violates unique constraint "tag_name_key" ``` --- lib/galaxy/managers/tags.py | 2 ++ lib/galaxy/tools/tag_collection_from_file.xml | 8 ++++---- test-data/new_tags_1.txt | 4 ++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/lib/galaxy/managers/tags.py b/lib/galaxy/managers/tags.py index b3d3b0743f97..8e37c13ed57b 100644 --- a/lib/galaxy/managers/tags.py +++ b/lib/galaxy/managers/tags.py @@ -169,6 +169,8 @@ def apply_item_tag(self, user, item, name, value=None): item_tag_assoc.user_tname = name item_tag_assoc.user_value = value item_tag_assoc.value = lc_value + # Need to flush to get an ID. We need an ID to apply multiple tags with the same tname to an object. + self.sa_session.flush() return item_tag_assoc def apply_item_tags(self, user, item, tags_str): diff --git a/lib/galaxy/tools/tag_collection_from_file.xml b/lib/galaxy/tools/tag_collection_from_file.xml index 3666a386c415..36b2935a7b78 100644 --- a/lib/galaxy/tools/tag_collection_from_file.xml +++ b/lib/galaxy/tools/tag_collection_from_file.xml @@ -34,13 +34,13 @@ - + - + @@ -63,13 +63,13 @@ - + - + diff --git a/test-data/new_tags_1.txt b/test-data/new_tags_1.txt index 2296bf5f7a77..91caa6c4fae3 100644 --- a/test-data/new_tags_1.txt +++ b/test-data/new_tags_1.txt @@ -1,2 +1,2 @@ -forward orientation:forward alias:r1 -reverse orientation:reverse alias:r2 +forward orientation:forward alias:r1 alias:f +reverse orientation:reverse alias:r2 alias:r