From beb4842b8d8ddaa25995425c3dbeb50acfde41de Mon Sep 17 00:00:00 2001
From: John Chilton
Date: Wed, 2 Oct 2024 00:10:44 -0400
Subject: [PATCH] Allow multiple xrefs in biotools_mappings.tsv.

---
Review notes (commentary placed after the three-dash marker is not part of
the commit message and is skipped when the patch is applied):

* BIOTOOLS_MAPPING changes from Dict[str, str] to a defaultdict(list), so
  several rows sharing one tool id accumulate instead of the last row
  winning. The rxlr_motifs entry is split into two rows accordingly.
* Every non-comment row is unpacked with `tool_id, xref = line.split("\t")`,
  so a row with zero tabs or more than one tab raises ValueError when the
  module is imported. Each xref needs its own two-column row.
* legacy_biotools_external_reference() now returns a list ([] when no id
  matches) and hands back the list stored in BIOTOOLS_MAPPING itself, not a
  copy. Callers should treat it as read-only.
* NOTE(review): the `is not None` check kept inside the new for-loop looks
  redundant, since the values come from str.split() and are always str.
* NOTE(review): the removed code was the visible user of `cast` and `Tuple`;
  confirm whether those typing imports are still needed elsewhere in
  ontology_data.py.
* NOTE(review): the new test expects "cheetah_problem_unbound_var_input" to
  map to "bwa"; that row is outside the hunks shown here, so verify it
  against biotools_mappings.tsv.

 .../ontologies/biotools_mappings.tsv          |  2 +-
 .../tool_util/ontologies/ontology_data.py     | 23 +++++++-------
 test/unit/tool_util/test_ontologies.py        | 30 +++++++++++++++++++
 3 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/lib/galaxy/tool_util/ontologies/biotools_mappings.tsv b/lib/galaxy/tool_util/ontologies/biotools_mappings.tsv
index e3a8abedf495..9cbb1f041360 100644
--- a/lib/galaxy/tool_util/ontologies/biotools_mappings.tsv
+++ b/lib/galaxy/tool_util/ontologies/biotools_mappings.tsv
@@ -44,6 +44,7 @@ gffcompare	gffcompare
 sample_seqs	biopython
 seq_filter_by_id	biopython
 rxlr_motifs	signalp
+rxlr_motifs	hmmer2
 crossmap_bam	crossmap
 samtools_bam_to_cram	samtools
 crossmap_region	crossmap
@@ -260,7 +261,6 @@ fraggenescan	fraggenescan
 repeatmodeler	repeatmodeler
 promoter2	promoter
 Psortb	psortb
-rxlr_motifs	signalp hmmer2
 signalp3	signalp
 tmhmm2	tmhmm
 wolf_psort	wolf_psort
diff --git a/lib/galaxy/tool_util/ontologies/ontology_data.py b/lib/galaxy/tool_util/ontologies/ontology_data.py
index 67822130e428..63a4d573b4d7 100644
--- a/lib/galaxy/tool_util/ontologies/ontology_data.py
+++ b/lib/galaxy/tool_util/ontologies/ontology_data.py
@@ -1,3 +1,4 @@
+from collections import defaultdict
 from typing import (
     cast,
     Dict,
@@ -34,13 +35,11 @@ def _read_ontology_data_text(filename: str) -> str:
 EDAM_TOPIC_MAPPING_FILENAME = "edam_topic_mappings.tsv"

 BIOTOOLS_MAPPING_CONTENT = _read_ontology_data_text(BIOTOOLS_MAPPING_FILENAME)
-BIOTOOLS_MAPPING: Dict[str, str] = dict(
-    [
-        cast(Tuple[str, str], tuple(x.split("\t")))
-        for x in BIOTOOLS_MAPPING_CONTENT.splitlines()
-        if not x.startswith("#")
-    ]
-)
+BIOTOOLS_MAPPING: Dict[str, List[str]] = defaultdict(list)
+for line in BIOTOOLS_MAPPING_CONTENT.splitlines():
+    if not line.startswith("#"):
+        tool_id, xref = line.split("\t")
+        BIOTOOLS_MAPPING[tool_id].append(xref)

 EDAM_OPERATION_MAPPING_CONTENT = _read_ontology_data_text(EDAM_OPERATION_MAPPING_FILENAME)
 EDAM_OPERATION_MAPPING: Dict[str, List[str]] = _multi_dict_mapping(EDAM_OPERATION_MAPPING_CONTENT)
@@ -61,11 +60,11 @@ def biotools_reference(xrefs):
     return None


-def legacy_biotools_external_reference(all_ids: List[str]) -> Optional[str]:
+def legacy_biotools_external_reference(all_ids: List[str]) -> List[str]:
     for tool_id in all_ids:
         if tool_id in BIOTOOLS_MAPPING:
             return BIOTOOLS_MAPPING[tool_id]
-    return None
+    return []


 def expand_ontology_data(
@@ -74,9 +73,9 @@ def expand_ontology_data(
     xrefs = tool_source.parse_xrefs()
     has_biotools_reference = any(x["reftype"] == "bio.tools" for x in xrefs)
     if not has_biotools_reference:
-        legacy_biotools_ref = legacy_biotools_external_reference(all_ids)
-        if legacy_biotools_ref is not None:
-            xrefs.append({"value": legacy_biotools_ref, "reftype": "bio.tools"})
+        for legacy_biotools_ref in legacy_biotools_external_reference(all_ids):
+            if legacy_biotools_ref is not None:
+                xrefs.append({"value": legacy_biotools_ref, "reftype": "bio.tools"})

     edam_operations = tool_source.parse_edam_operations()
     edam_topics = tool_source.parse_edam_topics()
diff --git a/test/unit/tool_util/test_ontologies.py b/test/unit/tool_util/test_ontologies.py
index 3991d358c3db..5c1faca4771f 100644
--- a/test/unit/tool_util/test_ontologies.py
+++ b/test/unit/tool_util/test_ontologies.py
@@ -46,6 +46,22 @@
     type: integer
 """

+TOOL_YAML_NO_EXPLICIT_XREFS = """
+name: "Bowtie Mapper"
+class: GalaxyTool
+id: sort1
+version: 1.0.2
+description: "The Bowtie Mapper"
+command: "bowtie --map-the-stuff"
+outputs:
+  out1:
+    format: bam
+    from_work_dir: out1.bam
+inputs:
+  - name: input1
+    type: integer
+"""
+

 def test_parse_edam_empty():
     test_source = get_test_tool_source(source_file_name="testtool.yml", source_contents=TOOL_YAML_1)
@@ -66,3 +82,17 @@ def test_parse_edam_mapping_operations_legacy():
     ontology_data = expand_ontology_data(test_source, ["sort1"], None)
     assert ontology_data.edam_operations == ["operation_3802"]
     assert ontology_data.edam_topics == []
+
+
+def test_parse_biotools_default_mapping():
+    test_source = get_test_tool_source(source_file_name="testtool.yml", source_contents=TOOL_YAML_NO_EXPLICIT_XREFS)
+    ontology_data = expand_ontology_data(test_source, ["cheetah_problem_unbound_var_input"], None)
+    assert ontology_data.xrefs[0]["reftype"] == "bio.tools"
+    assert ontology_data.xrefs[0]["value"] == "bwa"
+
+    test_source = get_test_tool_source(source_file_name="testtool.yml", source_contents=TOOL_YAML_NO_EXPLICIT_XREFS)
+    ontology_data = expand_ontology_data(test_source, ["rxlr_motifs"], None)
+    assert len(ontology_data.xrefs) == 2
+    values = [x["value"] for x in ontology_data.xrefs]
+    assert "signalp" in values
+    assert "hmmer2" in values