Skip to content

Commit

Permalink
Allow multiple xrefs in biotools_mappings.tsv.
Browse files Browse the repository at this point in the history
  • Loading branch information
jmchilton committed Oct 2, 2024
1 parent 9c3137c commit beb4842
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 13 deletions.
2 changes: 1 addition & 1 deletion lib/galaxy/tool_util/ontologies/biotools_mappings.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ gffcompare gffcompare
sample_seqs biopython
seq_filter_by_id biopython
rxlr_motifs signalp
rxlr_motifs hmmer2
crossmap_bam crossmap
samtools_bam_to_cram samtools
crossmap_region crossmap
Expand Down Expand Up @@ -260,7 +261,6 @@ fraggenescan fraggenescan
repeatmodeler repeatmodeler
promoter2 promoter
Psortb psortb
rxlr_motifs signalp hmmer2
signalp3 signalp
tmhmm2 tmhmm
wolf_psort wolf_psort
Expand Down
23 changes: 11 additions & 12 deletions lib/galaxy/tool_util/ontologies/ontology_data.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from collections import defaultdict
from typing import (
cast,
Dict,
Expand Down Expand Up @@ -34,13 +35,11 @@ def _read_ontology_data_text(filename: str) -> str:
EDAM_TOPIC_MAPPING_FILENAME = "edam_topic_mappings.tsv"

BIOTOOLS_MAPPING_CONTENT = _read_ontology_data_text(BIOTOOLS_MAPPING_FILENAME)
BIOTOOLS_MAPPING: Dict[str, str] = dict(
[
cast(Tuple[str, str], tuple(x.split("\t")))
for x in BIOTOOLS_MAPPING_CONTENT.splitlines()
if not x.startswith("#")
]
)
# Map each tool id to every bio.tools xref listed for it; a tool id may
# appear on several rows of the mapping file, so values accumulate in a list.
BIOTOOLS_MAPPING: Dict[str, List[str]] = defaultdict(list)
for line in BIOTOOLS_MAPPING_CONTENT.splitlines():
    # Rows starting with "#" are comments in the TSV and carry no mapping.
    if line.startswith("#"):
        continue
    # Each data row is expected to hold exactly two tab-separated columns.
    tool_id, xref = line.split("\t")
    BIOTOOLS_MAPPING[tool_id].append(xref)
EDAM_OPERATION_MAPPING_CONTENT = _read_ontology_data_text(EDAM_OPERATION_MAPPING_FILENAME)
EDAM_OPERATION_MAPPING: Dict[str, List[str]] = _multi_dict_mapping(EDAM_OPERATION_MAPPING_CONTENT)

Expand All @@ -61,11 +60,11 @@ def biotools_reference(xrefs):
return None


def legacy_biotools_external_reference(all_ids: List[str]) -> Optional[str]:
def legacy_biotools_external_reference(all_ids: List[str]) -> List[str]:
for tool_id in all_ids:
if tool_id in BIOTOOLS_MAPPING:
return BIOTOOLS_MAPPING[tool_id]
return None
return []


def expand_ontology_data(
Expand All @@ -74,9 +73,9 @@ def expand_ontology_data(
xrefs = tool_source.parse_xrefs()
has_biotools_reference = any(x["reftype"] == "bio.tools" for x in xrefs)
if not has_biotools_reference:
legacy_biotools_ref = legacy_biotools_external_reference(all_ids)
if legacy_biotools_ref is not None:
xrefs.append({"value": legacy_biotools_ref, "reftype": "bio.tools"})
for legacy_biotools_ref in legacy_biotools_external_reference(all_ids):
if legacy_biotools_ref is not None:
xrefs.append({"value": legacy_biotools_ref, "reftype": "bio.tools"})

edam_operations = tool_source.parse_edam_operations()
edam_topics = tool_source.parse_edam_topics()
Expand Down
30 changes: 30 additions & 0 deletions test/unit/tool_util/test_ontologies.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,22 @@
type: integer
"""

# YAML tool definition that declares no xrefs of its own; it is the tool
# source fed to test_parse_biotools_default_mapping.
TOOL_YAML_NO_EXPLICIT_XREFS = """
name: "Bowtie Mapper"
class: GalaxyTool
id: sort1
version: 1.0.2
description: "The Bowtie Mapper"
command: "bowtie --map-the-stuff"
outputs:
out1:
format: bam
from_work_dir: out1.bam
inputs:
- name: input1
type: integer
"""


def test_parse_edam_empty():
test_source = get_test_tool_source(source_file_name="testtool.yml", source_contents=TOOL_YAML_1)
Expand All @@ -66,3 +82,17 @@ def test_parse_edam_mapping_operations_legacy():
ontology_data = expand_ontology_data(test_source, ["sort1"], None)
assert ontology_data.edam_operations == ["operation_3802"]
assert ontology_data.edam_topics == []


def test_parse_biotools_default_mapping():
    """Check that a tool without explicit xrefs gets bio.tools xrefs from the mapping."""
    # This tool id is expected to resolve to the "bwa" bio.tools entry.
    single_source = get_test_tool_source(
        source_file_name="testtool.yml",
        source_contents=TOOL_YAML_NO_EXPLICIT_XREFS,
    )
    single_data = expand_ontology_data(single_source, ["cheetah_problem_unbound_var_input"], None)
    assert single_data.xrefs[0]["reftype"] == "bio.tools"
    assert single_data.xrefs[0]["value"] == "bwa"

    # rxlr_motifs appears on two rows of biotools_mappings.tsv, so both
    # mapped values must be present in the expanded xrefs.
    multi_source = get_test_tool_source(
        source_file_name="testtool.yml",
        source_contents=TOOL_YAML_NO_EXPLICIT_XREFS,
    )
    multi_data = expand_ontology_data(multi_source, ["rxlr_motifs"], None)
    assert len(multi_data.xrefs) == 2
    mapped_values = [xref["value"] for xref in multi_data.xrefs]
    for expected_value in ("signalp", "hmmer2"):
        assert expected_value in mapped_values

0 comments on commit beb4842

Please sign in to comment.