-
Notifications
You must be signed in to change notification settings - Fork 1
/
RTXCurieUtil.py
57 lines (51 loc) · 1.79 KB
/
RTXCurieUtil.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Only the converter function is part of this module's public API.
__all__ = ["convert_rtx_curie_id_to_bt_explorer_input"]

# Lookup table: RTX CURIE prefix -> BioThings Explorer input prefix.
# The trailing comment on each entry names the RTX node category that the
# prefix is associated with.
# NOTE: "AQTLTrait" is deliberately absent; it has no BioThings Explorer
# prefix assigned (it was previously listed here, disabled, with value None).
_input_prefix_map = {
    "KEGG": "kegg.compound",        # metabolite
    "UniProtKB": "uniprot",         # protein
    "UBERON": "uberon",             # anatomical_entity
    "CL": "cl",                     # TODO: node category still to be determined
    "GO": "go",                     # gene_ontology
    "DOID": "do",                   # disease
    "OMIM": "omim.disease",         # genetic_condition
    "HP": "hp",                     # phenotypic_feature
    "NCBIGene": "ncbigene",         # microRNA
    "REACT": "reactome.pathway",    # pathway
    "ChEMBL": "chembl.compound",    # chemical_substance
}
def convert_rtx_curie_id_to_bt_explorer_input(curie_id):
    r"""Convert an RTX CURIE into a BioThings Explorer ``(prefix, value)`` pair.

    The CURIE must have the form ``"<PREFIX>:<VALUE>"`` with exactly one
    colon. The BioThings Explorer prefix is looked up in
    ``_input_prefix_map``; the input value is derived per prefix:

    KEGG      --> ^C\d+$, equivalent to splitting by colon. E.g. "KEGG:C123" --> "C123"
    UniProtKB --> split by colon; use the second part as input value
    UBERON    --> as is
    CL        --> as is (e.g. "CL:0000492")
    GO        --> as is
    DOID      --> split by colon; use the second part as input value (Not compatible with identifiers.org)
    OMIM      --> split by colon; use the second part as input value
    HP        --> as is
    NCBIGene  --> split by colon; use the second part as input value
    REACT     --> split by colon; use the second part as input value
    ChEMBL    --> ^CHEMBL\d+$, e.g. "ChEMBL:123" --> "CHEMBL123"

    Args:
        curie_id: RTX CURIE string, e.g. ``"UniProtKB:P12345"``.

    Returns:
        A ``(bte_input_prefix, bte_input_value)`` tuple.

    Raises:
        ValueError: If ``curie_id`` does not contain exactly one colon, or if
            its prefix has no entry in ``_input_prefix_map``.
    """
    # Check the shape explicitly so malformed input yields a message naming
    # the offending CURIE instead of a bare tuple-unpacking error.
    parts = curie_id.split(":")
    if len(parts) != 2:
        raise ValueError(
            "Cannot parse curie id {!r}; expected the form 'PREFIX:VALUE' "
            "with exactly one colon".format(curie_id)
        )
    curie_prefix, curie_value = parts

    # Reject unknown prefixes before doing any value conversion.
    bte_input_prefix = _input_prefix_map.get(curie_prefix)
    if bte_input_prefix is None:
        raise ValueError("Cannot find BioThings Explorer prefix for curie prefix {}".format(curie_prefix))

    if curie_prefix in {"KEGG", "UniProtKB", "DOID", "OMIM", "NCBIGene", "REACT"}:
        # Only the local identifier after the colon is used.
        bte_input_value = curie_value
    elif curie_prefix == "ChEMBL":
        # The "CHEMBL" tag is prepended to the local identifier.
        bte_input_value = "CHEMBL" + curie_value
    else:
        # Remaining mapped prefixes (UBERON, CL, GO, HP) pass the whole CURIE through.
        bte_input_value = curie_id

    return bte_input_prefix, bte_input_value