-
Notifications
You must be signed in to change notification settings - Fork 6
/
validate_ontology.py
129 lines (100 loc) · 3.85 KB
/
validate_ontology.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import requests
import json
def logger(*messages):
    """Print the given values on one line, exactly as ``print`` would."""
    print(*messages)
class OntologyValidation:
    """Check ontology CURIEs against per-field rules and an OLS search endpoint.

    Instances are callable: ``validator("obi:0001858")`` queries the search
    endpoint at ``base_url`` and returns a dict with an ``'ok'`` flag.
    Results of completed lookups are cached per term on the instance.
    """

    # Maps a metadata field name to the ontology prefix its CURIEs must carry.
    # A dict value means the expected prefix depends on a sub-parameter (the
    # ``subparam`` argument of ``accepteddb``).  The sentinel
    # "__no_ontology_defined__" disables the prefix check for that entry.
    ontology_rules = {
        'sample_ontology_curie': {
            'Cell Line': 'efo',
            'Primary Cell': 'cl',
            'Primary Tissue': 'uberon',
            'Primary Cell Culture': "__no_ontology_defined__",
        },
        'origin_sample_ontology_curie': 'uberon',
        'experiment_ontology_curie': 'obi',
        'molecule_ontology_curie': 'so',
        # NOTE(review): an earlier comment here read "leaving out these two
        # for now", yet both rules below are active -- confirm the intent.
        'disease_ontology_curie': 'ncim',
        'donor_health_status_ontology_curie': 'ncim',
    }

    # Seconds to wait for the search endpoint before giving up; without a
    # timeout a stalled connection would block validation forever.
    timeout = 30

    def __init__(self):
        self.verbose = False  # log every validated / rejected term
        self.mock = False  # when True, skip the HTTP lookup entirely
        self.base_url = 'https://www.ebi.ac.uk/ols/api/search'
        self.cache = dict()  # term -> result dict of a completed lookup

    def parse_curie(self, curie):
        """Split ``"<ontology>:<id>"`` into its two parts.

        Returns:
            ``{'ontology_name': <ontology>, 'curie': <id>}``.

        Raises:
            ValueError: if ``curie`` does not contain exactly one ``':'``.
        """
        [ontology_name, term] = curie.split(':')
        return {'ontology_name': ontology_name, 'curie': term}

    def payload(self, curie):
        """Build the query parameters for looking up ``curie``.

        Returns:
            ``{'q': 'ONTOLOGY_id', 'ontology': ontology}`` on success, or
            ``{'err': <message>, 'term': curie}`` when ``curie`` cannot be
            parsed.
        """
        try:
            raw = self.parse_curie(curie)
        except (AttributeError, TypeError, ValueError) as err:
            # Report the offending input itself; the handler previously
            # referenced an undefined name and raised NameError instead.
            return {'err': str(err), 'term': curie}
        query = raw['ontology_name'].upper() + '_' + raw['curie']
        return {'q': query, 'ontology': raw['ontology_name']}

    def accepteddb(self, term, ontology_type, subparam=None):
        """Check that ``term`` uses the ontology prefix required for a field.

        Args:
            term: CURIE string of the form ``"<ontology>:<id>"``.
            ontology_type: key into ``ontology_rules``.
            subparam: selects the rule when the field's rule is a dict.

        Returns:
            ``{'ok': True}`` when the prefix is accepted, otherwise
            ``{'ok': False, 'err': <message>}``.

        Raises:
            KeyError: for an unknown ``ontology_type`` or ``subparam``.
            ValueError: if ``term`` is not a well-formed CURIE.
        """
        rule_ontology = OntologyValidation.ontology_rules[ontology_type]
        current_ontology = self.parse_curie(term)['ontology_name'].lower()
        if subparam and isinstance(rule_ontology, dict):
            rule_ontology = rule_ontology[subparam]
        if rule_ontology == "__no_ontology_defined__":
            return {'ok': True}
        if current_ontology != rule_ontology:
            # A dict rule without a subparam also lands here, since a prefix
            # string never equals a dict.
            err = '#__invalid_ontology_db__: "{0}" is not valid for {1}/{2}, expected {3}'.format(
                current_ontology, ontology_type, subparam, rule_ontology)
            return {'ok': False, 'err': err}
        return {'ok': True}

    def __call__(self, term, refresh=False):
        """Look ``term`` up at the search endpoint and report whether it exists.

        Args:
            term: CURIE string to validate.
            refresh: when True, ignore any cached result for ``term``.

        Returns:
            A dict with an ``'ok'`` flag.  Failures carry an ``'err'`` entry;
            mocked results carry ``'mocked': True``.  Only results of
            completed lookups are cached; parse errors, mocked results and
            request failures are not.
        """
        if term in self.cache and not refresh:
            return self.cache[term]
        if self.mock:
            return {'ok': True, 'mocked': True}
        payload = self.payload(term)
        if 'err' in payload:
            logger("#__could_not_parse__:", payload)
            payload['ok'] = False
            return payload
        try:
            reply = requests.get(
                self.base_url,
                headers={'accept': 'application/json'},
                params=payload,
                timeout=self.timeout,
            )
            response = json.loads(reply.content.decode('utf-8'))
            response = response.get('response')
            if response.get('numFound') > 0:
                if self.verbose:
                    logger('#__validated_curie__: {0}'.format(term))
                result = {'ok': True}
            else:
                if self.verbose:
                    logger('#__does_not_validate__: {0}'.format(term))
                result = {'ok': False, 'err': '__notfound__'}
            self.cache[term] = result
            return result
        except Exception as err:
            # Best-effort boundary: network, decoding and response-shape
            # problems are all reported as a failed validation.
            return {'ok': False, 'err': str(err)}
# Shared module-level validator: check_term() and tests() route every lookup
# through this one instance, so they also share its result cache.
validate_ontology = OntologyValidation()
# Uncomment the next line to have uncached lookups return a mocked success
# instead of querying the search endpoint.
#validate_ontology.mock = True
def check_term(term, termtype, subparam=None):
    """Return True only if every CURIE in ``term`` passes all checks.

    Each entry must be non-empty, have the form ``"<ontology>:<id>"``, use the
    ontology prefix required for ``termtype`` (see
    ``OntologyValidation.accepteddb``) and be found by the shared
    ``validate_ontology`` lookup.

    Args:
        term: iterable of CURIE strings.
        termtype: key into ``OntologyValidation.ontology_rules``.
        subparam: forwarded to ``accepteddb`` for fields whose rule depends
            on a sub-parameter.

    Returns:
        bool: True when all entries are valid (also for an empty iterable),
        False as soon as any entry fails one of the checks.
    """
    ok = True
    for curie in term:
        # Empty or malformed entries can never be valid; previously one of
        # the failure paths left ``ok`` untouched and passed silently.
        if not curie or len(curie.split(':')) != 2:
            ok = False
            continue
        db_check = validate_ontology.accepteddb(
            curie, ontology_type=termtype, subparam=subparam)
        if not db_check['ok']:
            ok = False
            continue
        # Always perform the lookup, even after an earlier failure, so every
        # accepted entry is still looked up (and its result cached).
        lookup = validate_ontology(curie)
        if not lookup['ok']:
            ok = False
    return ok
def tests():
    """Ad-hoc smoke run: print the outcome of a few sample validations.

    Everything goes through the shared ``validate_ontology`` instance, so
    unless it is mocked this performs real HTTP lookups.
    """
    for sample in ('obi:000185812', 'obi:0001858', 'ncit:C115935xxxxxxxxx'):
        print(validate_ontology(sample))

    # Result deliberately discarded, as in the original smoke run.
    check_term(['ncit:C115935xxxxxxxxx'], 'disease_ontology_curie')

    field_cases = [
        ("molecule_ontology_curie", ["so:0000991"]),
        ("experiment_ontology_curie", ["obi:OBI_0001863"]),
        ('disease_ontology_curie', ['ncit:C115935']),
        ('sample_ontology_curie', ['cl:0001054']),
        ('donor_health_status_ontology_curie', ['ncit:C0277545']),
    ]
    for case in field_cases:
        field, curies = case
        print(case, check_term(curies, field, 'Primary Cell'))

    db_cases = [
        ('obi:000185812', {'ontology_type': 'sample_ontology_curie'}),
        ('obi:000185812', {'ontology_type': 'experiment_ontology_curie'}),
        ('obi:0001858', {'ontology_type': 'sample_ontology_curie', 'subparam': 'Cell Line'}),
    ]
    for curie, options in db_cases:
        print(validate_ontology.accepteddb(curie, **options))
# Run the smoke checks only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    tests()