From 0a96bed9fb6c40fcb0207c53b4d223068b9926da Mon Sep 17 00:00:00 2001 From: Tom Gillespie Date: Wed, 31 May 2023 22:21:16 -0700 Subject: [PATCH] neurondm update composer.py example, nlp include literatureCitation now more closely aligned with the current composer data model, includes references if they are present on individual neurons the example code is a bit more convoluted, but it works from local and remote sources as needed now also better error reporting on request failure in OntMetaIri so that we can immediately see what uri failed to fetch --- neurondm/docs/composer.py | 175 +++++++++++++++++++++++--------- neurondm/neurondm/core.py | 6 +- neurondm/neurondm/models/nlp.py | 2 + pyontutils/core.py | 3 + 4 files changed, 134 insertions(+), 52 deletions(-) diff --git a/neurondm/docs/composer.py b/neurondm/docs/composer.py index ff5b1cb3..1bfd03aa 100644 --- a/neurondm/docs/composer.py +++ b/neurondm/docs/composer.py @@ -1,6 +1,7 @@ -from pyontutils.core import OntGraph, OntResIri +import os +from pyontutils.core import OntGraph, OntResIri, OntResPath from pyontutils.namespaces import rdfs, ilxtr -from neurondm.core import Config, graphBase +from neurondm.core import Config, graphBase, log from neurondm.core import OntTerm, OntId, RDFL @@ -12,36 +13,84 @@ def multi_orig_dest(neuron): return True -def lg(neuron, predicate): - # TODO could add expected cardinality here if needed - return list(neuron.getObjects(predicate)) - - -def for_composer(n): - return dict( - id = n.id_, - label = n.origLabel, - origin = lg(n, ilxtr.hasSomaLocatedIn), - dest_presyn = lg(n, ilxtr.hasAxonPresynapticElementIn), - dest_sens = lg(n, ilxtr.hasAxonSensorySubcellularElementIn), - dest_dend = lg(n, ilxtr.hasDendriteLocatedIn), - path = lg(n, ilxtr.hasAxonLocatedIn), # TODO pull ordering from partial orders (not implemented in core atm) - #laterality = lg(n, ilxtr.hasLaterality), # left/rigth tricky ? - #projection_laterality = lg(n, ilxtr.???), # axon located in contra ? - species = lg(n, ilxtr.hasInstanceInTaxon), - sex = lg(n, ilxtr.hasBiologicalSex), - circuit_type = lg(n, ilxtr.hasCircuitRolePhenotype), +def makelpesrdf(): + collect = [] + def lpes(neuron, predicate): + """ get predicates from python bags """ + # TODO could add expected cardinality here if needed + return [str(o) for o in neuron.getObjects(predicate) + if not collect.append((predicate, o))] + + def lrdf(neuron, predicate): + """ get predicates from graph """ + return [ # XXX FIXME core_graph bad etc. + str(o) for o in + neuron.core_graph[neuron.identifier:predicate]] + + return lpes, lrdf, collect + + +def for_composer(n, cull=False): + lpes, lrdf, collect = makelpesrdf() + fc = dict( + id = str(n.id_), + label = str(n.origLabel), + origin = lpes(n, ilxtr.hasSomaLocatedIn), + dest = ( + # XXX looking at this there seems to be a fault assumption that + # there is only a single destination type per statement, this is + # not the case, there is destination type per destination + [dict(loc=l, type='AXON-T') for l in lpes(n, ilxtr.hasAxonPresynapticElementIn)] + + # XXX I strongly reccoment renaming this to SENSORY-T so that the + # short forms are harder to confuse A-T and S-T + [dict(loc=l, type='AFFERENT-T') for l in lpes(n, ilxtr.hasAxonSensorySubcellularElementIn)] + ), + path = ( # TODO pull ordering from partial orders (not implemented in core atm) + [dict(loc=l, type='AXON') for l in lpes(n, ilxtr.hasAxonLocatedIn)] + + # XXX dendrites don't really ... via ... they are all both terminal and via at the same time ... + [dict(loc=l, type='DENDRITE') for l in lpes(n, ilxtr.hasDendriteLocatedIn)] + ), + #laterality = lpes(n, ilxtr.hasLaterality), # left/rigth tricky ? + #projection_laterality = lpes(n, ilxtr.???), # axon located in contra ? + species = lpes(n, ilxtr.hasInstanceInTaxon), + sex = lpes(n, ilxtr.hasBiologicalSex), + circuit_type = lpes(n, ilxtr.hasCircuitRolePhenotype), + phenotype = lpes(n, ilxtr.hasAnatomicalSystemPhenotype), # current meaning of composer phenotype + anatomical_system = lpes(n, ilxtr.hasAnatomicalSystemPhenotype), # there are a number of dimensions that we aren't converting right now - dont_know_fcrp = lg(n, ilxtr.hasFunctionalCircuitRolePhenotype), - phenotype = (lg(n, ilxtr.hasPhenotype) # FIXME currently a grab bag of other types here - + lg(n, ilxtr.hasMolecularPhenotype) - + lg(n, ilxtr.hasProjectionPhenotype)), - forward_connection = lg(n, ilxtr.hasForwardConnectionPhenotype), + dont_know_fcrp = lpes(n, ilxtr.hasFunctionalCircuitRolePhenotype), + other_phenotype = ( lpes(n, ilxtr.hasPhenotype) + + lpes(n, ilxtr.hasMolecularPhenotype) + + lpes(n, ilxtr.hasProjectionPhenotype)), + forward_connection = lpes(n, ilxtr.hasForwardConnectionPhenotype), + + # direct references from individual individual neurons + provenance = lrdf(n, ilxtr.literatureCitation), + sentence_number = lrdf(n, ilxtr.sentenceNumber), + note_alert = lrdf(n, ilxtr.alertNote), + # XXX provenance from ApiNATOMY models as a whole is not ingested + # right now because composer lacks support for 1:n from neuron to + # prov, (or rather lacks prov collections) and because it attaches + # prov to the sentece, which does not exist for all neurons + # TODO more ... + # notes = ? + + # for _ignore, hasClassificationPhenotype is used for ApiNATOMY + # unlikely to be encountered for real neurons any time soon + _ignore = lpes(n, ilxtr.hasClassificationPhenotype), # used to ensure we account for all phenotypes ) + npo = set((p.e, p.p) for p in n.pes) + cpo = set(collect) + unaccounted_pos = npo - cpo + if unaccounted_pos: + log.warning( + (n.id_, [[n.in_graph.namespace_manager.qname(e) for e in pos] + for pos in unaccounted_pos])) + return {k:v for k, v in fc.items() if v} if cull else fc -def location_summary(neurons, services): +def location_summary(neurons, services, anatent_simple=False): import csv OntTerm.query._services = services locations = sorted(set( @@ -52,46 +101,75 @@ def key(t): return (t.prefix, t.label[0].lower() if isinstance(t, tuple) else t.lower()) - header = 'label', 'curie', 'iri' - rows = ( - [header] + - [(_.label, _.curie, _.iri) for _ in sorted(locations, key=key)]) - with open('/tmp/npo-nlp-apinat-location-summary.csv', 'wt') as f: - csv.writer(f).writerows(rows) + + if anatent_simple: + header = 'label', 'curie', 'iri' + rows = ( + [header] + + [(_.label, _.curie, _.iri) for _ in sorted(locations, key=key)]) + with open('/tmp/npo-nlp-apinat-location-summary.csv', 'wt') as f: + csv.writer(f, lineterminator='\n').writerows(rows) + + else: + header = 'o', 'o_label', 'o_synonym' + rows = ( + [header] + + [(_.iri, _.label, syn) for _ in sorted(locations, key=key) + for syn in _.synonyms]) + with open('/tmp/anatomical_entities.csv', 'wt') as f: + csv.writer(f, lineterminator='\n').writerows(rows) -def main(): +def main(local=False, anatomical_entities=False, anatent_simple=False): + # if (local := True, anatomical_entities := True, anatent_simple := False): + config = Config('random-merge') g = OntGraph() # load and query graph # remove scigraph and interlex calls graphBase._sgv = None del graphBase._sgv - _old_query_services = OntTerm.query._services + if len(OntTerm.query._services) > 1: + # backup services and avoid issues on rerun + _old_query_services = OntTerm.query._services + _noloc_query_services = _old_query_services[1:] + OntTerm.query._services = (RDFL(g, OntId),) - b = ('https://raw.githubusercontent.com/SciCrunch/' - 'NIF-Ontology/neurons/ttl/generated/neurons/') + # base paths to ontology files + gen_neurons_path = 'ttl/generated/neurons/' + suffix = '.ttl' + if local: + from pyontutils.config import auth + olr = auth.get_path('ontology-local-repo') + local_base = olr / gen_neurons_path + else: + orr = 'https://raw.githubusercontent.com/SciCrunch/NIF-Ontology/neurons/' + remote_base = orr + gen_neurons_path # full imports for f in ('apinat-partial-orders', 'apinat-pops-more', 'apinat-simple-sheet', 'sparc-nlp'): - ori = OntResIri(b + f + '.ttl') + if local: + ori = OntResPath(local_base / (f + suffix)) + else: + ori = OntResIri(remote_base + f + suffix) [g.add(t) for t in ori.graph] # label only imports - for f in ( - b + 'apinatomy-neuron-populations' + '.ttl', - ('https://raw.githubusercontent.com/SciCrunch/' - 'NIF-Ontology/neurons/ttl/npo.ttl')): - ori = OntResIri(f) + for f in ('apinatomy-neuron-populations', + '../../npo'): + p = os.path.normpath(gen_neurons_path + f) + if local: + ori = OntResPath(olr / (p + suffix)) + else: + ori = OntResIri(orr + p + suffix) + [g.add((s, rdfs.label, o)) for s, o in ori.graph[:rdfs.label:]] config.load_existing(g) - # FIXME currently subClassOf axioms are not parsed back so we are e.g. - # missing hasInstanceInTaxon axioms for apinatomy neurons neurons = config.neurons() # scigraph required here if deps not removed above # ingest to composer starts here @@ -99,15 +177,14 @@ def main(): dims = set(p for n in neurons for p in n.edges) # for reference fcs = [for_composer(n) for n in mvp_ingest] + _fcne = [for_composer(n, cull=True) for n in mvp_ingest] # exclude empties for easier manual review # example neuron n = mvp_ingest[0] fc = for_composer(n) - if False: - location_summary(neurons, _old_query_services) - - breakpoint() + if anatomical_entities: + location_summary(neurons, _noloc_query_services, anatent_simple) if __name__ == '__main__': diff --git a/neurondm/neurondm/core.py b/neurondm/neurondm/core.py index d8965df3..27d68885 100644 --- a/neurondm/neurondm/core.py +++ b/neurondm/neurondm/core.py @@ -2465,14 +2465,14 @@ def _load_existing(cls, iris): if not cls._loading: NeuronBase._loading = True # block all other neuron loading try: - log.debug(str([i for i in iris if '4164' in i or '100212' in i])) + #log.debug(str([i for i in iris if '4164' in i or '100212' in i])) for iri in iris: # rod/cone issue #breakpoint() try: n = cls(id_=iri, override=True)#, out_graph=cls.config.load_graph) # I think we can get away without this - if iri.endswith('4164') or iri.endswith('100212'): - log.debug(f'{iri} -> {n}') + #if iri.endswith('4164') or iri.endswith('100212'): + #log.debug(f'{iri} -> {n}') # because we just call Config again an everything resets except cls.owlClassMismatch as e: diff --git a/neurondm/neurondm/models/nlp.py b/neurondm/neurondm/models/nlp.py index ded711d5..46c27ad7 100644 --- a/neurondm/neurondm/models/nlp.py +++ b/neurondm/neurondm/models/nlp.py @@ -118,6 +118,7 @@ def asdf(s, p, rm): asdf(s, ilxtr.curatorNote, r.curation_notes) asdf(s, ilxtr.reviewNote, r.review_notes) asdf(s, ilxtr.reference, r.reference_pubmed_id__doi_or_text) + asdf(s, ilxtr.literatureCitation, r.literature_citation) asdf(s, rdfs.label, r.neuron_population_label_a_to_b_via_c) if hasattr(r, 'alert_explanation'): asdf(s, ilxtr.alertNote, r.alert_explanation) @@ -126,6 +127,7 @@ def asdf(s, p, rm): p = map_predicates(r.relationship().value) o = OntId(r.explicit_complement().value) ec[(s, p)] = o + if hasattr(r, 'axonal_course_poset') and r.axonal_course_poset().value: # s.u and OntId(...).u to avoid duplicate subjects/objects in the graph # due to type vs instance issues for rdflib.URIRef and OntId diff --git a/pyontutils/core.py b/pyontutils/core.py index cc839ff1..3b30fe72 100644 --- a/pyontutils/core.py +++ b/pyontutils/core.py @@ -649,6 +649,9 @@ def _data_from_generator(self, conventions_type, yield_response_gen, ): + if not resp.ok: + resp.raise_for_status() + first = next(gen) # TODO better type detection