Skip to content

Commit

Permalink
Merge pull request #3363 from obophenotype/overhaul-taxon-subsets
Browse files Browse the repository at this point in the history
Overhaul taxon subsets
  • Loading branch information
gouttegd authored Oct 9, 2024
2 parents ae3f2f8 + 6b519cb commit 3a11216
Show file tree
Hide file tree
Showing 11 changed files with 71 additions and 92 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ subsets/*.tsv
src/ontology/subsets/*.json
src/ontology/subsets/*.obo
src/ontology/subsets/*.owl
src/ontology/subsets/*.ofn
src/ontology/subsets/*.tsv
src/ontology/imports/*_import.owl
!src/ontology/imports/orcidio_import.owl
Expand Down
6 changes: 3 additions & 3 deletions src/ontology/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# More information: https://github.com/INCATools/ontology-development-kit/

# Fingerprint of the configuration file when this Makefile was last generated
CONFIG_HASH= 9531f9cc87d9bbbcf0d424b5845e95234e9cd34df6d28bdbb856a4ba7b5557ad
CONFIG_HASH= 943829f9eb8e1b8d46bf5f3f878364bb18f10e0d9eee11d7c28bfb3dfba894c7


# ----------------------------------------
Expand Down Expand Up @@ -154,7 +154,7 @@ $(ROBOT_PLUGINS_DIRECTORY)/%.jar:
# Specific rules for supplementary plugins defined in configuration

$(ROBOT_PLUGINS_DIRECTORY)/uberon.jar:
curl -L -o $@ https://github.com/gouttegd/uberon-robot-plugin/releases/download/uberon-robot-plugin-0.2.0/uberon.jar
curl -L -o $@ https://github.com/gouttegd/uberon-robot-plugin/releases/download/uberon-robot-plugin-0.3.1/uberon.jar


# ----------------------------------------
Expand Down Expand Up @@ -189,7 +189,7 @@ all_imports: $(IMPORT_FILES)
# ----------------------------------------


SUBSETS = appendicular-minimal circulatory-minimal cranial-minimal cumbo digestive-minimal excretory-minimal human-view immune-minimal life-stages-composite life-stages-core life-stages-minimal merged-partonomy mouse-view musculoskeletal-minimal nephron-minimal nervous-minimal pulmonary-minimal renal-minimal reproductive-minimal sensory-minimal xenopus-view amniote-basic euarchontoglires-basic
SUBSETS = appendicular-minimal circulatory-minimal cranial-minimal cumbo digestive-minimal excretory-minimal human-view immune-minimal life-stages-composite life-stages-core life-stages-minimal merged-partonomy mouse-view musculoskeletal-minimal nephron-minimal nervous-minimal pulmonary-minimal renal-minimal reproductive-minimal sensory-minimal xenopus-view amniote-view euarchontoglires-view

# Path of every subset without its file extension: $(SUBSETDIR)/<subset>.
SUBSET_ROOTS = $(patsubst %, $(SUBSETDIR)/%, $(SUBSETS))
# Every subset file: one entry per subset root and per format listed in
# $(FORMATS_INCL_TSV), named <root>.<format>.
SUBSET_FILES = $(foreach n,$(SUBSET_ROOTS), $(foreach f,$(FORMATS_INCL_TSV), $(n).$(f)))
Expand Down
4 changes: 0 additions & 4 deletions src/ontology/contexts/context-drosophila.owl

This file was deleted.

4 changes: 0 additions & 4 deletions src/ontology/contexts/context-gnathostome.owl

This file was deleted.

4 changes: 0 additions & 4 deletions src/ontology/contexts/context-human.owl

This file was deleted.

4 changes: 0 additions & 4 deletions src/ontology/contexts/context-mouse.owl

This file was deleted.

3 changes: 0 additions & 3 deletions src/ontology/contexts/context-nematode.owl

This file was deleted.

4 changes: 0 additions & 4 deletions src/ontology/contexts/context-xenopus.owl

This file was deleted.

4 changes: 0 additions & 4 deletions src/ontology/contexts/context-zebrafish.owl

This file was deleted.

6 changes: 3 additions & 3 deletions src/ontology/uberon-odk.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ subset_group:
- id: reproductive-minimal
- id: sensory-minimal
- id: xenopus-view
- id: amniote-basic
- id: euarchontoglires-basic
- id: amniote-view
- id: euarchontoglires-view
sssom_mappingset_group:
products:
- id: fbbt
Expand All @@ -123,7 +123,7 @@ robot_java_args: '-Xmx20G'
robot_plugins:
plugins:
- name: uberon
mirror_from: https://github.com/gouttegd/uberon-robot-plugin/releases/download/uberon-robot-plugin-0.2.0/uberon.jar
mirror_from: https://github.com/gouttegd/uberon-robot-plugin/releases/download/uberon-robot-plugin-0.3.1/uberon.jar
robot_report:
release_reports: False
fail_on: ERROR
Expand Down
123 changes: 64 additions & 59 deletions src/ontology/uberon.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ TAXMODSDIR = taxmods
BRI= true

# Source ontology given as first input to the main merge/reason pipeline.
OWLSRC = $(TMPDIR)/uberon-edit.owl
# Output of the main merge/reason pipeline. It is the input of the final
# postprocessing merge that produces uberon.owl, and is used directly by
# rules that must not depend on the postprocessing additions (bridge
# checks, taxon subsets, collected ontologies).
POSTPROCESS_SRC = $(TMPDIR)/uberon.owl
# XML catalog passed to owltools through --catalog-xml.
CATALOG_DYNAMIC = catalog-dynamic.xml
# owltools invocation without any catalog; OWLTOOLS_MEMORY is set in the
# environment of the command.
OWLTOOLS_NO_CAT= OWLTOOLS_MEMORY=$(OWLTOOLS_MEMORY) owltools
# Same invocation, with $(CATALOG_DYNAMIC) as the catalog.
OWLTOOLS_CAT_DYNAMIC= OWLTOOLS_MEMORY=$(OWLTOOLS_MEMORY) owltools --catalog-xml $(CATALOG_DYNAMIC)
Expand Down Expand Up @@ -127,18 +128,21 @@ MATERIALIZE=
else
MATERIALIZE = materialize -T $(CONFIGDIR)/basic_properties.txt -r elk
endif
uberon.owl: $(OWLSRC) $(BRIDGEDIR)/uberon-bridge-to-bfo.owl $(DEVELOPS_FROM_CHAIN)
$(POSTPROCESS_SRC): $(OWLSRC) $(BRIDGEDIR)/uberon-bridge-to-bfo.owl $(DEVELOPS_FROM_CHAIN)
$(ROBOT) merge -i $(OWLSRC) -i $(BRIDGEDIR)/uberon-bridge-to-bfo.owl \
-i $(DEVELOPS_FROM_CHAIN) \
relax $(MATERIALIZE) \
reason -r elk --exclude-duplicate-axioms true \
--equivalent-classes-allowed asserted-only \
unmerge -i $(DEVELOPS_FROM_CHAIN) \
annotate -O $(URIBASE)/$@ -V $(RELEASE)/$@ -o $@
annotate -O $(URIBASE)/uberon.owl -V $(RELEASE)/uberon.owl -o $@

uberon.json.gz: uberon.json
gzip -c $< > $@.tmp && mv $@.tmp $@
.PRECIOUS: uberon.json.gz
# Step 3: Postprocessing. The product of the previous step is merged
# with the files listed below, which are themselves derived from it.
POSTPROCESS_ADDITIONS  = subsets/human-tags.ofn
POSTPROCESS_ADDITIONS += subsets/mouse-tags.ofn
# The first prerequisite ($<) is the pre-postprocessing ontology; each
# addition is passed to ROBOT as an extra "-i <file>" input.
uberon.owl: $(POSTPROCESS_SRC) $(POSTPROCESS_ADDITIONS)
	$(ROBOT) merge -i $< $(addprefix -i ,$(POSTPROCESS_ADDITIONS)) -o $@


# ----------------------------------------
Expand Down Expand Up @@ -723,7 +727,7 @@ extra-full-bridge-checks: $(foreach ao, $(EXTRA_FULL_CHECK_AO_LIST), $(REPORTDIR

# A quick bridge check uses only uberon plus taxon constraints plus
# bridging axioms, *not* the axioms in the source ontology itself.
$(REPORTDIR)/quick-bridge-check-%.txt: uberon.owl \
$(REPORTDIR)/quick-bridge-check-%.txt: $(POSTPROCESS_SRC) \
$(COMPONENTSDIR)/external-disjoints.obo \
$(TMPDIR)/taxslim-disjoint-over-in-taxon.owl \
$(TMPDIR)/bridges
Expand All @@ -738,7 +742,7 @@ $(REPORTDIR)/quick-bridge-check-%.txt: uberon.owl \
# For this check, we separate the production of the merged ontology
# from the production of the report.
# 1. The merge
$(REPORTDIR)/bridge-check-%.owl: uberon.owl \
$(REPORTDIR)/bridge-check-%.owl: $(POSTPROCESS_SRC) \
$(COMPONENTSDIR)/external-disjoints.obo \
$(TMPDIR)/taxslim-disjoint-over-in-taxon.owl \
$(TMPDIR)/bridges \
Expand Down Expand Up @@ -929,40 +933,57 @@ subsets/life-stages-core.owl: uberon.owl
# Taxon subsets
# ----------------------------------------

all_taxmods: $(TAXMODSDIR)/uberon-taxmod-amniote.obo $(TAXMODSDIR)/uberon-taxmod-euarchontoglires.obo

$(TAXMODSDIR)/uberon-taxmod-euarchontoglires.owl: $(TMPDIR)/uberon-taxmod-314146.owl
cp $< $@

$(TAXMODSDIR)/uberon-taxmod-amniote.owl: $(TMPDIR)/uberon-taxmod-32524.owl
cp $< $@

$(TAXMODSDIR)/uberon-taxmod-human.owl: $(TMPDIR)/uberon-taxmod-9606.owl
cp $< $@

subsets/%-basic.owl: $(TAXMODSDIR)/uberon-taxmod-%.owl tmp/simple-slim-seed.txt
$(ROBOT) reason --input $< \
--reasoner ELK --equivalent-classes-allowed all --exclude-tautologies structural \
# Taxon identifier for each named taxon. The taxon subset rule looks the
# identifier up as $(TAXON_ID_$*), where $* is the stem of the requested
# subsets/<name>-view.owl (or subsets/<name>-tags.ofn) target, so a
# subset can only be built for a <name> that has a TAXON_ID_<name>
# variable defined here.
TAXON_ID_human = NCBITaxon:9606
TAXON_ID_mouse = NCBITaxon:10090
TAXON_ID_xenopus = NCBITaxon:8353
TAXON_ID_drosophila = NCBITaxon:7227
TAXON_ID_gnathostome = NCBITaxon:7776
TAXON_ID_amniote = NCBITaxon:32524
TAXON_ID_euarchontoglires = NCBITaxon:314146
TAXON_ID_nematode = NCBITaxon:6237
TAXON_ID_zebrafish = NCBITaxon:7955

# Strategy to use to create the taxon subsets:
# - default: OWLTools' original strategy; given a root class R and a
# taxon T, assert 'R SubClassOf: in_taxon some T' and exclude
# all classes that are unsatisfiable because of that
# assertion.
# - precise: alternative strategy; given a root class R and a taxon T,
# iterate through all subclasses C of R and include classes
# for which the expression 'C and in_taxon some T' is
# satisfiable; about 3-5 times slower than 'default'.
TAXON_SUBSET_STRATEGY = default

# Root classes of the taxon subsets; each one is passed to the subset
# command as a separate --root option.
TAXON_SUBSET_ROOTS = UBERON:0001062 UBERON:0000000

# Create a taxon-specific subset. This rule creates two distinct files:
# (1) the subset proper (subsets/%-view.owl), which can be used on its
# own (and can be published as a release artifact if desired);
# (2) a small file containing oboInOwl:inSubset annotations to "tag" all
# terms that belong to the subset (subsets/%-tags.ofn); that file
# can then be merged into the main release product (last step of the
# "BUILDING UBERON ITSELF" pipeline).
.PRECIOUS: subsets/%-view.owl
subsets/%-view.owl subsets/%-tags.ofn: $(POSTPROCESS_SRC) | all_robot_plugins
$(ROBOT) uberon:create-species-subset --input $< \
--taxon $(TAXON_ID_$*) \
--strategy $(TAXON_SUBSET_STRATEGY) \
--reasoner ELK \
$(foreach root,$(TAXON_SUBSET_ROOTS),--root $(root)) \
--prefix 'uberon: http://purl.obolibrary.org/obo/uberon/core#' \
--subset-name uberon:$*_subset \
--only-tag-in UBERON: \
--write-tags-to subsets/$*-tags.ofn \
reason --reasoner ELK --equivalent-classes-allowed all \
--exclude-tautologies structural \
relax \
remove --axioms equivalent \
relax \
filter --term-file tmp/simple-slim-seed.txt \
--select "annotations ontology anonymous self" --trim true --signature true \
reduce -r ELK \
query --update ../sparql/inject-subset-declaration.ru \
annotate --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) \
convert -f ofn -o $@.tmp.owl && mv $@.tmp.owl $@
.PRECIOUS: subsets/%-basic.owl

$(TAXMODSDIR)/uberon-taxmod-%.obo: $(TAXMODSDIR)/uberon-taxmod-%.owl
$(OWLTOOLS) $< --remove-imports-declarations -o -f obo --no-check $@.tmp && grep -v ^owl $@.tmp > $@

# added --allowEquivalencies, see https://github.com/geneontology/go-ontology/issues/12926
$(TMPDIR)/uberon-taxmod-%.owl: uberon.owl
$(OWLTOOLS) $< --reasoner elk --make-species-subset --perform-macro-expansion false -t NCBITaxon:$* \
--assert-inferred-subclass-axioms --allowEquivalencies --useIsInferred --remove-dangling \
--set-ontology-id $(URIBASE)/uberon/subsets/$@ -o $@ 2>&1 > $@.log
.PRECIOUS: $(TMPDIR)/uberon-taxmod-%.owl
reduce --reasoner ELK \
annotate --ontology-iri $(ONTBASE)/subsets/$*-view.owl \
--version-iri $(ONTBASE)/releases/$(VERSION)/subsets/$*-view.owl \
--annotation owl:versionInfo $(VERSION) \
convert --format ofn --output subsets/$*-view.owl


# Other subsets
Expand All @@ -985,26 +1006,6 @@ subsets/immaterial.obo: uberon.owl
--make-ontology-from-results $(URIBASE)/uberon/$@ \
-o -f obo $@ --reasoner-dispose 2>&1 > $@.LOG

# The first step is a simple "merge+reason", but it still requires
# Owltools because ROBOT has no equivalent to the -x option to simply
# ignore unsatisfiable classes without erroring out.
subsets/%-view.owl: uberon.owl contexts/context-%.owl tmp/simple-slim-seed.txt
$(OWLTOOLS) uberon.owl contexts/context-$*.owl --merge-support-ontologies --merge-imports-closure \
$(QELK) --run-reasoner -r elk -x -o -f ofn $@.tmp.owl && \
$(ROBOT) reason --input $@.tmp.owl \
--reasoner ELK --equivalent-classes-allowed all --exclude-tautologies structural \
unmerge -i contexts/context-$*.owl \
relax \
remove --axioms equivalent \
relax \
filter --term-file tmp/simple-slim-seed.txt \
--select "annotations ontology anonymous self" --trim true --signature true \
reduce -r ELK \
query --update ../sparql/inject-subset-declaration.ru \
annotate --ontology-iri $(ONTBASE)/$@ $(ANNOTATE_ONTOLOGY_VERSION) \
convert -f ofn -o $@.tmp.owl && mv $@.tmp.owl $@
.PRECIOUS: subsets/%-view.owl

%-partview.owl: %.owl
$(OWLTOOLS) $< --remove-subset grouping_class --remove-subset upper_level \
--bpvo --reflexive --prefix "" --suffix " part" -r elk -p BFO:0000050 --replace \
Expand Down Expand Up @@ -1121,7 +1122,7 @@ endif
# Uberon, CL, and the components listed in the COLLECTED_*_SOURCES
# variables above.
.PRECIOUS: $(TMPDIR)/collected-%.owl
$(TMPDIR)/collected-%.owl: $(BRIDGEDIR)/collected-%-hdr.owl uberon.owl $(IMPORTDIR)/local-cl.owl \
$(TMPDIR)/collected-%.owl: $(BRIDGEDIR)/collected-%-hdr.owl $(POSTPROCESS_SRC) $(IMPORTDIR)/local-cl.owl \
$$(COLLECTED_$$*_SOURCES) $(TMPDIR)/bridges
$(ROBOT) merge $(foreach src,$^,-i $(src)) -o $@

Expand Down Expand Up @@ -1504,6 +1505,10 @@ refresh-external-resources:
# May include both actually useful stuff and stuff that nobody
# remembers why it was written in the first place.

# Gzip-compressed copy of the JSON product. The archive is first written
# to a temporary file, which is renamed to the target only if gzip
# succeeded.
.PRECIOUS: uberon.json.gz
uberon.json.gz: uberon.json
	gzip -c $< > $@.tmp && mv $@.tmp $@

# Directory holding the template tables (*.tsv).
TEMPLATESDIR=templates

# One $(TEMPLATESDIR)/<name>.owl entry for each <name>.tsv file found in
# $(TEMPLATESDIR). The variable is recursively expanded, so the wildcard
# is evaluated every time $(TEMPLATES) is referenced.
TEMPLATES=$(patsubst %.tsv, $(TEMPLATESDIR)/%.owl, $(notdir $(wildcard $(TEMPLATESDIR)/*.tsv)))
Expand Down

0 comments on commit 3a11216

Please sign in to comment.