Skip to content

Commit

Permalink
Implement auto-generation when building docs
Browse files Browse the repository at this point in the history
Auto-generate ontologies in `ontology/` when building the documentation
and place them under the `docs/ontology/versions/` folder.

NB! Currently, the previous versions are not kept, as everything under
the `docs/ontology/versions/` folder is ignored by git.
  • Loading branch information
CasperWA committed Jul 22, 2022
1 parent ab3d1b5 commit 7dad7b0
Show file tree
Hide file tree
Showing 14 changed files with 25,952 additions and 26,357 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ __pycache__

# Documentation
site/
docs/ontology/versions/
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ repos:
- --package-dir=dic2owl/dic2owl
- --unwanted-file=__init__.py
- --unwanted-file=_utils.py
- --unwanted-folder=_mkdocs
- id: docs-landing-page
args:
- --replacement=(LICENSE),(LICENSE.md)
Expand Down
Empty file.
137 changes: 137 additions & 0 deletions dic2owl/dic2owl/_mkdocs/plugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
"""A plugin for MkDocs to run pre-build functionality."""
import os
import re
import shutil
from contextlib import redirect_stderr
from pathlib import Path
from typing import TYPE_CHECKING
from urllib.error import HTTPError

from mkdocs.config.config_options import Type
from mkdocs.exceptions import PluginError
from mkdocs.plugins import BasePlugin
from mkdocstrings.loggers import get_logger

# Remove the print statement concerning 'owlready2_optimized' when importing owlready2
# (which is imported also in emmo).
with open(os.devnull, "w", encoding="utf8") as handle:
with redirect_stderr(handle):
from ontopy import get_ontology

if TYPE_CHECKING:
from mkdocs.config import Config
from ontopy.ontology import Ontology


# Module-level logger, created through the logger factory from mkdocstrings.
LOGGER = get_logger(__name__)

# Pattern describing the versionIRI layout this plugin understands. The plugin
# applies it with `fullmatch`, so the whole IRI has to fit the pattern.
# Named groups:
#   domain   - host part; letters, '.', '_' and '-' only (no digits, no port).
#   path     - one or more '/'-separated segments; letters, '_' and '-' only.
#   version  - a single segment of alphanumerics, '.', '_' and '-'.
#   name     - optional segment following the version; letters, '_' and '-'.
#   filename - optional last segment; letters, '_', '.' and '-'.
VERSION_IRI_REGEX = re.compile(
    r"https?://(?P<domain>[a-zA-Z._-]+)/(?P<path>[a-zA-Z_-]+(/[a-zA-Z_-]+)*)"
    r"/(?P<version>[0-9a-zA-Z._-]+)(/(?P<name>[a-zA-Z_-]+))?(/(?P<filename>[a-zA-Z_.-]+))?"
)


class OntologyBuildPlugin(BasePlugin):
    """Build ontologies.

    MkDocs plugin that, before the documentation is built, copies every Turtle
    (`*.ttl`) ontology found in `ontology_dir` into a versioned folder under
    `publish_dir`, together with an adapted copy of the newest
    `catalog-*.xml` file.

    Configuration options:
        ontology_dir: Folder holding the `*.ttl` ontologies and the
            `catalog-*.xml` file(s).
        publish_dir: Folder under which the versioned ontologies are placed.
        create_dirs: Whether to create the two folders above when missing.

    Both folders are resolved relative to the folder four levels above this
    file.

    """

    config_scheme = (
        ("ontology_dir", Type(str, default="ontology")),
        ("publish_dir", Type(str, default="docs/ontology/versions")),
        ("create_dirs", Type(bool, default=False)),
    )

    # Matches a catalog `<uri .../>` line and captures the stem of the
    # referenced Turtle file.
    _CATALOG_URI_LINE_REGEX = re.compile(r"^.* uri=\"(?P<filename>.*)\.ttl\"/>$")
    # Matches the `uri="....ttl"` attribute that gets rewritten.
    _CATALOG_URI_ATTR_REGEX = re.compile(r"uri=\".*\.ttl\"")

    def on_pre_build(  # pylint: disable=too-many-locals
        self, config: "Config"  # pylint: disable=unused-argument
    ) -> None:
        """Build versioned ontologies.

        Hook for the [`pre-build` event](https://www.mkdocs.org/dev-guide/plugins/#on_pre_build).

        Parameters:
            config: The MkDocs Config object.

        Raises:
            PluginError: If a configured folder is missing while `create_dirs`
                is `False`, if no `catalog-*.xml` file exists next to the
                ontologies, if the version of an ontology cannot be retrieved
                or does not match `VERSION_IRI_REGEX`, or if a catalog `<uri>`
                entry cannot be parsed.

        """
        root_dir = Path(__file__).resolve().parents[3]
        ontology_dir: Path = root_dir / self.config["ontology_dir"]
        publish_dir: Path = root_dir / self.config["publish_dir"]

        if not ontology_dir.exists() or not publish_dir.exists():
            if not self.config["create_dirs"]:
                raise PluginError(
                    "The given 'ontology_dir' and 'publish_dir' must exist. "
                    "Otherwise, 'create_dirs' should be 'True'."
                )
            ontology_dir.mkdir(parents=True, exist_ok=True)
            publish_dir.mkdir(parents=True, exist_ok=True)

        ontology_files = list(ontology_dir.glob("*.ttl"))
        if not ontology_files:
            # Nothing to publish (e.g., the folder was just created above).
            LOGGER.debug("No ontologies found in %s - nothing to build.", ontology_dir)
            return

        # Use the catalog file that sorts last by path (highest 'catalog-*' name).
        catalog_files = sorted(ontology_dir.glob("catalog-*.xml"), reverse=True)
        if not catalog_files:
            raise PluginError(
                f"No 'catalog-*.xml' file found in {str(ontology_dir)!r}."
            )
        catalog_file = catalog_files[0]
        # The catalog is the same for every ontology, so read it only once.
        catalog_text = catalog_file.read_text(encoding="utf8")

        LOGGER.debug("Building ontologies:")
        for ontology_file in ontology_files:
            LOGGER.debug("  * %s", ontology_file.name)
            ontology: "Ontology" = get_ontology(str(ontology_file))
            try:
                ontology.load()
            except HTTPError as exc:
                # Best effort: an HTTP failure while loading is tolerated, but
                # leave a trace of it instead of dropping it silently.
                LOGGER.debug(
                    "    HTTP error while loading %s: %s", ontology_file.name, exc
                )

            try:
                version_iri = ontology.get_version(as_iri=True)
            except TypeError as exc:
                raise PluginError(str(exc)) from exc

            version_iri_match = VERSION_IRI_REGEX.fullmatch(version_iri)
            if version_iri_match is None:
                raise PluginError(
                    f"Could not retrieve versionIRI properly from {ontology_file.name!r}"
                )

            destination_dir = publish_dir / self._relative_destination_dir(
                version_iri_match.groupdict()
            )
            destination_dir.mkdir(parents=True, exist_ok=True)

            shutil.copyfile(src=ontology_file, dst=destination_dir / ontology_file.name)
            (destination_dir / catalog_file.name).write_text(
                self._relocate_catalog_uris(catalog_text, ontology_file.name),
                encoding="utf8",
            )

    @staticmethod
    def _relative_destination_dir(version_iri_parts: dict) -> Path:
        """Return `<top_name>/<version>[/<name>]` for the parsed versionIRI.

        Parameters:
            version_iri_parts: The named groups of a `VERSION_IRI_REGEX` match.

        Returns:
            The destination folder relative to the publish folder, where
            `top_name` is the last segment of the IRI's `path` group and
            `name` is appended only when the IRI contains one.

        """
        top_name = version_iri_parts["path"].rsplit("/", 1)[-1]
        destination = Path(top_name) / version_iri_parts["version"]
        if version_iri_parts["name"]:
            destination = destination / version_iri_parts["name"]
        return destination

    @classmethod
    def _relocate_catalog_uris(cls, catalog_text: str, ontology_filename: str) -> str:
        """Return the catalog content adapted to a published ontology folder.

        Every `<uri>` line that does not mention `ontology_filename` gets its
        `uri` attribute rewritten from `<stem>.ttl` to
        `../<stem>/<stem>.ttl`. All other lines are kept unchanged.

        NOTE(review): this presumes each sibling ontology is published in a
        folder named after its file stem - confirm against the versionIRIs.

        Parameters:
            catalog_text: Content of the source catalog file.
            ontology_filename: File name of the ontology the catalog copy is
                placed next to.

        Returns:
            The adapted catalog content, terminated by a newline.

        Raises:
            PluginError: If a `<uri>` line to be rewritten does not end in a
                `uri="<stem>.ttl"/>` attribute.

        """
        lines = []
        for line in catalog_text.splitlines():
            if "<uri" not in line or ontology_filename in line:
                lines.append(line)
                continue

            match = cls._CATALOG_URI_LINE_REGEX.match(line)
            if not match:
                raise PluginError("Could not determine filename in catalog file.")
            filename: str = match.group("filename")
            replacement = f'uri="../{filename}/{filename}.ttl"'
            # A callable replacement inserts the text verbatim, i.e., without
            # interpreting backslashes or group references in the file name.
            lines.append(
                cls._CATALOG_URI_ATTR_REGEX.sub(lambda _match: replacement, line)
            )
        return "\n".join(lines) + "\n"
8 changes: 7 additions & 1 deletion dic2owl/dic2owl/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@ def main(argv: list = None) -> None:
" Turtle file."
),
)
parser.add_argument(
"--dicversion",
type=str,
default="0.0.1",
help="The generated ontology's version.",
)

args = parser.parse_args(argv)

Expand All @@ -77,4 +83,4 @@ def main(argv: list = None) -> None:
# downloaded.
args.dicfile = str(args.dicfile)

dic2owl_run(dicfile=args.dicfile, ttlfile=args.ttlfile)
dic2owl_run(dicfile=args.dicfile, ttlfile=args.ttlfile, version=args.dicversion)
43 changes: 25 additions & 18 deletions dic2owl/dic2owl/dic2owl.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@
with open(DEVNULL, "w", encoding="utf8") as handle:
with redirect_stderr(handle):
from ontopy import World
from ontopy.ontology import Ontology

from dic2owl._utils import MissingAnnotationError, lang_en

if TYPE_CHECKING:
from typing import Any, Sequence, Set, Union
from typing import Any, Optional, Sequence, Set, Union

from _typeshed import StrPath
from ontopy.ontology import Ontology

# Workaround for flaw in EMMO-Python
# To be removed when EMMO-Python no longer requires ontologies to import SKOS
Expand All @@ -36,7 +36,7 @@
]


# pylint: disable=too-few-public-methods
# pylint: disable=too-few-public-methods,too-many-instance-attributes
class Generator:
"""Class for generating CIF ontology from a CIF dictionary.
Expand All @@ -55,29 +55,30 @@ def __init__(
self,
dicfile: "StrPath",
base_iri: str,
version: str,
comments: "Sequence[str]" = (),
) -> None:
self.dicfile = dicfile
self.dic = CifDic(str(self.dicfile), do_dREL=False)
self.version = version
self.comments = comments

# Create new ontology
self.world = World()
self.onto: "Ontology" = self.world.get_ontology(base_iri)
self.onto: Ontology = self.world.get_ontology(base_iri)

# Load cif-ddl ontology and append it to imported ontologies
cif_ddl_path = (
Path(__file__).resolve().parent.parent.parent / "ontology" / "cif-ddl.ttl"
Path(__file__).resolve().parent.parent.parent / "ontology" / "ddl.ttl"
)
if cif_ddl_path.exists():
cif_ddl = cif_ddl_path.as_uri()
else:
cif_ddl = (
"https://raw.githubusercontent.com/emmo-repo/CIF-ontology/main"
"/ontology/cif-ddl.ttl"
"/ontology/ddl.ttl"
)
self.ddl: "Ontology" = self.world.get_ontology(cif_ddl).load()
self.ddl.sync_python_names()
self.ddl = self.world.get_ontology(cif_ddl).load()
self.onto.imported_ontologies.append(self.ddl)

# Load Dublin core for metadata and append it to imported ontologies
Expand All @@ -98,6 +99,8 @@ def generate(self) -> "Ontology":

self._add_metadata()
self.onto.sync_attributes()

# self.onto.world.as_rdflib_graph().namespace_manager.bind("cif_ddl", self.ddl.base_iri)
return self.onto

def _add_item(self, item) -> None:
Expand Down Expand Up @@ -205,14 +208,20 @@ def _add_metadata(self) -> None:
# TODO:
# Is there a way to extract metadata from the dic object like
# _dictionary_audit.version?
# onto.set_version(version="XXX")
self.onto.set_version(version=self.version)

for comment in self.comments:
self.onto.metadata.comment.append(comment)
self.onto.metadata.comment.append(f"Generated with dic2owl from {self.dicfile}")
self.onto.metadata.comment.append(
lang_en(f"Generated with dic2owl from {self.dicfile}")
)


def main(dicfile: "Union[str, Path]", ttlfile: "Union[str, Path]") -> Generator:
def main(
dicfile: "Union[str, Path]",
ttlfile: "Union[str, Path]",
version: "Optional[str]" = None,
) -> Generator:
"""Main function for ontology generation.
Parameters:
Expand All @@ -232,9 +241,10 @@ def main(dicfile: "Union[str, Path]", ttlfile: "Union[str, Path]") -> Generator:
debugging reasons.
"""
base_iri = "http://emmo.info/CIF-ontology/ontology/cif_core#"

dicfile = dicfile if isinstance(dicfile, str) else str(dicfile.resolve())
ttlfile = ttlfile if isinstance(ttlfile, str) else str(ttlfile.resolve())

base_iri = f"http://emmo.info/CIF-ontology/{Path(ttlfile).stem}#"

# Download the CIF dictionaries to current directory
baseurl = "https://raw.githubusercontent.com/COMCIFS/cif_core/master/"
Expand All @@ -245,11 +255,8 @@ def main(dicfile: "Union[str, Path]", ttlfile: "Union[str, Path]") -> Generator:
# `file://` or similar.
urllib.request.urlretrieve(baseurl + dic, dic) # nosec

gen = Generator(dicfile=dicfile, base_iri=base_iri)
gen = Generator(dicfile=dicfile, base_iri=base_iri, version=version or "0.0.1")
onto = gen.generate()
onto.save(
ttlfile if isinstance(ttlfile, str) else str(ttlfile.resolve()),
overwrite=True,
)
onto.save(ttlfile, overwrite=True)

return gen # XXX - just for debugging
1 change: 1 addition & 0 deletions dic2owl/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
extras_require={"dev": DEV},
entry_points={
"console_scripts": ["dic2owl = dic2owl.cli:main"],
"mkdocs.plugins": ["dic2owl = dic2owl._mkdocs.plugin:OntologyBuildPlugin"],
},
keywords="crystallography ontology materials",
classifiers=[
Expand Down
2 changes: 1 addition & 1 deletion docs/ontology/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ The following table represents the released versions of the CIF ontologies.

| **Version** | **_CIF DDL_** | **_CIF Core_** |
|:--- |:---:|:---:|
| development | [0.1.0](./versions/cif-ddl/0.1.0/cif-ddl.ttl) | [0.1.0](./versions/cif-core/0.1.0/cif-core.ttl) |
| development | [0.1.0](./versions/CIF-ontology/0.1.0/ddl/ddl.ttl) | [0.1.0](./versions/CIF-ontology/0.1.0/core/core.ttl) |
1 change: 1 addition & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ plugins:
# inherited_members: false
docstring_style: google
- awesome-pages
- dic2owl

nav:
- Home: index.md
Expand Down
4 changes: 2 additions & 2 deletions ontology/catalog-v001.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<catalog id="XML Catalog File" prefer="public" xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog">
<uri id="Imports Wizard Entry" name="http://emmo.info/emmo/cif-ddl" uri="cif-ddl.ttl"/>
<uri id="Imports Wizard Entry" name="http://emmo.info/emmo/cif-core" uri="cif-core.ttl"/>
<uri id="Imports Wizard Entry" name="http://emmo.info/CIF-ontology/ddl" uri="ddl.ttl"/>
<uri id="Imports Wizard Entry" name="http://emmo.info/CIF-ontology/core" uri="core.ttl"/>
</catalog>
Loading

0 comments on commit 7dad7b0

Please sign in to comment.