From cb1baac3b23ce9bd0419f6002a849a91605461a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bernd=20M=C3=BCller?= Date: Wed, 6 Nov 2024 10:27:36 +0100 Subject: [PATCH] tryout for graph enrichment --- .github/workflows/process_profile_script.py | 108 +++++++++++++------- 1 file changed, 70 insertions(+), 38 deletions(-) diff --git a/.github/workflows/process_profile_script.py b/.github/workflows/process_profile_script.py index f21eab5..37c3a15 100644 --- a/.github/workflows/process_profile_script.py +++ b/.github/workflows/process_profile_script.py @@ -1,7 +1,9 @@ from rdflib import Graph, Namespace, Literal, URIRef, BNode -from rdflib.namespace import RDF, RDFS, DCTERMS +from rdflib.graph import ConjunctiveGraph, Dataset +from rdflib.namespace import RDF, RDFS, DCTERMS, NamespaceManager import sys import rdflib +import json class ProcPofiles: def __init__(self, name): @@ -10,56 +12,86 @@ def __init__(self, name): # Function to generate RDF for a specified profile using triples according to the Profile Ontology. def generate_rdf_for_profile(self, profile_name, label, comment, publisher, is_profile_of, webpage_url, f, outputfilename, filetype): # Defining namespaces - bioschemas = Namespace("https://bioschemas.org/profiles/") + bioschemas = Namespace("https://discovery.biothings.io/view/bioschemas/") prof = Namespace("http://www.w3.org/ns/dx/prof/") role = Namespace("http://www.w3.org/ns/dx/prof/role/") schema = Namespace("http://schema.org/") + rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#") + rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#") + owl = Namespace("http://www.w3.org/2002/07/owl/") + dcterms = Namespace("http://purl.org/dc/terms/") + print("Parsing", f, "as RDF graph with version", rdflib.__version__) # Loading JSON-LD from repository as graph using rdflib g = Graph() - g.parse(source=f, format="json-ld") - print("Parsing completed!") - # # Creating profile URI - # profile_uri = URIRef(str(bioschemas) + profile_name.capitalize() + "/") + namespace_manager = NamespaceManager(Graph(), bind_namespaces="none") + namespace_manager.reset() + namespace_manager.bind("bioschemas", bioschemas) + namespace_manager.bind("prof", prof) + namespace_manager.bind("role", role) + namespace_manager.bind("schema", schema) + + namespace_manager.bind("rdf", rdf) + namespace_manager.bind("rdfs", rdfs) + namespace_manager.bind("owl", owl) + namespace_manager.bind("dcterms", dcterms) + + for n in namespace_manager.namespaces(): + print(n) + + g.namespace_manager = namespace_manager + + g.parse(source=f, format="application/ld+json") + + + print("Parsing completed with size", len(g), "") + + print(g.subject_objects(), "\n\n#\n") + + for i in g: + for j in i: + print(type(j), j) + + # Creating profile URI + profile_uri = URIRef(str(bioschemas) + profile_name.capitalize() + "/") + + # Adding triples for webpage + if webpage_url: + webpage_descriptor = BNode() + g.add((profile_uri, prof.hasResource, webpage_descriptor)) + g.add((webpage_descriptor, RDF.type, prof.ResourceDescriptor)) + g.add((webpage_descriptor, DCTERMS.format, URIRef("https://www.iana.org/assignments/media-types/text/html"))) + g.add((webpage_descriptor, prof.role, role.example)) + g.add((webpage_descriptor, prof.role, role.guidance)) + g.add((webpage_descriptor, prof.hasArtifact, URIRef(webpage_url))) + - # # Adding triples for profile information - # g.add((profile_uri, RDF.type, prof.Profile)) - # g.add((profile_uri, RDFS.label, Literal(label))) - # g.add((profile_uri, RDFS.comment, Literal(comment))) - # g.add((profile_uri, DCTERMS.publisher, URIRef(publisher))) - # g.add((profile_uri, prof.isProfileOf, getattr(schema, is_profile_of))) - - # # Adding triples for webpage - # if webpage_url: - # webpage_descriptor = BNode() - # g.add((profile_uri, prof.hasResource, webpage_descriptor)) - # g.add((webpage_descriptor, RDF.type, prof.ResourceDescriptor)) - # g.add((webpage_descriptor, DCTERMS.format, URIRef("https://www.iana.org/assignments/media-types/text/html"))) - # g.add((webpage_descriptor, prof.role, role.example)) - # g.add((webpage_descriptor, prof.role, role.guidance)) - # g.add((webpage_descriptor, prof.hasArtifact, URIRef(webpage_url))) - - # # Adding triples for JSON-LD - # json_ld_descriptor = BNode() - # g.add((profile_uri, prof.hasResource, json_ld_descriptor)) - # g.add((json_ld_descriptor, RDF.type, prof.ResourceDescriptor)) - # g.add((json_ld_descriptor, DCTERMS.format, URIRef("https://www.iana.org/assignments/media-types/application/ld+json"))) - # g.add((json_ld_descriptor, prof.role, role.schema)) - # g.add((json_ld_descriptor, prof.role, role.specification)) - # g.add((json_ld_descriptor, prof.hasArtifact, URIRef(f))) - # g.add((json_ld_descriptor, prof.hasArtifact, URIRef("https://raw.githubusercontent.com/BioSchemas/bioschemas-dde/main/bioschemas.json"))) + # Adding triples for profile information + g.add((profile_uri, RDF.type, prof.Profile)) + g.add((profile_uri, RDFS.label, Literal(label))) + g.add((profile_uri, RDFS.comment, Literal(comment))) + g.add((profile_uri, DCTERMS.publisher, URIRef(publisher))) + g.add((profile_uri, prof.isProfileOf, getattr(schema, is_profile_of))) + + + + # Adding triples for JSON-LD + json_ld_descriptor = BNode() + g.add((profile_uri, prof.hasResource, json_ld_descriptor)) + g.add((json_ld_descriptor, RDF.type, prof.ResourceDescriptor)) + g.add((json_ld_descriptor, DCTERMS.format, URIRef("https://www.iana.org/assignments/media-types/application/ld+json"))) + g.add((json_ld_descriptor, prof.role, role.schema)) + g.add((json_ld_descriptor, prof.role, role.specification)) + g.add((json_ld_descriptor, prof.hasArtifact, URIRef(f))) + g.add((json_ld_descriptor, prof.hasArtifact, URIRef("https://raw.githubusercontent.com/BioSchemas/bioschemas-dde/main/bioschemas.json"))) # save the graph with additional profile triples # outfile = outputfilename+"."+filetype - context = { - "schema": "http://schema.org/", - "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", - "rdfs": "http://www.w3.org/2000/01/rdf-schema#", - "bioschemas": "https://discovery.biothings.io/view/bioschemas/"} + outfile = outputfilename - g.serialize(destination=outfile, format="json-ld", context=context) + g.serialize(destination=outfile, format="json-ld", auto_compact=True) print("Writing result to", outfile) g.close()