tryout for graph enrichment

zbmed-semtec · Nov 6, 2024 · cb1baac · cb1baac
1 parent 3816d9b
commit cb1baac
Showing 1 changed file with 70 additions and 38 deletions.
diff --git a/.github/workflows/process_profile_script.py b/.github/workflows/process_profile_script.py
@@ -1,7 +1,9 @@
 from rdflib import Graph, Namespace, Literal, URIRef, BNode
-from rdflib.namespace import RDF, RDFS, DCTERMS
+from rdflib.graph import ConjunctiveGraph, Dataset
+from rdflib.namespace import RDF, RDFS, DCTERMS, NamespaceManager
 import sys
 import rdflib
+import json
 
 class ProcPofiles:
     def __init__(self, name):
@@ -10,56 +12,86 @@ def __init__(self, name):
     # Function to generate RDF for a specified profile using triples according to the Profile Ontology.
     def generate_rdf_for_profile(self, profile_name, label, comment, publisher, is_profile_of, webpage_url, f, outputfilename, filetype):
         # Defining namespaces
-        bioschemas = Namespace("https://bioschemas.org/profiles/")
+        bioschemas = Namespace("https://discovery.biothings.io/view/bioschemas/")
         prof = Namespace("http://www.w3.org/ns/dx/prof/")
         role = Namespace("http://www.w3.org/ns/dx/prof/role/")
         schema = Namespace("http://schema.org/")
 
+        rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
+        rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
+        owl = Namespace("http://www.w3.org/2002/07/owl#")  # OWL's namespace IRI ends in '#', not '/'; with '/' the bound prefix matches no OWL term
+        dcterms = Namespace("http://purl.org/dc/terms/")
+
         print("Parsing", f, "as RDF graph with version", rdflib.__version__)
         # Loading JSON-LD from repository as graph using rdflib
         g = Graph()
-        g.parse(source=f, format="json-ld")
-        print("Parsing completed!")
 
-        # # Creating profile URI
-        # profile_uri = URIRef(str(bioschemas) + profile_name.capitalize() + "/")
+        namespace_manager = NamespaceManager(Graph(), bind_namespaces="none")
+        namespace_manager.reset()
+        namespace_manager.bind("bioschemas", bioschemas)
+        namespace_manager.bind("prof", prof)
+        namespace_manager.bind("role", role)
+        namespace_manager.bind("schema", schema)
+
+        namespace_manager.bind("rdf", rdf)
+        namespace_manager.bind("rdfs", rdfs)
+        namespace_manager.bind("owl", owl)
+        namespace_manager.bind("dcterms", dcterms)
+
+        for n in namespace_manager.namespaces():
+            print(n)
+
+        g.namespace_manager = namespace_manager
+
+        g.parse(source=f, format="application/ld+json")
+
+
+        print("Parsing completed with size", len(g), "")
+
+        print(g.subject_objects(), "\n\n#\n")
+
+        for i in g:
+            for j in i:
+                print(type(j), j)
+
+        # Creating profile URI
+        profile_uri = URIRef(str(bioschemas) + profile_name.capitalize() + "/")
+
+        # Adding triples for webpage
+        if webpage_url:
+            webpage_descriptor = BNode()
+            g.add((profile_uri, prof.hasResource, webpage_descriptor))
+            g.add((webpage_descriptor, RDF.type, prof.ResourceDescriptor))
+            g.add((webpage_descriptor, DCTERMS.format, URIRef("https://www.iana.org/assignments/media-types/text/html")))
+            g.add((webpage_descriptor, prof.role, role.example))
+            g.add((webpage_descriptor, prof.role, role.guidance))
+            g.add((webpage_descriptor, prof.hasArtifact, URIRef(webpage_url)))
+
 
-        # # Adding triples for profile information
-        # g.add((profile_uri, RDF.type, prof.Profile))
-        # g.add((profile_uri, RDFS.label, Literal(label)))
-        # g.add((profile_uri, RDFS.comment, Literal(comment)))
-        # g.add((profile_uri, DCTERMS.publisher, URIRef(publisher)))
-        # g.add((profile_uri, prof.isProfileOf, getattr(schema, is_profile_of)))
-
-        # # Adding triples for webpage
-        # if webpage_url:
-        #     webpage_descriptor = BNode()
-        #     g.add((profile_uri, prof.hasResource, webpage_descriptor))
-        #     g.add((webpage_descriptor, RDF.type, prof.ResourceDescriptor))
-        #     g.add((webpage_descriptor, DCTERMS.format, URIRef("https://www.iana.org/assignments/media-types/text/html")))
-        #     g.add((webpage_descriptor, prof.role, role.example))
-        #     g.add((webpage_descriptor, prof.role, role.guidance))
-        #     g.add((webpage_descriptor, prof.hasArtifact, URIRef(webpage_url)))
-
-        # # Adding triples for JSON-LD
-        # json_ld_descriptor = BNode()
-        # g.add((profile_uri, prof.hasResource, json_ld_descriptor))
-        # g.add((json_ld_descriptor, RDF.type, prof.ResourceDescriptor))
-        # g.add((json_ld_descriptor, DCTERMS.format, URIRef("https://www.iana.org/assignments/media-types/application/ld+json")))
-        # g.add((json_ld_descriptor, prof.role, role.schema))
-        # g.add((json_ld_descriptor, prof.role, role.specification))
-        # g.add((json_ld_descriptor, prof.hasArtifact, URIRef(f)))
-        # g.add((json_ld_descriptor, prof.hasArtifact, URIRef("https://raw.githubusercontent.com/BioSchemas/bioschemas-dde/main/bioschemas.json")))
+        # Adding triples for profile information
+        g.add((profile_uri, RDF.type, prof.Profile))
+        g.add((profile_uri, RDFS.label, Literal(label)))
+        g.add((profile_uri, RDFS.comment, Literal(comment)))
+        g.add((profile_uri, DCTERMS.publisher, URIRef(publisher)))
+        g.add((profile_uri, prof.isProfileOf, getattr(schema, is_profile_of)))
+
+
+
+        # Adding triples for JSON-LD
+        json_ld_descriptor = BNode()
+        g.add((profile_uri, prof.hasResource, json_ld_descriptor))
+        g.add((json_ld_descriptor, RDF.type, prof.ResourceDescriptor))
+        g.add((json_ld_descriptor, DCTERMS.format, URIRef("https://www.iana.org/assignments/media-types/application/ld+json")))
+        g.add((json_ld_descriptor, prof.role, role.schema))
+        g.add((json_ld_descriptor, prof.role, role.specification))
+        g.add((json_ld_descriptor, prof.hasArtifact, URIRef(f)))
+        g.add((json_ld_descriptor, prof.hasArtifact, URIRef("https://raw.githubusercontent.com/BioSchemas/bioschemas-dde/main/bioschemas.json")))
 
         # save the graph with additional profile triples
         # outfile = outputfilename+"."+filetype
-        context = { 
-            "schema": "http://schema.org/", 
-            "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", 
-            "rdfs": "http://www.w3.org/2000/01/rdf-schema#", 
-            "bioschemas": "https://discovery.biothings.io/view/bioschemas/"}
+
         outfile = outputfilename
-        g.serialize(destination=outfile, format="json-ld", context=context)
+        g.serialize(destination=outfile, format="json-ld", auto_compact=True)
         print("Writing result to", outfile)
         g.close()