From 6954b4e4ce6008d9581d8106e061d58b29393853 Mon Sep 17 00:00:00 2001 From: Jens Pots Date: Fri, 12 Jul 2024 14:42:33 +0200 Subject: [PATCH] refactor: moved parser logic to concrete `JenaParser` class --- src/main/kotlin/Main.kt | 2 +- src/main/kotlin/extensions/Model.kt | 25 +++++ src/main/kotlin/parser/Parser.kt | 69 ++------------ .../impl/{RDFParser.kt => JenaParser.kt} | 92 ++++++++++++++----- src/test/kotlin/parser/ParserTest.kt | 4 +- 5 files changed, 105 insertions(+), 87 deletions(-) rename src/main/kotlin/parser/impl/{RDFParser.kt => JenaParser.kt} (80%) diff --git a/src/main/kotlin/Main.kt b/src/main/kotlin/Main.kt index 7667bdc..fd2d345 100644 --- a/src/main/kotlin/Main.kt +++ b/src/main/kotlin/Main.kt @@ -17,7 +17,7 @@ fun main(args: Array) = runBlocking { val file = File(path) // Parse said config to a IRPipeline. - val parser = Parser(file) + val parser = Parser.using(file) // Parse the pipeline out of the configuration file. val pipeline = parser.pipelines[0] diff --git a/src/main/kotlin/extensions/Model.kt b/src/main/kotlin/extensions/Model.kt index 039e9c4..3154ee2 100644 --- a/src/main/kotlin/extensions/Model.kt +++ b/src/main/kotlin/extensions/Model.kt @@ -2,6 +2,9 @@ package technology.idlab.extensions import java.io.ByteArrayOutputStream import org.apache.jena.rdf.model.Model +import org.apache.jena.rdf.model.Property +import org.apache.jena.rdf.model.RDFNode +import org.apache.jena.rdf.model.Resource import org.apache.jena.shacl.ShaclValidator import technology.idlab.util.Log @@ -19,3 +22,25 @@ internal fun Model.validate(): Model { return this } + +/** + * Return the first object which corresponds to a subject and predicate. Returns null if not found. + */ +internal fun Model.objectOfProperty(resource: Resource, property: Property): RDFNode? { + return try { + this.listObjectsOfProperty(resource, property).next() + } catch (e: NoSuchElementException) { + null + } +} + +/** + * Return the first subject which corresponds to a predicate and object. Returns null if not found. + */ +internal fun Model.subjectWithProperty(property: Property, obj: RDFNode): Resource? { + return try { + this.listSubjectsWithProperty(property, obj).next() + } catch (e: NoSuchElementException) { + null + } +} diff --git a/src/main/kotlin/parser/Parser.kt b/src/main/kotlin/parser/Parser.kt index 93d081b..5da4358 100644 --- a/src/main/kotlin/parser/Parser.kt +++ b/src/main/kotlin/parser/Parser.kt @@ -1,81 +1,28 @@ package technology.idlab.parser import java.io.File -import org.apache.jena.ontology.OntModelSpec -import org.apache.jena.rdf.model.Model -import org.apache.jena.rdf.model.ModelFactory -import org.apache.jena.rdf.model.Resource -import technology.idlab.extensions.validate -import technology.idlab.parser.impl.parseDependencies -import technology.idlab.parser.impl.parsePackages -import technology.idlab.parser.impl.parsePipelines -import technology.idlab.parser.intermediate.IRDependency +import technology.idlab.parser.impl.JenaParser import technology.idlab.parser.intermediate.IRPackage import technology.idlab.parser.intermediate.IRPipeline import technology.idlab.parser.intermediate.IRProcessor -import technology.idlab.resolver.Resolver /** * Parse an RDF file into an intermediate representation, and validate it against the ontology and * SHACL shapes. */ -class Parser(file: File) { - /** The Apache Jena model. */ - private val model: Model = ModelFactory.createOntologyModel(OntModelSpec.OWL_MEM) - +abstract class Parser(file: File) { /** The pipelines in the current configuration. */ - val pipelines: List + abstract val pipelines: List /** The packages in the current configuration. */ - val packages: List + abstract val packages: List /** List of all known processors. */ - val processors: List - - init { - // Load the RDF-Connect ontology. - val resource = this::class.java.getResource("/pipeline.ttl") - val config = resource!!.path!! - this.load(config) - - // Load the pipeline file into the parser. - this.load(file.path) - - // Retrieve dependencies. - val dependencies = this.dependencies() + abstract val processors: List - // Resolve all dependencies. - dependencies.forEach { - val path = Resolver.resolve(it) - this.load(path.toString()) + companion object { + fun using(file: File): Parser { + return JenaParser(file) } - - // Since we updated the model, we will once again check if the SHACL shapes are valid. - this.model.validate() - - // Parse the file. - this.pipelines = this.pipelines() - this.packages = this.packages() - this.processors = this.packages.map { it.processors }.flatten() - } - - /** Parse the file as a list of pipelines, returning its containing stages and dependencies. */ - private fun pipelines(): List { - return model.parsePipelines() - } - - /** Parse the model as a list of packages, returning the provided processors inside. */ - private fun packages(): List { - return model.parsePackages() - } - - /** Retrieve all dependencies in a given file. */ - private fun dependencies(): List { - return model.parseDependencies(null as Resource?) - } - - /** Load an additional file into the parser. */ - private fun load(path: String) { - this.model.read(path, "TURTLE") } } diff --git a/src/main/kotlin/parser/impl/RDFParser.kt b/src/main/kotlin/parser/impl/JenaParser.kt similarity index 80% rename from src/main/kotlin/parser/impl/RDFParser.kt rename to src/main/kotlin/parser/impl/JenaParser.kt index b5b50b4..542f5b6 100644 --- a/src/main/kotlin/parser/impl/RDFParser.kt +++ b/src/main/kotlin/parser/impl/JenaParser.kt @@ -1,12 +1,18 @@ package technology.idlab.parser.impl +import java.io.File +import org.apache.jena.ontology.OntModelSpec import org.apache.jena.rdf.model.Model -import org.apache.jena.rdf.model.Property +import org.apache.jena.rdf.model.ModelFactory import org.apache.jena.rdf.model.RDFNode import org.apache.jena.rdf.model.Resource import org.apache.jena.shacl.vocabulary.SHACLM import org.apache.jena.vocabulary.RDF import runner.Runner +import technology.idlab.extensions.objectOfProperty +import technology.idlab.extensions.subjectWithProperty +import technology.idlab.extensions.validate +import technology.idlab.parser.Parser import technology.idlab.parser.RDFC import technology.idlab.parser.intermediate.IRArgument import technology.idlab.parser.intermediate.IRDependency @@ -15,6 +21,7 @@ import technology.idlab.parser.intermediate.IRParameter import technology.idlab.parser.intermediate.IRPipeline import technology.idlab.parser.intermediate.IRProcessor import technology.idlab.parser.intermediate.IRStage +import technology.idlab.resolver.Resolver import technology.idlab.util.Log internal fun Resource.toRunnerTarget(): Runner.Target { @@ -44,28 +51,6 @@ internal fun Resource.toIRParameterType(): IRParameter.Type { } } -/** - * Return the first object which corresponds to a subject and predicate. Returns null if not found. - */ -internal fun Model.objectOfProperty(resource: Resource, property: Property): RDFNode? { - return try { - this.listObjectsOfProperty(resource, property).next() - } catch (e: NoSuchElementException) { - null - } -} - -/** - * Return the first subject which corresponds to a predicate and object. Returns null if not found. - */ -internal fun Model.subjectWithProperty(property: Property, obj: RDFNode): Resource? { - return try { - this.listSubjectsWithProperty(property, obj).next() - } catch (e: NoSuchElementException) { - null - } -} - /** * Create a mapping of String to IRParameter from a SHACL property. This is a recursive * implementation that will automatically parse nested classes. @@ -258,3 +243,64 @@ internal fun Model.parsePipeline(pipeline: Resource): IRPipeline { internal fun Model.parsePipelines(): List { return listSubjectsWithProperty(RDF.type, RDFC.pipeline).toList().map { parsePipeline(it) } } + +class JenaParser(file: File) : Parser(file) { + /** The Apache Jena model. */ + private val model: Model = ModelFactory.createOntologyModel(OntModelSpec.OWL_MEM) + + /** The pipelines in the current configuration. */ + override val pipelines: List + + /** The packages in the current configuration. */ + override val packages: List + + /** List of all known processors. */ + override val processors: List + + init { + // Load the RDF-Connect ontology. + val resource = this::class.java.getResource("/pipeline.ttl") + val config = resource!!.path!! + this.load(config) + + // Load the pipeline file into the parser. + this.load(file.path) + + // Retrieve dependencies. + val dependencies = this.dependencies() + + // Resolve all dependencies. + dependencies.forEach { + val path = Resolver.resolve(it) + this.load(path.toString()) + } + + // Since we updated the model, we will once again check if the SHACL shapes are valid. + this.model.validate() + + // Parse the file. + this.pipelines = this.pipelines() + this.packages = this.packages() + this.processors = this.packages.map { it.processors }.flatten() + } + + /** Parse the file as a list of pipelines, returning its containing stages and dependencies. */ + private fun pipelines(): List { + return model.parsePipelines() + } + + /** Parse the model as a list of packages, returning the provided processors inside. */ + private fun packages(): List { + return model.parsePackages() + } + + /** Retrieve all dependencies in a given file. */ + private fun dependencies(): List { + return model.parseDependencies(null as Resource?) + } + + /** Load an additional file into the parser. */ + private fun load(path: String) { + this.model.read(path, "TURTLE") + } +} diff --git a/src/test/kotlin/parser/ParserTest.kt b/src/test/kotlin/parser/ParserTest.kt index e9e0517..737d6a8 100644 --- a/src/test/kotlin/parser/ParserTest.kt +++ b/src/test/kotlin/parser/ParserTest.kt @@ -14,7 +14,7 @@ class ParserTest { private fun parse(resource: String): Parser { val uri = this::class.java.getResource(resource) val file = File(uri!!.toURI()) - return Parser(file) + return Parser.using(file) } @Test @@ -102,7 +102,7 @@ class ParserTest { fun stages() { val uri = this::class.java.getResource("/pipelines/basic/index.ttl") val file = File(uri!!.toURI()) - val parser = Parser(file) + val parser = Parser.using(file) val stages = parser.pipelines[0].stages