Skip to content

Commit

Permalink
Make documentation outputs more stable
Browse files Browse the repository at this point in the history
  • Loading branch information
Ostrzyciel committed May 28, 2024
1 parent ad918c2 commit 16da6ad
Show file tree
Hide file tree
Showing 9 changed files with 48 additions and 15 deletions.
4 changes: 2 additions & 2 deletions src/main/scala/commands/CategoryDocGenCommand.scala
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ object CategoryDocGenCommand extends Command:
val schemaRepoDir = FileSystems.getDefault.getPath(args(3))
val outDir = FileSystems.getDefault.getPath(args(4))

val catM = RDFDataMgr.loadModel(packageOutDir.resolve("category/metadata.ttl").toString)
val catM = RdfIoUtil.loadWithStableBNodeIds(packageOutDir.resolve("category/metadata.ttl"))
val catRes = catM.listSubjectsWithProperty(RDF.`type`, RdfUtil.Category).next.asResource
val version = RdfUtil.getString(catRes, RdfUtil.hasVersion).get

Expand Down Expand Up @@ -136,7 +136,7 @@ object CategoryDocGenCommand extends Command:
.filter(_.isDirectory)
.map(f => {
val taskName = f.getName
val taskM = RDFDataMgr.loadModel(metadataOutDir.resolve(f"tasks/task-$taskName.ttl").toString)
val taskM = RdfIoUtil.loadWithStableBNodeIds(metadataOutDir.resolve(f"tasks/task-$taskName.ttl"))
val taskRes = taskM.listSubjectsWithProperty(RDF.`type`, RdfUtil.Task).next.asResource
val title = RdfUtil.getString(taskRes, RdfUtil.dctermsTitle) getOrElse taskName
val description = Files.readString(f.toPath.resolve("index.md"))
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/commands/DatasetDocGenCommand.scala
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ object DatasetDocGenCommand extends Command:

DocFileUtil.copyDocs(datasetRepoDir.resolve("doc"), outputDir.resolve("docs"))

val metadata = RDFDataMgr.loadModel(metadataPath.toString)
val metadata = RdfIoUtil.loadWithStableBNodeIds(metadataPath)
val mi = MetadataReader.fromModel(metadata)
val landingPage = mi.datasetRes.listProperties(RdfUtil.dcatLandingPage)
.asScala.toSeq.head.getResource.getURI
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/commands/MainDocGenCommand.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ object MainDocGenCommand extends Command:

println("Generating main documentation...")
val ontologies = RdfIoUtil.loadOntologies(schemaRepoDir)
val mainMetadata = RDFDataMgr.loadModel(mainMetadataOutDir.resolve("metadata.ttl").toString)
val mainMetadata = RdfIoUtil.loadWithStableBNodeIds(mainMetadataOutDir.resolve("metadata.ttl"))
val mainDocOpt = DocBuilder.Options(
titleProps = Seq(
RdfUtil.hasLabelOverride,
Expand Down
4 changes: 2 additions & 2 deletions src/main/scala/commands/SchemaDocGenCommand.scala
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package io.github.riverbench.ci_worker
package commands

import util.AppConfig
import util.{AppConfig, RdfIoUtil}

import io.github.riverbench.ci_worker.util.doc.MarkdownUtil
import org.apache.jena.rdf.model.Model
Expand Down Expand Up @@ -35,7 +35,7 @@ object SchemaDocGenCommand extends Command:
println(f"Processing ${mdFile.getFileName}...")
val ontFile = inDir.resolve(mdFile.getFileName.toString.replace(".md", ".ttl"))
val outFile = outDir.resolve(mdFile.getFileName.toString)
val m = RDFDataMgr.loadModel(ontFile.toString)
val m = RdfIoUtil.loadWithStableBNodeIds(ontFile)
postProcessFile(m, version, mdFile, outFile)
}

Expand Down
4 changes: 2 additions & 2 deletions src/main/scala/util/MetadataReader.scala
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ case class MetadataInfo(
val sTypeIri = model.createResource(sType.iri)
val sTypeUsage = model.listResourcesWithProperty(RdfUtil.staxHasStreamType, sTypeIri)
.asScala.toSeq.head
val newUsage = datasetRes.getModel.createResource()
val newUsage = datasetRes.getModel.createResource(RdfUtil.newAnonId(sType.iri.getBytes))
sTypeUsage.listProperties().asScala
.foreach(p => newUsage.addProperty(p.getPredicate, p.getObject))
datasetRes.addProperty(RdfUtil.staxHasStreamTypeUsage, newUsage)
Expand Down Expand Up @@ -111,7 +111,7 @@ object MetadataReader:
* @return
*/
def read(repoDir: Path): MetadataInfo =
val model = RDFDataMgr.loadModel(repoDir.resolve("metadata.ttl").toString)
val model = RdfIoUtil.loadWithStableBNodeIds(repoDir.resolve("metadata.ttl"))
fromModel(model)

def fromModel(model: Model): MetadataInfo =
Expand Down
21 changes: 19 additions & 2 deletions src/main/scala/util/RdfIoUtil.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,19 @@ package io.github.riverbench.ci_worker
package util

import org.apache.jena.rdf.model.{Model, ModelFactory}
import org.apache.jena.riot.RDFDataMgr
import org.apache.jena.riot.{Lang, RDFDataMgr, RDFParser}
import org.apache.jena.riot.lang.LabelToNode
import org.apache.jena.riot.system.FactoryRDFStd

import java.nio.file.Path
import java.util.UUID

object RdfIoUtil:
// Blank-node label mapping created with a fixed, hard-coded UUID as its seed,
// so the seed is the same on every run of the program.
// NOTE(review): presumably Jena derives blank node IDs deterministically from this
// seed and the label used in the parsed document — confirm against the LabelToNode docs.
private val labelToNode = LabelToNode.createScopeByDocumentHash(
UUID.fromString("13371337-1337-1337-1337-000000000000")
)
// RDF term factory built on the label mapping above; handed to the parser
// in loadWithStableBNodeIds.
private val rdfFactory = FactoryRDFStd(labelToNode)

def loadOntologies(schemaRepoDir: Path): Model =
val ontologyPaths = Seq(
schemaRepoDir.resolve("src/metadata.ttl"),
Expand All @@ -17,7 +25,16 @@ object RdfIoUtil:

val model = ModelFactory.createDefaultModel()
for path <- ontologyPaths do
val m = RDFDataMgr.loadModel(path.toString)
val m = loadWithStableBNodeIds(path)
m.removeNsPrefix("")
model.add(m)
model

/**
 * Parses the RDF file at the given path into a fresh default model, using this
 * object's shared `rdfFactory` to create the RDF terms (blank nodes included).
 *
 * @param p    path of the RDF file to parse
 * @param lang RDF syntax handed to the parser; defaults to `null`
 *             (NOTE(review): presumably Jena then guesses the syntax from the
 *             file name — confirm against the RDFParserBuilder docs)
 * @return a newly created model holding the parsed statements
 */
def loadWithStableBNodeIds(p: Path, lang: Lang = null): Model =
  val target = ModelFactory.createDefaultModel()
  val parserBuilder = RDFParser.create()
    .factory(rdfFactory)
    .source(p)
    .lang(lang)
  parserBuilder.parse(target)
  target
20 changes: 18 additions & 2 deletions src/main/scala/util/RdfUtil.scala
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package io.github.riverbench.ci_worker
package util

import org.apache.jena.rdf.model.{Model, ModelFactory, Property, Resource}
import org.apache.jena.rdf.model.{AnonId, Model, ModelFactory, Property, Resource}
import org.apache.jena.vocabulary.{RDF, RDFS, SKOS, VCARD}

import java.util.UUID
import scala.jdk.CollectionConverters.*
import scala.util.Random

object RdfUtil:
val m = ModelFactory.createDefaultModel()
Expand Down Expand Up @@ -174,4 +176,18 @@ object RdfUtil:
for m <- models.iterator do
model.add(m)
model



/**
 * Creates an anonymous ID for a blank node, derived from the hash code of the given object.
 * Two seeds with the same hash code produce the same ID.
 *
 * @param seed object whose `hashCode()` seeds the random number generator
 * @return an AnonId labelled with a UUID built from two longs drawn from that generator
 */
def newAnonId(seed: Object): AnonId =
  val rng = Random(seed.hashCode())
  // Draw order matters: the first long becomes the most significant half of the UUID.
  val mostSigBits = rng.nextLong()
  val leastSigBits = rng.nextLong()
  AnonId.create(UUID(mostSigBits, leastSigBits).toString)

/**
 * Creates an anonymous ID for a blank node, using the given bytes as the seed.
 * The ID is the name-based UUID computed from those bytes, so two identical
 * byte sequences always yield the same ID.
 *
 * @param seed bytes from which the name-based UUID is derived
 * @return a new AnonId labelled with that UUID
 */
def newAnonId(seed: Array[Byte]): AnonId =
  val nameUuid = UUID.nameUUIDFromBytes(seed)
  AnonId.create(nameUuid.toString)
2 changes: 1 addition & 1 deletion src/main/scala/util/StatCounterSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ object StatCounterSuite:
.zipWithIndex
.foreach((el, i) => {
val (name, stat) = el
val statRes = m.createResource()
val statRes = m.createResource(RdfUtil.newAnonId((name + stat.toString).getBytes))
statRes.addProperty(RDF.`type`, m.createResource(RdfUtil.pRb + name))
statRes.addProperty(RdfUtil.hasDocWeight, i.toString, XSDinteger)
stat.addToRdf(statRes)
Expand Down
4 changes: 2 additions & 2 deletions src/main/scala/util/io/SaveResult.scala
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,14 @@ case class SaveResult(io: IOResult, name: String, size: Long, md5: String, sha1:
case DistType.Jelly =>
distRes.addProperty(RdfUtil.dcatMediaType, core.Constants.jellyContentType)

val md5Checksum = distRes.getModel.createResource()
val md5Checksum = distRes.getModel.createResource(RdfUtil.newAnonId(md5.getBytes))
.addProperty(RDF.`type`, RdfUtil.SpdxChecksum)
.addProperty(RdfUtil.spdxAlgorithm, RdfUtil.spdxChecksumAlgorithmMd5)
.addProperty(RdfUtil.spdxChecksumValue, md5)
.addProperty(RdfUtil.hasDocWeight, "1", XSDinteger)
distRes.addProperty(RdfUtil.spdxChecksum, md5Checksum)

val sha1Checksum = distRes.getModel.createResource()
val sha1Checksum = distRes.getModel.createResource(RdfUtil.newAnonId(sha1.getBytes))
.addProperty(RDF.`type`, RdfUtil.SpdxChecksum)
.addProperty(RdfUtil.spdxAlgorithm, RdfUtil.spdxChecksumAlgorithmSha1)
.addProperty(RdfUtil.spdxChecksumValue, sha1)
Expand Down

0 comments on commit 16da6ad

Please sign in to comment.