From 77c41f2b6ff6ebbbc3f905f41ed85a716206ae5f Mon Sep 17 00:00:00 2001 From: Vincent Prins Date: Fri, 14 Jun 2024 14:03:31 +0200 Subject: [PATCH 01/15] Retrieve GaLAHaD version for CMDI from version.yml --- .../kotlin/org/ivdnt/galahad/port/CmdiMetadata.kt | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/server/src/main/kotlin/org/ivdnt/galahad/port/CmdiMetadata.kt b/server/src/main/kotlin/org/ivdnt/galahad/port/CmdiMetadata.kt index d61b67f..8f708ea 100644 --- a/server/src/main/kotlin/org/ivdnt/galahad/port/CmdiMetadata.kt +++ b/server/src/main/kotlin/org/ivdnt/galahad/port/CmdiMetadata.kt @@ -11,12 +11,12 @@ class CmdiMetadata(transformMetadata: DocumentTransformMetadata) : LayerTransfor val tmp_dir: File = createTempDirectory("cmdi").toFile() } - val file: File init { var template = this::class.java.classLoader.getResource("CMDI-template.xml")!!.readText() val corpusMetadata: CorpusMetadata = this.transformMetadata.corpus.metadata.expensiveGet() + val docTitle = document.getUploadedRawFile().nameWithoutExtension // Current year, month and day, zero-padded val year = SimpleDateFormat("yyyy").format(Date()) @@ -24,7 +24,12 @@ class CmdiMetadata(transformMetadata: DocumentTransformMetadata) : LayerTransfor val day = SimpleDateFormat("dd").format(Date()) val date = "$year-$month-$day" - val docTitle = document.getUploadedRawFile().nameWithoutExtension + // Retrieve GaLAHaD version from the same version.yml used in the client about page. 
+ val versionStream = this::class.java.classLoader.getResource("version.yml")!!.openStream() + val versionProperties = Properties() + versionProperties.load(versionStream) + val galahadVersion = versionProperties.getProperty("VERSION") + val replacements = mapOf( "CORPUS_NAME" to corpusMetadata.name, "DATE" to date, @@ -32,7 +37,7 @@ class CmdiMetadata(transformMetadata: DocumentTransformMetadata) : LayerTransfor "MONTH" to month, "DAY" to day, "PID" to document.uuid.toString(), - "GALAHAD_VERSION" to (System.getenv("GALAHAD_VERSION") ?: "!Unknown version!"), + "GALAHAD_VERSION" to galahadVersion, "TITLE" to docTitle, "SOURCE_NAME" to (corpusMetadata.sourceName ?: "!No source name defined!"), "SOURCE_URL" to (corpusMetadata.sourceURL?.toString() ?: "!No source URL defined!"), From 4aff82f2cf5444698db8cef63f7ebcc2654f79ee Mon Sep 17 00:00:00 2001 From: Vincent Prins Date: Fri, 14 Jun 2024 14:04:15 +0200 Subject: [PATCH 02/15] Add CMDI files as a zip in the export zip --- .../ivdnt/galahad/data/DocumentsController.kt | 2 +- .../org/ivdnt/galahad/data/corpus/Corpus.kt | 8 +++++++- .../kotlin/org/ivdnt/galahad/util/ZipFile.kt | 17 ++++++++++------- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/server/src/main/kotlin/org/ivdnt/galahad/data/DocumentsController.kt b/server/src/main/kotlin/org/ivdnt/galahad/data/DocumentsController.kt index 8c2fc13..7b97d00 100644 --- a/server/src/main/kotlin/org/ivdnt/galahad/data/DocumentsController.kt +++ b/server/src/main/kotlin/org/ivdnt/galahad/data/DocumentsController.kt @@ -95,7 +95,7 @@ class DocumentsController( zip.entries().asSequence().forEach { entry -> zip.getInputStream(entry).use { input -> try { - if (!entry.isDirectory) { + if (!entry.isDirectory && entry.name.split(".").last() != "zip"){ logger.info("Unzipped ${entry.name} from ${file.originalFilename}. Will convert it to document.") // The entry might be in a subfolder, so extract the true file name. 
val fileName = Paths.get(entry.name).fileName.toString() diff --git a/server/src/main/kotlin/org/ivdnt/galahad/data/corpus/Corpus.kt b/server/src/main/kotlin/org/ivdnt/galahad/data/corpus/Corpus.kt index d42b73d..382cd55 100644 --- a/server/src/main/kotlin/org/ivdnt/galahad/data/corpus/Corpus.kt +++ b/server/src/main/kotlin/org/ivdnt/galahad/data/corpus/Corpus.kt @@ -17,7 +17,9 @@ import org.ivdnt.galahad.taggers.Taggers import org.ivdnt.galahad.util.createZipFile import java.io.File import java.io.OutputStream +import java.nio.file.Files import java.util.* +import kotlin.io.path.createTempDirectory /** * A corpus is a collection of documents, metadata and jobs, saved to a folder. The folder contents are: @@ -194,7 +196,11 @@ class Corpus( executeAndLogTime("Generating $name zip") { val convertedDocs = documents.asSequence().map(formatMapper) val docsToCmdi = documents.asSequence().map { CmdiMetadata(ctm.documentMetadata(it.name)).file } - zipFile = createZipFile(convertedDocs + docsToCmdi, outputStream) + val cmdiZip = createZipFile(docsToCmdi, includeCMDI = true) + // rename the cmdiZip to "metadata" + val dest = File(createTempDirectory("metadata").toFile(), "metadata.zip") + Files.move(cmdiZip.toPath(), dest.toPath()) + zipFile = createZipFile(convertedDocs + dest, outputStream) } return zipFile!! } diff --git a/server/src/main/kotlin/org/ivdnt/galahad/util/ZipFile.kt b/server/src/main/kotlin/org/ivdnt/galahad/util/ZipFile.kt index e964ead..9209b64 100644 --- a/server/src/main/kotlin/org/ivdnt/galahad/util/ZipFile.kt +++ b/server/src/main/kotlin/org/ivdnt/galahad/util/ZipFile.kt @@ -11,9 +11,10 @@ import java.util.zip.ZipOutputStream * @param files Sequence of files to be zipped. As it is a Sequence type, * you may want to perform some transformations to map data to a file. * @param outStream If provided, used as a ZipOutputStream. + * @param includeCMDI include the GaLAHaD CMDI template files in the zip. * @return The flushed and closed zipfile. 
*/ -fun createZipFile(files: Sequence, outStream: OutputStream? = null): File { +fun createZipFile(files: Sequence, outStream: OutputStream? = null, includeCMDI: Boolean = false): File { // Create zip and stream. val zipFile = File.createTempFile("tmp", ".zip") val zipStream = ZipOutputStream( @@ -27,12 +28,14 @@ fun createZipFile(files: Sequence, outStream: OutputStream? = null): File zipStream.putNextEntry(ZipEntry(f.name)) zipStream.write(f.readBytes()) } - // Always add CMDI to zips - val cmdis = listOf("TextProfileINT_GaLAHaD.xml", "TextProfileINT_GaLAHaD.xsd") - for (cmdi in cmdis) { - val cmdiFile = getResourceStream(cmdi) - zipStream.putNextEntry(ZipEntry(cmdi)) - zipStream.write(cmdiFile!!.readBytes()) + + if (includeCMDI) { + val cmdis = listOf("TextProfileINT_GaLAHaD.xml", "TextProfileINT_GaLAHaD.xsd") + for (cmdi in cmdis) { + val cmdiFile = getResourceStream(cmdi) + zipStream.putNextEntry(ZipEntry(cmdi)) + zipStream.write(cmdiFile!!.readBytes()) + } } // Close zipStream.flush() From 5e12c1aa9a0297b623b24139c7302ae59865a52d Mon Sep 17 00:00:00 2001 From: Vincent Prins Date: Fri, 14 Jun 2024 14:06:16 +0200 Subject: [PATCH 03/15] Fix empty strings in CMDI So we now check both for null and empty --- .../kotlin/org/ivdnt/galahad/port/CmdiMetadata.kt | 12 +++++++----- .../org/ivdnt/galahad/port/tei/export/TEIMetadata.kt | 5 +++-- .../org/ivdnt/galahad/util/StringExtensions.kt | 4 ++++ .../main/kotlin/org/ivdnt/galahad/util/UTF8Util.kt | 1 + 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/server/src/main/kotlin/org/ivdnt/galahad/port/CmdiMetadata.kt b/server/src/main/kotlin/org/ivdnt/galahad/port/CmdiMetadata.kt index 8f708ea..120c843 100644 --- a/server/src/main/kotlin/org/ivdnt/galahad/port/CmdiMetadata.kt +++ b/server/src/main/kotlin/org/ivdnt/galahad/port/CmdiMetadata.kt @@ -1,6 +1,8 @@ package org.ivdnt.galahad.port import org.ivdnt.galahad.data.corpus.CorpusMetadata +import org.ivdnt.galahad.util.escapeXML +import 
org.ivdnt.galahad.util.toNonEmptyString import java.io.File import java.text.SimpleDateFormat import java.util.* @@ -39,18 +41,18 @@ class CmdiMetadata(transformMetadata: DocumentTransformMetadata) : LayerTransfor "PID" to document.uuid.toString(), "GALAHAD_VERSION" to galahadVersion, "TITLE" to docTitle, - "SOURCE_NAME" to (corpusMetadata.sourceName ?: "!No source name defined!"), - "SOURCE_URL" to (corpusMetadata.sourceURL?.toString() ?: "!No source URL defined!"), + "SOURCE_NAME" to corpusMetadata.sourceName.toNonEmptyString("!No source name defined!"), + "SOURCE_URL" to corpusMetadata.sourceURL.toNonEmptyString("!No source URL defined!"), "ERA_FROM" to corpusMetadata.eraFrom.toString(), "ERA_TO" to corpusMetadata.eraTo.toString(), - "TAGSET" to (tagger.tagset ?: "!No tagset defined!"), + "TAGSET" to tagger.tagset.toNonEmptyString("!No tagset defined!"), "FORMAT" to document.format.identifier, "TAGGER_NAME" to tagger.id, - "TAGGER_VERSION" to tagger.version, //TODO + "TAGGER_VERSION" to tagger.version, "TAGGER_URL" to tagger.model.href, ) for ((key, value) in replacements) { - template = template.replace(key, value) + template = template.replace(key, value.escapeXML()) } file = tmp_dir.resolve("CMDI-$docTitle.xml") file.writeText(template) diff --git a/server/src/main/kotlin/org/ivdnt/galahad/port/tei/export/TEIMetadata.kt b/server/src/main/kotlin/org/ivdnt/galahad/port/tei/export/TEIMetadata.kt index 92ff7ef..3d6af72 100644 --- a/server/src/main/kotlin/org/ivdnt/galahad/port/tei/export/TEIMetadata.kt +++ b/server/src/main/kotlin/org/ivdnt/galahad/port/tei/export/TEIMetadata.kt @@ -6,6 +6,7 @@ import org.ivdnt.galahad.port.LayerTransformer import org.ivdnt.galahad.port.xml.XMLMetadata import org.ivdnt.galahad.util.childOrNull import org.ivdnt.galahad.util.getXmlBuilder +import org.ivdnt.galahad.util.toNonEmptyString import org.w3c.dom.Document import org.w3c.dom.Element import org.w3c.dom.Node @@ -166,8 +167,8 @@ class TEIMetadata( private fun 
addNotesStmt(fileDesc: Element) { val notesStmt = fileDesc.getOrCreateChild("notesStmt") addNote(notesStmt, "corpusName", corpusMetadata.name) - addNote(notesStmt, "sourceCollection", corpusMetadata.sourceName ?: "!No source name defined!") - val url = corpusMetadata.sourceURL?.toString() ?: "!No source URL defined!" + addNote(notesStmt, "sourceCollection", corpusMetadata.sourceName.toNonEmptyString("!No source name defined!")) + val url = corpusMetadata.sourceURL.toNonEmptyString("!No source URL defined!") addNote(notesStmt, "sourceCollectionURL", url) } diff --git a/server/src/main/kotlin/org/ivdnt/galahad/util/StringExtensions.kt b/server/src/main/kotlin/org/ivdnt/galahad/util/StringExtensions.kt index cc0d241..0bcd00a 100644 --- a/server/src/main/kotlin/org/ivdnt/galahad/util/StringExtensions.kt +++ b/server/src/main/kotlin/org/ivdnt/galahad/util/StringExtensions.kt @@ -7,4 +7,8 @@ fun String.matchesUpTo(textToMatch: String): Int { else break } return matchingIndex +} + +fun Any?.toNonEmptyString(default: String): String { + return if (this == null || this.toString().isEmpty()) default else this.toString() } \ No newline at end of file diff --git a/server/src/main/kotlin/org/ivdnt/galahad/util/UTF8Util.kt b/server/src/main/kotlin/org/ivdnt/galahad/util/UTF8Util.kt index 1b9e95b..8bb3d69 100644 --- a/server/src/main/kotlin/org/ivdnt/galahad/util/UTF8Util.kt +++ b/server/src/main/kotlin/org/ivdnt/galahad/util/UTF8Util.kt @@ -25,6 +25,7 @@ fun String.escapeXML(): String { return this .replace("&","&amp;") .replace("<","&lt;") + .replace(">","&gt;") .replace("\"","&quot;") } From dbd474f8c3ab98dc3b120ff53c10adcfef52a4e0 Mon Sep 17 00:00:00 2001 From: Vincent Prins Date: Fri, 14 Jun 2024 15:10:47 +0200 Subject: [PATCH 04/15] Remove old evaluate router path --- client/src/router/index.ts | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/client/src/router/index.ts b/client/src/router/index.ts index d847dee..f3e3641 100644 --- a/client/src/router/index.ts +++ 
b/client/src/router/index.ts @@ -118,18 +118,6 @@ const routes = [ { path: 'tagsets', component: TagsetsView }, { path: 'datasets', component: DatasetsView }, { path: 'benchmarks', component: BenchmarksView }, - { - path: 'evaluate', component: EvaluateView, - props: { basePath: "/overview/evaluate" }, - redirect: '/overview/evaluate/distribution', - children: [ - { path: 'distribution', component: DistributionView }, - { path: 'global_metrics', component: GlobalMetricsView }, - { path: 'grouped_metrics', component: GroupedMetricsView }, - { path: 'confusion', component: ConfusionView } - ] - }, - { path: 'contribute', component: ContributeTaggersView } ] }, { path: '/contribute', component: ContributeView, children: [ From e57e2e722317ee3460585f0a634f1fae232fb2de Mon Sep 17 00:00:00 2001 From: Vincent Prins Date: Fri, 14 Jun 2024 15:15:46 +0200 Subject: [PATCH 05/15] Use built in Kotlin Dispatchers.IO for file access --- .../org/ivdnt/galahad/FileBackedValue.kt | 57 +++---------------- 1 file changed, 8 insertions(+), 49 deletions(-) diff --git a/server/src/main/kotlin/org/ivdnt/galahad/FileBackedValue.kt b/server/src/main/kotlin/org/ivdnt/galahad/FileBackedValue.kt index 5eb7262..791b1e8 100644 --- a/server/src/main/kotlin/org/ivdnt/galahad/FileBackedValue.kt +++ b/server/src/main/kotlin/org/ivdnt/galahad/FileBackedValue.kt @@ -2,6 +2,9 @@ package org.ivdnt.galahad import com.fasterxml.jackson.core.type.TypeReference import com.fasterxml.jackson.databind.ObjectMapper +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.runBlocking +import kotlinx.coroutines.withContext import org.apache.logging.log4j.kotlin.Logging import java.io.* import java.nio.ByteBuffer @@ -60,21 +63,8 @@ open class FileBackedValue( // It was not set yet return initValue } - - val channel = FileChannel.open(file.toPath(), StandardOpenOption.READ) - channel.use { - val lock = lockFile( channel, false ) - lock.use { - val fileSize = channel.size().toInt() - val byteBuffer: 
ByteBuffer = ByteBuffer.allocate(fileSize) - channel.read(byteBuffer) - byteBuffer.flip() - val bytes: ByteArray = byteBuffer.array() - byteBuffer.clear() - return mapper.readValue( bytes, object : TypeReference() {}) - } - } - + val bytes: ByteArray = runBlocking(Dispatchers.IO) { file.inputStream().use { it.readBytes() }} + return mapper.readValue(bytes, object : TypeReference() {}) } /** @@ -88,41 +78,10 @@ open class FileBackedValue( if( !file.exists() ) { file.createNewFile() } - // Would love to do this atomically, but for now we won't val oldValue = read() - - val channel = FileChannel.open(file.toPath(), StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING ) - channel.use { - val lock = lockFile( channel, true ) - lock.use { - val newValue = modification( oldValue ) - val newValBytes = mapper.writeValueAsBytes( newValue ) - logger.trace("will write $newValue") - val byteBuffer = ByteBuffer.wrap( newValBytes ) - channel.write( byteBuffer ) - } - } + val newValue = modification(oldValue) + val newValBytes = mapper.writeValueAsBytes(newValue) + runBlocking(Dispatchers.IO) { file.writeBytes(newValBytes)} } - - fun lockFile( channel: FileChannel, exclusive: Boolean ): FileLock? { - while (true) { - try { - return if( exclusive ) { - channel.lock() - } else { - channel.lock(0L, Long.MAX_VALUE, true) - } - } catch (ignored: OverlappingFileLockException) { - } catch (ignored: IOException) { - } - try { - logger.info("Tried to read access $file but it was locked. 
Will sleep for ${LOCK_SLEEP_TIME}ms now then retry.") - Thread.sleep(LOCK_SLEEP_TIME) - } catch (e: InterruptedException) { - e.printStackTrace() - } - } - } - } \ No newline at end of file From d6d46aec042aee243e3519080eb85ee02bce538d Mon Sep 17 00:00:00 2001 From: Vincent Prins Date: Mon, 17 Jun 2024 12:23:00 +0200 Subject: [PATCH 06/15] Rename GTable.sortedByField to sortedByColumn --- .../src/components/modals/VariantsModal.vue | 92 +- client/src/components/tables/CorpusTable.vue | 304 +++--- .../src/components/tables/DocumentsTable.vue | 450 ++++----- client/src/components/tables/GTable.vue | 908 +++++++++--------- client/src/components/tables/MetricsTable.vue | 248 ++--- .../src/views/annotate/subviews/JobsView.vue | 2 +- .../evaluate/subviews/ConfusionView.vue | 438 ++++----- .../evaluate/subviews/DistributionView.vue | 436 ++++----- .../overview/subviews/BenchmarksView.vue | 318 +++--- 9 files changed, 1598 insertions(+), 1598 deletions(-) diff --git a/client/src/components/modals/VariantsModal.vue b/client/src/components/modals/VariantsModal.vue index f27fadd..b3ebc37 100644 --- a/client/src/components/modals/VariantsModal.vue +++ b/client/src/components/modals/VariantsModal.vue @@ -1,47 +1,47 @@ - - - - \ No newline at end of file diff --git a/client/src/components/tables/CorpusTable.vue b/client/src/components/tables/CorpusTable.vue index 75d1b43..d168bcf 100644 --- a/client/src/components/tables/CorpusTable.vue +++ b/client/src/components/tables/CorpusTable.vue @@ -1,152 +1,152 @@ - - - + + + diff --git a/client/src/components/tables/DocumentsTable.vue b/client/src/components/tables/DocumentsTable.vue index dfc0b5b..a097fa3 100644 --- a/client/src/components/tables/DocumentsTable.vue +++ b/client/src/components/tables/DocumentsTable.vue @@ -1,225 +1,225 @@ - - - + + + diff --git a/client/src/components/tables/GTable.vue b/client/src/components/tables/GTable.vue index 33ef4ec..2378169 100644 --- a/client/src/components/tables/GTable.vue +++ 
b/client/src/components/tables/GTable.vue @@ -1,455 +1,455 @@ - - - - - \ No newline at end of file diff --git a/client/src/components/tables/MetricsTable.vue b/client/src/components/tables/MetricsTable.vue index a66d600..eaed4d3 100644 --- a/client/src/components/tables/MetricsTable.vue +++ b/client/src/components/tables/MetricsTable.vue @@ -1,124 +1,124 @@ - - - - - + + + + + diff --git a/client/src/views/annotate/subviews/JobsView.vue b/client/src/views/annotate/subviews/JobsView.vue index cf9983c..40f9f53 100644 --- a/client/src/views/annotate/subviews/JobsView.vue +++ b/client/src/views/annotate/subviews/JobsView.vue @@ -1,7 +1,7 @@