Skip to content

Commit

Permalink
Merge branch 'main' into submission-template
Browse files Browse the repository at this point in the history
  • Loading branch information
fhennig authored Dec 11, 2024
2 parents c7e061e + 43ca5e8 commit aefac6e
Show file tree
Hide file tree
Showing 10 changed files with 388 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,45 @@ class BackendSpringConfig {
}
}

/**
 * Validates the `earliestReleaseDate.externalFields` setting of every configured organism.
 *
 * Each listed field name must refer to an existing metadata field of that organism, and that
 * metadata field must be of type [MetadataType.DATE]. The check is performed for every organism,
 * independent of whether `earliestReleaseDate.enabled` is set.
 *
 * @param config the backend configuration to inspect
 * @return one human-readable message per offending field; empty if the configuration is valid
 */
internal fun validateEarliestReleaseDateFields(config: BackendConfig): List<String> {
    val problems = mutableListOf<String>()

    for (instanceConfig in config.organisms.values) {
        val schema = instanceConfig.schema
        val organism = schema.organismName
        val knownFieldNames = schema.metadata.mapTo(mutableSetOf()) { field -> field.name }
        val dateFieldNames = schema.metadata
            .filter { field -> field.type == MetadataType.DATE }
            .mapTo(mutableSetOf()) { field -> field.name }

        for (fieldName in schema.earliestReleaseDate.externalFields) {
            // A field that does not exist is reported once as missing, never additionally as "wrong type".
            val reason = when {
                fieldName !in knownFieldNames -> "Field $fieldName does not exist."
                fieldName !in dateFieldNames -> "Field $fieldName is not of type ${MetadataType.DATE}."
                else -> null
            }
            if (reason != null) {
                problems.add("Error on organism $organism in earliestReleaseDate.externalFields: " + reason)
            }
        }
    }

    return problems
}

/**
 * Reads the backend configuration from the file at [configPath] and validates it.
 *
 * The file is parsed exactly once. The parsed configuration is logged and then checked with
 * [validateEarliestReleaseDateFields]; it is only returned if that check reports no errors.
 *
 * @param objectMapper mapper used to deserialize the file into a [BackendConfig]
 * @param configPath path of the configuration file
 * @return the parsed and validated configuration
 * @throws IllegalArgumentException if the validation reports at least one error; the message
 *   contains all reported errors joined by a single space
 */
fun readBackendConfig(objectMapper: ObjectMapper, configPath: String): BackendConfig {
    val config = objectMapper.readValue<BackendConfig>(File(configPath))
    logger.info { "Loaded backend config from $configPath" }
    logger.info { "Config: $config" }

    // Validation must happen before the config is handed out; do not return early above this point.
    val validationErrors = validateEarliestReleaseDateFields(config)
    require(validationErrors.isEmpty()) {
        "The configuration file at $configPath is invalid: " +
            validationErrors.joinToString(" ")
    }
    return config
}
3 changes: 3 additions & 0 deletions backend/src/main/kotlin/org/loculus/backend/config/Config.kt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ data class Schema(
val organismName: String,
val metadata: List<Metadata>,
val externalMetadata: List<ExternalMetadata> = emptyList(),
val earliestReleaseDate: EarliestReleaseDate = EarliestReleaseDate(false, emptyList()),
)

// The Json property names need to be kept in sync with website config enum `metadataPossibleTypes` in `config.ts`
Expand Down Expand Up @@ -76,3 +77,5 @@ data class ExternalMetadata(
override val type: MetadataType,
override val required: Boolean = false,
) : BaseMetadata()

/**
 * Settings for the computed `earliestReleaseDate` metadata field of an organism.
 *
 * @property enabled whether the earliest release date is computed for released data
 * @property externalFields names of metadata fields whose dates are taken into account in addition
 *   to the internal release date; startup validation expects each to be an existing field of type DATE
 */
data class EarliestReleaseDate(
    val enabled: Boolean = false,
    val externalFields: List<String>,
)
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import org.loculus.backend.service.submission.SubmissionDatabaseService
import org.loculus.backend.service.submission.UpdateTrackerTable
import org.loculus.backend.utils.Accession
import org.loculus.backend.utils.DateProvider
import org.loculus.backend.utils.EarliestReleaseDateFinder
import org.loculus.backend.utils.Version
import org.loculus.backend.utils.toTimestamp
import org.loculus.backend.utils.toUtcDateString
Expand Down Expand Up @@ -58,8 +59,22 @@ open class ReleasedDataModel(
val latestVersions = submissionDatabaseService.getLatestVersions(organism)
val latestRevocationVersions = submissionDatabaseService.getLatestRevocationVersions(organism)

val earliestReleaseDateConfig = backendConfig.getInstanceConfig(organism).schema.earliestReleaseDate
val finder = if (earliestReleaseDateConfig.enabled) {
EarliestReleaseDateFinder(earliestReleaseDateConfig.externalFields)
} else {
null
}

return submissionDatabaseService.streamReleasedSubmissions(organism)
.map { computeAdditionalMetadataFields(it, latestVersions, latestRevocationVersions) }
.map {
computeAdditionalMetadataFields(
it,
latestVersions,
latestRevocationVersions,
finder,
)
}
}

@Transactional(readOnly = true)
Expand All @@ -83,6 +98,7 @@ open class ReleasedDataModel(
rawProcessedData: RawProcessedData,
latestVersions: Map<Accession, Version>,
latestRevocationVersions: Map<Accession, Version>,
earliestReleaseDateFinder: EarliestReleaseDateFinder?,
): ProcessedData<GeneticSequence> {
val versionStatus = computeVersionStatus(rawProcessedData, latestVersions, latestRevocationVersions)

Expand All @@ -93,6 +109,8 @@ open class ReleasedDataModel(
NullNode.getInstance()
}

val earliestReleaseDate = earliestReleaseDateFinder?.calculateEarliestReleaseDate(rawProcessedData)

var metadata = rawProcessedData.processedData.metadata +
mapOf(
("accession" to TextNode(rawProcessedData.accession)),
Expand Down Expand Up @@ -126,6 +144,11 @@ open class ReleasedDataModel(
it + ("dataUseTermsUrl" to TextNode(url))
}
}
} +
if (earliestReleaseDate != null) {
mapOf("earliestReleaseDate" to TextNode(earliestReleaseDate.toUtcDateString()))
} else {
emptyMap()
}

return ProcessedData(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package org.loculus.backend.utils

import kotlinx.datetime.LocalDate
import kotlinx.datetime.LocalDateTime
import kotlinx.datetime.LocalTime
import mu.KotlinLogging
import org.loculus.backend.service.submission.RawProcessedData

private val log = KotlinLogging.logger { }

/**
 * Calculates the earliest release date for rows of sequence entries that are passed in one by one.
 *
 * The finder is stateful and assumes sorted input: all versions of an accession are given as one
 * contiguous block, with ascending versions. The ordering is checked with `assert`, so a violation
 * is only reported when JVM assertions are enabled.
 *
 * The earliest release date of a sequence entry is the earliest of:
 * - the internal release date (`releasedAtTimestamp`),
 * - any date found in one of the configured metadata [fields],
 * - the earliest release date of the previous version of the same accession (if it exists).
 *
 * @param fields names of the metadata fields whose (text) values are parsed as dates
 */
class EarliestReleaseDateFinder(private val fields: List<String>) {
    // Holds at most one entry: it is cleared whenever a new accession block starts.
    private val earliestReleaseDateCache = mutableMapOf<String, LocalDateTime>()
    private var previousRawProcessedData: RawProcessedData? = null

    /**
     * Returns the earliest release date for [rawProcessedData] and remembers it for later versions
     * of the same accession.
     *
     * Metadata values that are absent, not textual, or not parseable as a date are ignored;
     * unparseable values are additionally logged as errors.
     */
    fun calculateEarliestReleaseDate(rawProcessedData: RawProcessedData): LocalDateTime {
        assertOrderedAfterPrevious(rawProcessedData)

        var earliestReleaseDate = rawProcessedData.releasedAtTimestamp

        for (field in fields) {
            val dateText = rawProcessedData.processedData.metadata[field]?.textValue() ?: continue
            val date = parseDateOrNull(rawProcessedData, field, dateText) ?: continue
            if (date < earliestReleaseDate) {
                earliestReleaseDate = date
            }
        }

        val accession = rawProcessedData.accession
        val cached = earliestReleaseDateCache[accession]
        if (cached == null) {
            earliestReleaseDateCache.clear() // Inputs are ordered; no need for previous values
            earliestReleaseDateCache[accession] = earliestReleaseDate
        } else if (cached < earliestReleaseDate) {
            // An earlier version of this accession was released earlier; carry that date forward.
            earliestReleaseDate = cached
        } else {
            earliestReleaseDateCache[accession] = earliestReleaseDate
        }

        previousRawProcessedData = rawProcessedData

        return earliestReleaseDate
    }

    /** Asserts that [current] sorts strictly after the previously seen row (by accession, then version). */
    private fun assertOrderedAfterPrevious(current: RawProcessedData) {
        val previous = previousRawProcessedData ?: return
        assert(
            current.accession > previous.accession ||
                (current.accession == previous.accession && current.version > previous.version),
        ) {
            "Input is not ordered. Current: ${current.accession}.${current.version}, " +
                "Previous: ${previous.accession}.${previous.version}"
        }
    }

    /** Parses [dateText] as a date at midnight; logs an error and returns null if it is malformed. */
    private fun parseDateOrNull(rawProcessedData: RawProcessedData, field: String, dateText: String): LocalDateTime? =
        try {
            LocalDateTime(LocalDate.parse(dateText), LocalTime.fromSecondOfDay(0))
        } catch (e: IllegalArgumentException) {
            log.error {
                "Unexpected error: Incorrectly formatted date on ${rawProcessedData.accession}." +
                    "${rawProcessedData.version} on field $field: $dateText " +
                    "Something is wrong with this instance: it might be a configuration error or a bug of " +
                    "the software. Please feel free to reach out to the developers if you need advice " +
                    "(https://github.com/loculus-project/loculus/issues/)."
            }
            null
        }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package org.loculus.backend.config

import org.hamcrest.MatcherAssert.assertThat
import org.hamcrest.Matchers.`is`
import org.junit.jupiter.api.Assertions.assertTrue
import org.junit.jupiter.api.Test
import org.loculus.backend.controller.DEFAULT_ORGANISM

/**
 * Unit tests for [validateEarliestReleaseDateFields].
 *
 * The negative cases check the reported message as well as the number of errors, so that a
 * "missing field" error cannot be mistaken for a "wrong type" error and vice versa.
 */
class BackendSpringConfigTest {

    @Test
    fun `GIVEN an empty config THEN it is valid`() {
        val conf = backendConfig(emptyList(), EarliestReleaseDate(false, emptyList()))

        val errors = validateEarliestReleaseDateFields(conf)

        assertTrue(errors.isEmpty())
    }

    @Test
    fun `GIVEN a config with earliestReleaseDate configured with existing date fields THEN it is valid`() {
        val conf = backendConfig(
            listOf(
                Metadata("foo", MetadataType.DATE),
                Metadata("bar", MetadataType.DATE),
            ),
            EarliestReleaseDate(true, listOf("foo", "bar")),
        )

        val errors = validateEarliestReleaseDateFields(conf)

        assertTrue(errors.isEmpty())
    }

    @Test
    fun `GIVEN a config with a missing external field in earliestReleaseDate THEN it is invalid`() {
        val conf = backendConfig(
            listOf(
                Metadata("foo", MetadataType.DATE),
            ),
            EarliestReleaseDate(true, listOf("foo", "bar")),
        )

        val errors = validateEarliestReleaseDateFields(conf)

        assertThat(errors.size, `is`(1))
        // Only the unknown field "bar" may be reported, and as missing rather than as wrongly typed.
        assertTrue(errors.single().contains("Field bar does not exist."))
    }

    @Test
    fun `GIVEN a config with an external field with incorrect type in earliestReleaseDate THEN it is invalid`() {
        val conf = backendConfig(
            listOf(
                Metadata("foo", MetadataType.DATE),
                Metadata("bar", MetadataType.STRING),
            ),
            EarliestReleaseDate(true, listOf("foo", "bar")),
        )

        val errors = validateEarliestReleaseDateFields(conf)

        assertThat(errors.size, `is`(1))
        // "bar" exists but is a string field, so the type error is expected here.
        assertTrue(errors.single().contains("Field bar is not of type"))
    }
}

/**
 * Test helper that builds a [BackendConfig] containing exactly one organism, [DEFAULT_ORGANISM].
 *
 * @param metadataList metadata fields of the organism's schema
 * @param earliestReleaseDate the `earliestReleaseDate` setting of the organism's schema
 * @return a configuration with an empty reference genome, accession prefix `FOO_` and no data use terms URLs
 */
fun backendConfig(metadataList: List<Metadata>, earliestReleaseDate: EarliestReleaseDate): BackendConfig {
    val schema = Schema(DEFAULT_ORGANISM, metadataList, earliestReleaseDate = earliestReleaseDate)
    val emptyReferenceGenome = ReferenceGenome(emptyList(), emptyList())
    val instanceConfig = InstanceConfig(schema, emptyReferenceGenome)

    return BackendConfig(
        organisms = mapOf(DEFAULT_ORGANISM to instanceConfig),
        accessionPrefix = "FOO_",
        dataUseTermsUrls = null,
    )
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,21 @@ import kotlinx.datetime.Instant
import kotlinx.datetime.LocalDate
import kotlinx.datetime.LocalDateTime
import kotlinx.datetime.LocalTime
import kotlinx.datetime.TimeZone
import kotlinx.datetime.plus
import kotlinx.datetime.toInstant
import kotlinx.datetime.toLocalDateTime
import org.hamcrest.CoreMatchers.`is`
import org.hamcrest.MatcherAssert.assertThat
import org.hamcrest.Matchers
import org.hamcrest.Matchers.equalTo
import org.hamcrest.Matchers.greaterThan
import org.hamcrest.Matchers.hasSize
import org.hamcrest.Matchers.matchesPattern
import org.hamcrest.Matchers.not
import org.hamcrest.Matchers.notNullValue
import org.jetbrains.exposed.sql.batchInsert
import org.jetbrains.exposed.sql.transactions.transaction
import org.junit.jupiter.api.BeforeEach
import org.junit.jupiter.api.Test
import org.keycloak.representations.idm.UserRepresentation
Expand All @@ -44,6 +49,7 @@ import org.loculus.backend.controller.DEFAULT_GROUP
import org.loculus.backend.controller.DEFAULT_GROUP_CHANGED
import org.loculus.backend.controller.DEFAULT_GROUP_NAME
import org.loculus.backend.controller.DEFAULT_GROUP_NAME_CHANGED
import org.loculus.backend.controller.DEFAULT_ORGANISM
import org.loculus.backend.controller.DEFAULT_USER_NAME
import org.loculus.backend.controller.EndpointTest
import org.loculus.backend.controller.datauseterms.DataUseTermsControllerClient
Expand All @@ -56,6 +62,7 @@ import org.loculus.backend.controller.jwtForDefaultUser
import org.loculus.backend.controller.submission.GetReleasedDataEndpointWithDataUseTermsUrlTest.ConfigWithModifiedDataUseTermsUrlSpringConfig
import org.loculus.backend.controller.submission.SubmitFiles.DefaultFiles.NUMBER_OF_SEQUENCES
import org.loculus.backend.service.KeycloakAdapter
import org.loculus.backend.service.submission.SequenceEntriesTable
import org.loculus.backend.utils.Accession
import org.loculus.backend.utils.DateProvider
import org.loculus.backend.utils.Version
Expand Down Expand Up @@ -87,6 +94,7 @@ class GetReleasedDataEndpointTest(
@Autowired private val convenienceClient: SubmissionConvenienceClient,
@Autowired private val submissionControllerClient: SubmissionControllerClient,
@Autowired private val groupClient: GroupManagementControllerClient,
@Autowired private val dataUseTermsClient: DataUseTermsControllerClient,
) {
private val currentYear = Clock.System.now().toLocalDateTime(DateProvider.timeZone).year
private val currentDate = Clock.System.now().toLocalDateTime(DateProvider.timeZone).date.toString()
Expand Down Expand Up @@ -343,6 +351,51 @@ class GetReleasedDataEndpointTest(
assertThat(data[0].metadata, `is`(not(emptyMap())))
}

/**
 * Verifies that released data is returned ordered by accession and, within an accession, by version,
 * even though the rows are inserted in random order.
 *
 * This test is relevant for EarliestReleaseDateFinder, which relies on this particular ordering.
 */
@Test
fun `GIVEN multiple accessions with multiple versions THEN results are ordered by accession and version`() {
    val timestamp = Clock.System.now().toLocalDateTime(TimeZone.UTC)
    val accessions = listOf<Accession>("SEQ1", "SEQ2", "SEQ3", "SEQ4")
    val versions = listOf<Version>(1L, 2L, 3L)
    val accessionVersions = accessions.flatMap { accession -> versions.map { version -> accession to version } }

    transaction {
        val submittingGroupId = groupClient.createNewGroup()
            .andExpect(status().isOk)
            .andGetGroupId()

        // Insert in shuffled order so that a sorted result cannot be an accident of insertion order.
        SequenceEntriesTable.batchInsert(accessionVersions.shuffled()) { (accession, version) ->
            this[SequenceEntriesTable.accessionColumn] = accession
            this[SequenceEntriesTable.versionColumn] = version
            this[SequenceEntriesTable.groupIdColumn] = submittingGroupId
            this[SequenceEntriesTable.submittedAtTimestampColumn] = timestamp
            this[SequenceEntriesTable.releasedAtTimestampColumn] = timestamp
            this[SequenceEntriesTable.organismColumn] = DEFAULT_ORGANISM
            this[SequenceEntriesTable.submissionIdColumn] = "foo"
            this[SequenceEntriesTable.submitterColumn] = "bar"
            this[SequenceEntriesTable.approverColumn] = "baz"
        }

        dataUseTermsClient.changeDataUseTerms(DataUseTermsChangeRequest(accessions, DataUseTerms.Open))
    }

    val releasedData = convenienceClient.getReleasedData(DEFAULT_ORGANISM)

    // every inserted accession version is returned (4 accessions x 3 versions)
    assertThat(releasedData.size, Matchers.`is`(accessionVersions.size))

    // the accessions are sorted
    val returnedAccessions = releasedData.map { entry -> entry.metadata["accession"]!!.asText() }
    assertThat(returnedAccessions, equalTo(returnedAccessions.sorted()))

    // _within_ each accession block, the entries are sorted by version
    val entriesByAccession = releasedData.groupBy { entry -> entry.metadata["accession"]!!.asText() }
    assertThat(entriesByAccession.size, Matchers.`is`(accessions.size))
    for (entries in entriesByAccession.values) {
        val returnedVersions = entries.map { entry -> entry.metadata["version"]!!.asLong() }
        assertThat(returnedVersions, equalTo(returnedVersions.sorted()))
    }
}

private fun prepareRevokedAndRevocationAndRevisedVersions(): PreparedVersions {
val preparedSubmissions = convenienceClient.prepareDataTo(Status.APPROVED_FOR_RELEASE)
convenienceClient.reviseAndProcessDefaultSequenceEntries(preparedSubmissions.map { it.accession })
Expand Down
Loading

0 comments on commit aefac6e

Please sign in to comment.