Skip to content
This repository has been archived by the owner on Jul 29, 2022. It is now read-only.

Simplify metadata parsing by aligning with new specifications #102

Merged
merged 5 commits into from
May 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,6 @@ internal class PubMetadataAdapter(
contributors = contributors(null)
)

private val defaultLang = firstValue(Vocabularies.DCTERMS + "language")

/** Values of every item stored under the DCTERMS "language" key; an empty list when that key has no entry. */
val languages = items[Vocabularies.DCTERMS + "language"].orEmpty().map { it.value }

val identifier: String?
Expand All @@ -218,7 +216,7 @@ internal class PubMetadataAdapter(
val localizedSortAs: LocalizedString?

init {
val titles = items[Vocabularies.DCTERMS + "title"]?.map { it.toTitle(defaultLang) }.orEmpty()
val titles = items[Vocabularies.DCTERMS + "title"]?.map { it.toTitle() }.orEmpty()
val mainTitle = titles.firstOrNull { it.type == "main" } ?: titles.firstOrNull()

localizedTitle = mainTitle?.value ?: LocalizedString(fallbackTitle)
Expand All @@ -231,31 +229,28 @@ internal class PubMetadataAdapter(
val belongsToCollections: List<Collection>

init {
if (epubVersion < 3.0) {
val calibreSeries = items["calibre:series"]?.firstOrNull()?.let {
val name = LocalizedString.fromStrings(mapOf(it.lang to it.value))
val position = firstValue("calibre:series_index")?.toDoubleOrNull()
Collection(localizedName = name, position = position)
}

belongsToSeries = listOfNotNull(calibreSeries)
belongsToCollections = emptyList()

} else {
val allCollections = items[Vocabularies.META + "belongs-to-collection"]
.orEmpty().map { it.toCollection(defaultLang) }
val (seriesMeta, collectionsMeta) = allCollections.partition { it.first == "series" }

belongsToSeries = seriesMeta.map(Pair<String?, Collection>::second)
belongsToCollections = collectionsMeta.map(Pair<String?, Collection>::second)
}
val allCollections = items[Vocabularies.META + "belongs-to-collection"]
.orEmpty().map { it.toCollection() }
val (seriesMeta, collectionsMeta) = allCollections.partition { it.first == "series" }

belongsToCollections = collectionsMeta.map(Pair<String?, Collection>::second)

belongsToSeries =
if (seriesMeta.isNotEmpty())
seriesMeta.map(Pair<String?, Collection>::second)
else
items["calibre:series"]?.firstOrNull()?.let {
val name = LocalizedString.fromStrings(mapOf(it.lang to it.value))
val position = firstValue("calibre:series_index")?.toDoubleOrNull()
listOf(Collection(localizedName = name, position = position))
}.orEmpty()
}

val subjects: List<Subject>

init {
val subjectItems = items[Vocabularies.DCTERMS + "subject"].orEmpty()
val parsedSubjects = subjectItems.map { it.toSubject(defaultLang) }
val parsedSubjects = subjectItems.map { it.toSubject() }
val hasToSplit = parsedSubjects.size == 1 && parsedSubjects.first().run {
localizedName.translations.size == 1 && code == null && scheme == null && sortAs == null
}
Expand All @@ -276,33 +271,15 @@ internal class PubMetadataAdapter(
private val allContributors: Map<String?, List<Contributor>>

init {
val creators = items[Vocabularies.DCTERMS + "creator"].orEmpty()
.map { it.toContributor(defaultLang, "aut") }
val publishers = items[Vocabularies.DCTERMS + "publisher"].orEmpty()
.map { it.toContributor(defaultLang, "pbl") }
val others = items[Vocabularies.DCTERMS + "contributor"].orEmpty()
.map { it.toContributor(defaultLang) }
val narrators = items[Vocabularies.MEDIA + "narrator"].orEmpty()
.map { it.toContributor(defaultLang, "nrt") }
val contributors = creators + publishers + narrators + others
val knownRoles = setOf("aut", "trl", "edt", "pbl", "art", "ill", "clr", "nrt")
allContributors = contributors.distributeBy(knownRoles, Contributor::roles)
}

private fun <K, V> List<V>.distributeBy(classes: Set<K>, transform: (V) -> kotlin.collections.Collection<K>): Map<K?, List<V>> {
/* Map all elements with [transform] and compute a [Map] with keys [null] and elements from [classes] and,
as values, lists of elements whose transformed values contain the key.
If a transformed element is in no class, it is assumed to be in [null] class. */

val map: MutableMap<K?, MutableList<V>> = mutableMapOf()
for (element in this) {
val transformed = transform(element).filter { it in classes }
if (transformed.isEmpty())
map.getOrPut(null) { mutableListOf() }.add(element)
for (v in transformed)
map.getOrPut(v) { mutableListOf() }.add(element)
}
return map
val contributors = items[Vocabularies.DCTERMS + "creator"].orEmpty() +
items[Vocabularies.DCTERMS + "contributor"].orEmpty() +
items[Vocabularies.DCTERMS + "publisher"].orEmpty() +
items[Vocabularies.MEDIA + "narrator"].orEmpty()

allContributors = contributors
.map(MetadataItem::toContributor)
.groupBy(Pair<String?, Contributor>::first)
.mapValues { it.value.map(Pair<String?, Contributor>::second) }
}

/** Looks up [role] in [allContributors], falling back to an empty list when nothing is grouped under it. */
fun contributors(role: String?) = allContributors[role] ?: emptyList()
Expand Down Expand Up @@ -379,30 +356,42 @@ internal data class MetadataItem(
val children: Map<String, List<MetadataItem>> = emptyMap()
) {

fun toSubject(defaultLang: String?): Subject {
fun toSubject(): Subject {
require(property == Vocabularies.DCTERMS + "subject")
val values = localizedString(defaultLang)
val localizedSortAs = fileAs?.let { LocalizedString(it.second, if (it.first == "") defaultLang else it.first) }
val values = localizedString()
val localizedSortAs = fileAs?.let { LocalizedString(it.second, it.first) }
return Subject(values, localizedSortAs, authority, term)
}

fun toTitle(defaultLang: String?): Title {
fun toTitle(): Title {
require(property == Vocabularies.DCTERMS + "title")
val values = localizedString(defaultLang)
val localizedSortAs = fileAs?.let { LocalizedString(it.second, if (it.first == "") defaultLang else it.first) }
val values = localizedString()
val localizedSortAs = fileAs?.let { LocalizedString(it.second, it.first) }
return Title(values, localizedSortAs, titleType, displaySeq)
}

fun toContributor(defaultLang: String?, defaultRole: String? = null): Contributor {
fun toContributor(): Pair<String?, Contributor> {
require(property in listOf("creator", "contributor", "publisher").map { Vocabularies.DCTERMS + it } +
(Vocabularies.MEDIA + "narrator") + (Vocabularies.META + "belongs-to-collection"))
val names = localizedString(defaultLang)
val localizedSortAs = fileAs?.let { LocalizedString(it.second, if (it.first == "") defaultLang else it.first) }
return Contributor(names, localizedSortAs = localizedSortAs,
roles = roles(defaultRole), identifier = identifier, position = groupPosition)
val knownRoles = setOf("aut", "trl", "edt", "pbl", "art", "ill", "clr", "nrt")
val names = localizedString()
val localizedSortAs = fileAs?.let { LocalizedString(it.second, it.first) }
val roles = role.takeUnless { it in knownRoles }?.let { setOf(it) }.orEmpty()
val type = when(property) {
Vocabularies.META + "belongs-to-collection" -> collectionType
Vocabularies.DCTERMS + "creator" -> "aut"
Vocabularies.DCTERMS + "publisher" -> "pbl"
Vocabularies.MEDIA + "narrator" -> "nrt"
else -> role.takeIf { it in knownRoles } // Vocabularies.DCTERMS + "contributor"
}

val contributor = Contributor(names, localizedSortAs = localizedSortAs,
roles = roles, identifier = identifier, position = groupPosition)

return Pair(type, contributor)
}

fun toCollection(defaultLang: String?) = Pair(collectionType, toContributor(defaultLang))
fun toCollection() = toContributor()

fun toMap(): Any =
if (children.isEmpty())
Expand All @@ -413,7 +402,8 @@ internal data class MetadataItem(
}

private val fileAs
get() = children[Vocabularies.META + "file-as"]?.firstOrNull()?.let { Pair(it.lang, it.value) }
get() = children[Vocabularies.META + "file-as"]?.firstOrNull()?.let {
Pair(it.lang.takeUnless { it == "" } , it.value) }

/** Value of the first child stored under the META "title-type" key, or null when there is none. */
private val titleType get() = firstValue(Vocabularies.META + "title-type")
Expand All @@ -439,18 +429,13 @@ internal data class MetadataItem(
/** Value of the first child stored under the DCTERMS "identifier" key, or null when there is none. */
private val identifier get() = firstValue(Vocabularies.DCTERMS + "identifier")

private fun localizedString(defaultLang: String?): LocalizedString {
val values = mapOf(lang to value).plus(alternateScript)
.mapKeys { if (it.key.isEmpty()) defaultLang else it.key }
return LocalizedString.fromStrings(values)
}
/** Value of the first child stored under the META "role" key, or null when there is none; later role children are not read. */
private val role get() = firstValue(Vocabularies.META + "role")

private fun roles(default: String?): Set<String> {
val roles = allValues(Vocabularies.META + "role")
return if (roles.isEmpty() && default != null) setOf(default) else roles.toSet()
private fun localizedString(): LocalizedString {
val values = mapOf(lang.takeUnless { it == "" } to value).plus(alternateScript)
return LocalizedString.fromStrings(values)
}

/** Returns the value of the first child registered under [property], or null when no such child exists. */
private fun firstValue(property: String) = children[property].orEmpty().firstOrNull()?.value

/** Returns the values of all children registered under [property]; an empty list when there are none. */
private fun allValues(property: String) = children[property].orEmpty().map { it.value }
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,14 @@ class ContributorParsingTest {

@Test
fun `dc_creator is by default an author`() {
val contributor = Contributor(
localizedName = LocalizedString("Author 1"),
roles = setOf("aut")
)
val contributor = Contributor(localizedName = LocalizedString("Author 1"))
assertThat(epub2Metadata.authors).contains(contributor)
assertThat(epub3Metadata.authors).contains(contributor)
}

@Test
fun `dc_publisher is by default a publisher`() {
val contributor = Contributor(
localizedName = LocalizedString("Publisher 1"),
roles = setOf("pbl")
)
fun `dc_publisher is a publisher`() {
val contributor = Contributor(localizedName = LocalizedString("Publisher 1"))
assertThat(epub2Metadata.publishers).contains(contributor)
assertThat(epub3Metadata.publishers).contains(contributor)
}
Expand All @@ -54,13 +48,6 @@ class ContributorParsingTest {
assertThat(epub3Metadata.contributors).contains(contributor)
}

@Test
fun `Refined roles override tag names`() {
    // The same expected contributor must be listed among the authors of both fixtures.
    val expected = Contributor(localizedName = LocalizedString("Author 2"), roles = setOf("aut"))
    for (metadata in listOf(epub2Metadata, epub3Metadata)) {
        assertThat(metadata.authors).contains(expected)
    }
}

@Test
fun `Unknown roles are ignored`() {
val contributor = Contributor(localizedName = LocalizedString("Contributor 2"), roles = setOf("unknown"))
Expand Down Expand Up @@ -90,57 +77,42 @@ class ContributorParsingTest {
}

@Test
fun `Multiple roles are all parsed (epub3 only)`() {
val contributor = Contributor(
localizedName = LocalizedString("Cameleon"),
roles = setOf("aut", "pbl")
)
fun `Only the first role is considered (epub3 only)`() {
val contributor = Contributor(localizedName = LocalizedString("Cameleon"))
mickael-menu marked this conversation as resolved.
Show resolved Hide resolved
assertThat(epub3Metadata.authors).contains(contributor)
assertThat(epub3Metadata.publishers).contains(contributor)
assertThat(epub3Metadata.publishers).doesNotContain(contributor)
}

@Test
fun `Media Overlays narrators are rightly parsed (epub3 only)`() {
val contributor = Contributor(localizedName = LocalizedString("Media Overlays Narrator"), roles = setOf("nrt"))
val contributor = Contributor(localizedName = LocalizedString("Media Overlays Narrator"))
assertThat(epub3Metadata.narrators).contains(contributor)
}

@Test
fun `Author is rightly parsed`() {
val contributor = Contributor(
localizedName = LocalizedString("Author 3"),
roles = setOf("aut")
)
val contributor = Contributor(localizedName = LocalizedString("Author 2"))
assertThat(epub2Metadata.authors).contains(contributor)
assertThat(epub3Metadata.authors).contains(contributor)
}

@Test
fun `Publisher is rightly parsed`() {
val contributor = Contributor(
localizedName = LocalizedString("Publisher 2"),
roles = setOf("pbl")
)
val contributor = Contributor(localizedName = LocalizedString("Publisher 2") )
assertThat(epub2Metadata.publishers).contains(contributor)
assertThat(epub3Metadata.publishers).contains(contributor)
}

@Test
fun `Translator is rightly parsed`() {
val contributor = Contributor(
localizedName = LocalizedString("Translator"),
roles = setOf("trl")
)
val contributor = Contributor(localizedName = LocalizedString("Translator"))
assertThat(epub2Metadata.translators).contains(contributor)
assertThat(epub3Metadata.translators).contains(contributor)
}

@Test
fun `Artist is rightly parsed`() {
val contributor = Contributor(
localizedName = LocalizedString("Artist"),
roles = setOf("art")
)
val contributor = Contributor(localizedName = LocalizedString("Artist"))
assertThat(epub2Metadata.artists).contains(contributor)
assertThat(epub3Metadata.artists).contains(contributor)
}
Expand All @@ -149,7 +121,7 @@ class ContributorParsingTest {
fun `Illustrator is rightly parsed`() {
val contributor = Contributor(
localizedName = LocalizedString("Illustrator"),
roles = setOf("ill")
roles = emptySet()
)
assertThat(epub2Metadata.illustrators).contains(contributor)
assertThat(epub3Metadata.illustrators).contains(contributor)
Expand All @@ -159,7 +131,7 @@ class ContributorParsingTest {
fun `Colorist is rightly parsed`() {
val contributor = Contributor(
localizedName = LocalizedString("Colorist"),
roles = setOf("clr")
roles = emptySet()
)
assertThat(epub2Metadata.colorists).contains(contributor)
assertThat(epub3Metadata.colorists).contains(contributor)
Expand All @@ -169,15 +141,15 @@ class ContributorParsingTest {
fun `Narrator is rightly parsed`() {
val contributor = Contributor(
localizedName = LocalizedString("Narrator"),
roles = setOf("nrt")
roles = emptySet()
)
assertThat(epub2Metadata.narrators).contains(contributor)
assertThat(epub3Metadata.narrators).contains(contributor)
}

@Test
fun `No more contributor than needed`() {
assertThat(epub2Metadata.authors).size().isEqualTo(3)
assertThat(epub2Metadata.authors).size().isEqualTo(2)
assertThat(epub2Metadata.publishers).size().isEqualTo(2)
assertThat(epub2Metadata.translators).size().isEqualTo(1)
assertThat(epub2Metadata.editors).size().isEqualTo(1)
Expand All @@ -187,8 +159,8 @@ class ContributorParsingTest {
assertThat(epub2Metadata.narrators).size().isEqualTo(1)
assertThat(epub2Metadata.contributors).size().isEqualTo(3)

assertThat(epub3Metadata.authors).size().isEqualTo(4)
assertThat(epub3Metadata.publishers).size().isEqualTo(3)
assertThat(epub3Metadata.authors).size().isEqualTo(3)
assertThat(epub3Metadata.publishers).size().isEqualTo(2)
assertThat(epub3Metadata.translators).size().isEqualTo(1)
assertThat(epub3Metadata.editors).size().isEqualTo(1)
assertThat(epub3Metadata.artists).size().isEqualTo(1)
Expand All @@ -208,14 +180,14 @@ class TitleTest {
assertThat(epub2Metadata.localizedTitle).isEqualTo(
LocalizedString.fromStrings(
mapOf(
"en" to "Alice's Adventures in Wonderland"
null to "Alice's Adventures in Wonderland"
)
)
)
assertThat(epub3Metadata.localizedTitle).isEqualTo(
LocalizedString.fromStrings(
mapOf(
"en" to "Alice's Adventures in Wonderland",
null to "Alice's Adventures in Wonderland",
"fr" to "Les Aventures d'Alice au pays des merveilles"
)
)
Expand Down Expand Up @@ -249,7 +221,7 @@ class TitleTest {
@Test
fun `The selected subtitle has the lowest display-seq property (epub3 only)`() {
val metadata = parsePackageDocument("package/title-multiple-subtitles.opf").metadata
assertThat(metadata.localizedSubtitle).isEqualTo(LocalizedString.fromStrings(mapOf("en" to "Subtitle 2")))
assertThat(metadata.localizedSubtitle).isEqualTo(LocalizedString.fromStrings(mapOf(null to "Subtitle 2")))
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,12 @@
<!-- dc:contributor is by default a Contributor -->
<dc:contributor>Contributor 1</dc:contributor>

<!-- Refined roles override tag names, so Author 2 is not considered a Publisher -->
<dc:publisher id="author-2" opf:role="aut">Author 2</dc:publisher>
<!-- Unknown roles are ignored -->
<dc:contributor id="unknown" opf:role="unknown">Contributor 2</dc:contributor>
<!-- file-as attribute is parsed -->
<dc:contributor opf:file-as="Sorting Key">Contributor 3</dc:contributor>

<!-- Various roles -->
<dc:contributor id="author-3" opf:role="aut">Author 3</dc:contributor>
<dc:contributor id="author-2" opf:role="aut">Author 2</dc:contributor>
<dc:contributor id="publisher" opf:role="pbl">Publisher 2</dc:contributor>
<dc:contributor id="translator" opf:role="trl">Translator</dc:contributor>
<dc:contributor id="editor" opf:role="edt">Editor</dc:contributor>
Expand Down
Loading