Skip to content

Commit

Permalink
Add OPDS
Browse files Browse the repository at this point in the history
  • Loading branch information
johnfactotum committed Dec 16, 2023
1 parent 7d55731 commit ad5ec4d
Show file tree
Hide file tree
Showing 3 changed files with 361 additions and 0 deletions.
27 changes: 27 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,33 @@ The TTS module doesn't directly handle speech output. Rather, its methods return

The SSML attributes `ssml:ph` and `ssml:alphabet` are supported. There's no support for PLS and CSS Speech.

### OPDS

The `opds.js` module can be used to implement OPDS clients. It can convert OPDS 1.x documents to OPDS 2.0:

- `getFeed(doc)`: converts an OPDS 1.x feed to OPDS 2.0. The argument must be a DOM Document object. You need to use a `DOMParser` to obtain a Document first if you have a string.
- `getPublication(entry)`: converts an OPDS 1.x entry in acquisition feeds to an OPDS 2.0 publication. The argument must be a DOM Element object.

It exports the following symbols for properties unsupported by OPDS 2.0:
- `SYMBOL.SUMMARY`: used on navigation links to represent the summary/content (see https://github.com/opds-community/drafts/issues/51)
- `SYMBOL.CONTENT`: used on publications to represent the content/description and its type. This is mainly for preserving the type info for XHTML. The value of this property is an object whose properties are:
- `.type`: either "text", "html", or "xhtml"
- `.value`: the value of the content

There are also two functions that can be used to implement search forms:

- `getOpenSearch(doc)`: for OpenSearch. The argument is a DOM Document object of an OpenSearch search document.
- `getSearch(link)`: for templated search in OPDS 2.0. The argument must be an OPDS 2.0 Link object. Note that this function is asynchronous (it returns a promise) because it dynamically imports `uri-template.js`.

These two functions return (or, in the case of `getSearch`, resolve to) an object that implements the following interface:
- `.metadata`: an object with the string properties `title` and `description`
- `.params`: an array, representing the search parameters, whose elements are objects whose properties are
- `ns`: a string; the namespace of the parameter
- `name`: a string; the name of the parameter
- `required`: a boolean, whether the parameter is required
- `value`: a string; the default value of the parameter
- `.search(map)`: a function, whose argument is a `Map` whose values are `Map`s (i.e. a two-dimensional map). The first key is the namespace of the search parameter. For non-namespaced parameters, the first key must be `null`. The second key is the parameter's name. Returns a string representing the URL of the search results.

### Supported Browsers

The main use of the library is for use in [Foliate](https://github.com/johnfactotum/foliate), which uses WebKitGTK. As such it's the only engine that has been tested extensively. But it should also work in Chromium and Firefox.
Expand Down
282 changes: 282 additions & 0 deletions opds.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,282 @@
// XML namespace URIs looked up when reading OPDS 1.x (Atom) documents
const NS = {
    ATOM: 'http://www.w3.org/2005/Atom',
    OPDS: 'http://opds-spec.org/2010/catalog',
    THR: 'http://purl.org/syndication/thread/1.0',
    DC: 'http://purl.org/dc/elements/1.1/',
    DCTERMS: 'http://purl.org/dc/terms/',
}

// media types (without parameters) that identify OPDS catalogs
const MIME = {
    ATOM: 'application/atom+xml',
    OPDS2: 'application/opds+json',
}

// Link relation values used by OPDS.
// `COVER` and `THUMBNAIL` each list two alternative spellings of the relation.
export const REL = {
    ACQ: 'http://opds-spec.org/acquisition',
    FACET: 'http://opds-spec.org/facet',
    GROUP: 'http://opds-spec.org/group',
    COVER: [
        'http://opds-spec.org/image',
        'http://opds-spec.org/cover',
    ],
    THUMBNAIL: [
        'http://opds-spec.org/image/thumbnail',
        'http://opds-spec.org/thumbnail',
    ],
}

// Symbol keys for data that has no counterpart in OPDS 2.0:
// `SUMMARY` is set on navigation links, `CONTENT` on publication metadata
export const SYMBOL = {
    SUMMARY: Symbol('summary'),
    CONTENT: Symbol('content'),
}

// module-private key under which a link's `opds:facetGroup` attribute is kept
const FACET_GROUP = Symbol('facetGroup')

// Groups the elements of `arr` into a Map, using the key(s) returned by `f`.
// `f` may return a single key or an array of keys; in the latter case the
// element is added to the group of every key. A falsy `arr` gives an empty Map.
const groupByArray = (arr, f) => {
    const groups = new Map()
    if (!arr) return groups
    for (const item of arr) {
        // wrap, then flatten one level, so a lone key and an array of keys are handled alike
        const keys = [f(item)].flat()
        keys.forEach(key => {
            if (groups.has(key)) groups.get(key).push(item)
            else groups.set(key, [item])
        })
    }
    return groups
}

// https://www.rfc-editor.org/rfc/rfc7231#section-3.1.1
// Parses a media type string such as `application/atom+xml; profile=opds-catalog`.
// Returns `null` for an empty/missing string, otherwise an object with
// - `mediaType`: the lowercased type/subtype
// - `parameters`: an object mapping lowercased parameter names to their values
//   (surrounding double quotes removed; `undefined` if the parameter has no `=`)
const parseMediaType = str => {
    if (!str) return null
    // optional whitespace (not only spaces) is allowed around `;`
    const [mediaType, ...params] = str.trim().split(/\s*;\s*/)
    const entries = []
    for (const param of params) {
        // split at the first `=` only, so that values may themselves contain `=`
        const index = param.indexOf('=')
        const name = (index < 0 ? param : param.slice(0, index)).trim().toLowerCase()
        // a trailing or doubled `;` produces an empty parameter; ignore it
        if (!name) continue
        const value = index < 0 ? undefined
            : param.slice(index + 1).trim().replace(/(^"|"$)/g, '')
        entries.push([name, value])
    }
    return {
        mediaType: mediaType.toLowerCase(),
        parameters: Object.fromEntries(entries),
    }
}

/**
 * Tells whether a media type string denotes an OPDS catalog: either the
 * OPDS 2.0 JSON type, or Atom with the parameter `profile=opds-catalog`
 * (compared case-insensitively).
 * @param {string} str - the media type, e.g. the `type` of a link
 * @returns {boolean} `false` when `str` is empty or not a catalog type
 */
export const isOPDSCatalog = str => {
    const parsed = parseMediaType(str)
    if (!parsed) return false
    switch (parsed.mediaType) {
        case MIME.OPDS2:
            return true
        case MIME.ATOM:
            return parsed.parameters.profile?.toLowerCase() === 'opds-catalog'
        default:
            return false
    }
}

// ignore the namespace if it doesn't appear in document at all:
// returns `ns` if it is the default namespace of `doc` or is bound to a prefix,
// and `null` otherwise
const useNS = (doc, ns) => {
    if (doc.lookupNamespaceURI(null) === ns) return ns
    return doc.lookupPrefix(ns) ? ns : null
}

// Builds element predicates: `filterNS(ns)(name)` returns a function that tests
// whether an element has the local name `name` and, when `ns` is truthy,
// the namespace `ns` as well. With a falsy `ns` only the local name is compared.
const filterNS = ns => {
    if (ns) return name => el => el.namespaceURI === ns && el.localName === name
    return name => el => el.localName === name
}

// Reads a text construct (e.g. `content` or `summary`) from an element.
// Returns `undefined` if there is no element, otherwise `{ value, type }`,
// where `type` is the element's `type` attribute, defaulting to 'text'.
const getContent = el => {
    if (!el) return
    const type = el.getAttribute('type') ?? 'text'
    switch (type) {
        case 'xhtml':
            // keep the markup of the child nodes
            return { value: el.innerHTML, type }
        case 'html': {
            // unescape `&lt;`, `&gt;` and `&amp;` that remain in the text;
            // `&amp;` is handled last
            const unescaped = el.textContent
                .replaceAll('&lt;', '<')
                .replaceAll('&gt;', '>')
                .replaceAll('&amp;', '&')
            return { value: unescaped, type }
        }
        default:
            return { value: el.textContent, type }
    }
}

// Returns the value of a text construct only if its type is 'text';
// gives `undefined` for a missing element or for any other type.
const getTextContent = el => {
    const content = getContent(el)
    return content?.type === 'text' ? content.value : undefined
}

// Plain-text summary taken from element `a`, falling back to element `b`
// when `a` is missing or is not of type 'text'.
const getSummary = (a, b) => {
    const preferred = getTextContent(a)
    return preferred ?? getTextContent(b)
}

// Reads the first `opds:price` element inside a link.
// Returns `null` if there is none, otherwise `{ currency, value }`, where
// `currency` is the `currencycode` attribute and `value` the element's text.
const getPrice = link => {
    const price = link.getElementsByTagNameNS(NS.OPDS, 'price')[0]
    if (!price) return null
    const currency = price.getAttribute('currencycode')
    const value = price.textContent
    return { currency, value }
}

// Follows the chain of nested `opds:indirectAcquisition` elements, taking the
// first such descendant at each step, and returns a flat array of `{ type }`
// objects ordered from outermost to innermost (empty if there are none).
const getIndirectAcquisition = el => {
    const first = node => node.getElementsByTagNameNS(NS.OPDS, 'indirectAcquisition')[0]
    const chain = []
    for (let ia = first(el); ia; ia = first(ia))
        chain.push({ type: ia.getAttribute('type') })
    return chain
}

// Converts an Atom `link` element to a link object with `rel` (an array, or
// `undefined` if there is no `rel` attribute), `href`, `type`, `title` and
// `properties`. The facet group, if any, is stored under the `FACET_GROUP` symbol.
const getLink = link => {
    const rel = link.getAttribute('rel')?.split(/ +/)
    // an active facet is marked by adding the "self" relation
    const isActiveFacet = link.getAttributeNS(NS.OPDS, 'activeFacet') === 'true'
    return {
        rel: isActiveFacet ? [rel ?? []].flat().concat('self') : rel,
        href: link.getAttribute('href'),
        type: link.getAttribute('type'),
        title: link.getAttribute('title'),
        properties: {
            price: getPrice(link),
            indirectAcquisition: getIndirectAcquisition(link),
            // taken from the `thr:count` attribute
            numberOfItems: link.getAttributeNS(NS.THR, 'count'),
        },
        [FACET_GROUP]: link.getAttributeNS(NS.OPDS, 'facetGroup'),
    }
}

/**
 * Converts an OPDS 1.x entry to an object shaped like an OPDS 2.0 publication.
 * @param {Element} entry - the Atom `entry` element
 * @returns {object} an object with `metadata`, `links` (all links of the entry)
 * and `images` (the first link found for each cover and thumbnail relation).
 * The content, or else the summary, is kept with its type under
 * `metadata[SYMBOL.CONTENT]`.
 */
export const getPublication = entry => {
    const atom = filterNS(useNS(entry.ownerDocument, NS.ATOM))
    const dcElements = filterNS(NS.DC)
    const dcTerms = filterNS(NS.DCTERMS)
    // matches an element with the given name in either of the two Dublin Core namespaces
    const dc = name => {
        const inElements = dcElements(name)
        const inTerms = dcTerms(name)
        return el => inElements(el) || inTerms(el)
    }

    const children = Array.from(entry.children)
    // text of the first child matching the predicate, if any
    const text = predicate => children.find(predicate)?.textContent

    const links = children.filter(atom('link')).map(getLink)
    const linksByRel = groupByArray(links, link => link.rel)

    const author = children.filter(atom('author')).map(person => {
        // `name` and `uri` are looked up in the same namespace as the person element
        const personNS = person.namespaceURI
        const uri = person.getElementsByTagNameNS(personNS, 'uri')[0]?.textContent
        const name = person.getElementsByTagNameNS(personNS, 'name')[0]?.textContent ?? ''
        return { name, links: uri ? [{ href: uri }] : [] }
    })
    const subject = children.filter(atom('category')).map(category => ({
        name: category.getAttribute('label'),
        code: category.getAttribute('term'),
    }))
    // `dcterms:issued` is preferred over a Dublin Core `date`
    const issued = children.find(dcTerms('issued')) ?? children.find(dc('date'))
    const description = children.find(atom('content')) ?? children.find(atom('summary'))
    const images = REL.COVER.concat(REL.THUMBNAIL)
        .map(rel => linksByRel.get(rel)?.[0])
        .filter(link => link)

    return {
        metadata: {
            title: text(atom('title')) ?? '',
            author,
            publisher: text(dc('publisher')),
            published: issued?.textContent,
            language: text(dc('language')),
            identifier: text(dc('identifier')),
            subject,
            [SYMBOL.CONTENT]: getContent(description),
        },
        links,
        images,
    }
}

/**
 * Converts an OPDS 1.x feed to an object shaped like an OPDS 2.0 feed.
 * @param {Document} doc - the parsed Atom document
 * @returns {object} an object with `metadata`, `links`, either `publications`
 * or `navigation` (the entries that belong to no group), `groups`, and `facets`
 */
export const getFeed = doc => {
    const atom = filterNS(useNS(doc, NS.ATOM))
    const feedChildren = Array.from(doc.documentElement.children)
    const feedLinks = feedChildren.filter(atom('link')).map(getLink)
    const feedLinksByRel = groupByArray(feedLinks, link => link.rel)

    // items keyed by the href of their group link; ungrouped items are kept
    // under `null`, which is inserted first so that it is always the first entry
    const itemsByGroup = new Map([[null, []]])
    const groupLinkByHref = new Map()

    for (const entry of feedChildren.filter(atom('entry'))) {
        const entryChildren = Array.from(entry.children)
        const entryLinks = entryChildren.filter(atom('link')).map(getLink)
        const entryLinksByRel = groupByArray(entryLinks, link => link.rel)

        // an entry counts as a publication if it has an acquisition or preview link
        const isPublication = Array.from(entryLinksByRel.keys())
            .some(rel => rel?.startsWith(REL.ACQ) || rel === 'preview')

        // prefer a group that already exists; otherwise use the first group link
        const candidates = entryLinksByRel.get(REL.GROUP) ?? entryLinksByRel.get('collection')
        let groupLink = null
        if (candidates?.length)
            groupLink = candidates.find(link => itemsByGroup.has(link.href)) ?? candidates[0]
        // remember the first link seen for each group
        if (groupLink && !groupLinkByHref.has(groupLink.href))
            groupLinkByHref.set(groupLink.href, groupLink)

        let item
        if (isPublication) item = getPublication(entry)
        else {
            // a navigation item is the entry's catalog link (or its first link,
            // or an empty object), with the entry's title and summary added to it
            const target = entryLinks.find(link => isOPDSCatalog(link.type))
                ?? entryLinks[0] ?? {}
            item = Object.assign(target, {
                title: entryChildren.find(atom('title'))?.textContent,
                [SYMBOL.SUMMARY]: getSummary(
                    entryChildren.find(atom('summary')),
                    entryChildren.find(atom('content'))),
            })
        }

        const groupKey = groupLink?.href ?? null
        const bucket = itemsByGroup.get(groupKey)
        if (bucket) bucket.push(item)
        else itemsByGroup.set(groupKey, [item])
    }

    const [ungrouped, ...groups] = Array.from(itemsByGroup, ([href, items]) => {
        // the kind of a collection is decided by its first item
        const itemsKey = items[0]?.metadata ? 'publications' : 'navigation'
        if (href == null) return { [itemsKey]: items }
        const groupLink = groupLinkByHref.get(href)
        return {
            metadata: {
                title: groupLink.title,
                numberOfItems: groupLink.properties.numberOfItems,
            },
            links: [{ rel: 'self', href: groupLink.href, type: groupLink.type }],
            [itemsKey]: items,
        }
    })

    const facets = Array.from(
        groupByArray(feedLinksByRel.get(REL.FACET) ?? [], link => link[FACET_GROUP]),
        ([title, links]) => ({ metadata: { title }, links }))

    return {
        metadata: {
            title: feedChildren.find(atom('title'))?.textContent,
            subtitle: feedChildren.find(atom('subtitle'))?.textContent,
        },
        links: feedLinks,
        ...ungrouped,
        groups,
        facets,
    }
}

/**
 * Creates a search object for a templated OPDS 2.0 search link.
 * The URI Template implementation is imported on demand.
 * @param {object} link - an OPDS 2.0 Link object whose `href` is a URI Template
 * @returns {Promise<object>} resolves to an object with
 * - `metadata`: `{ title }`, taken from the link
 * - `params`: one `{ name }` object per variable of the template
 * - `search(map)`: expands the template with the values in `map.get(null)`
 *   (a `Map` from variable names to values) and returns the resulting URL
 */
export const getSearch = async link => {
    const { replace, getVariables } = await import('./uri-template.js')
    return {
        metadata: {
            title: link.title,
        },
        // template variables have no namespace, so their values live under the
        // `null` key; fall back to an empty map rather than passing `undefined`
        // along when the caller supplied no such entry
        search: map => replace(link.href, map.get(null) ?? new Map()),
        params: Array.from(getVariables(link.href), name => ({ name })),
    }
}

/**
 * Creates a search object from an OpenSearch description document.
 * The `Url` element with an OPDS catalog type is used if there is one,
 * otherwise the first `Url` element.
 * @param {Document} doc - the parsed OpenSearch description document
 * @returns {object} an object with
 * - `metadata`: `{ title, description }`
 * - `params`: the template parameters, as `{ ns, name, required, value }`
 * - `search(map)`: fills in the template from a two-level `Map`
 *   (namespace, then parameter name) and returns the resulting URL
 * @throws {Error} if the document has no `Url` element, or if the chosen
 * `Url` element has no `template` attribute
 */
export const getOpenSearch = doc => {
    const defaultNS = doc.documentElement.namespaceURI
    const filter = filterNS(defaultNS)
    const children = Array.from(doc.documentElement.children)

    const $$urls = children.filter(filter('Url'))
    const $url = $$urls.find(url => isOPDSCatalog(url.getAttribute('type'))) ?? $$urls[0]
    if (!$url) throw new Error('document must contain at least one Url element')

    const template = $url.getAttribute('template')
    if (template == null) throw new Error('Url element must have a template attribute')

    // matches `{prefix:name?}`, where the prefix and the `?` are both optional
    const regex = /{(?:([^}]+?):)?(.+?)(\?)?}/g
    // values used for non-namespaced parameters that the caller leaves empty;
    // OpenSearch 1.1 defines "1" as the default of `indexOffset` and `pageOffset`
    const defaultMap = new Map([
        ['count', '100'],
        ['startIndex', $url.getAttribute('indexOffset') ?? '1'],
        ['startPage', $url.getAttribute('pageOffset') ?? '1'],
        ['language', '*'],
        ['inputEncoding', 'UTF-8'],
        ['outputEncoding', 'UTF-8'],
    ])

    // resolves a parameter prefix to its namespace; parameters without a prefix,
    // or in the document's own namespace, are treated as non-namespaced (`null`)
    const resolveNS = prefix => {
        const namespace = prefix ? $url.lookupNamespaceURI(prefix) : null
        return namespace === defaultNS ? null : namespace
    }

    return {
        metadata: {
            title: (children.find(filter('LongName')) ?? children.find(filter('ShortName')))?.textContent,
            description: children.find(filter('Description'))?.textContent,
        },
        search: map => template.replace(regex, (_, prefix, param) => {
            const ns = resolveNS(prefix)
            const val = map.get(ns)?.get(param)
            // only non-namespaced parameters have default values
            return encodeURIComponent(val ? val : (!ns ? defaultMap.get(param) ?? '' : ''))
        }),
        params: Array.from(template.matchAll(regex), ([, prefix, param, optional]) => {
            const ns = resolveNS(prefix)
            return {
                ns, name: param,
                required: !optional,
                value: ns ? '' : defaultMap.get(param) ?? '',
            }
        }),
    }
}
52 changes: 52 additions & 0 deletions uri-template.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// URI Template: https://datatracker.ietf.org/doc/html/rfc6570

// matches one `{…}` expression: group 1 is the optional operator character,
// group 2 is the comma-separated variable list
const regex = /{([+#./;?&])?([^}]+?)}/g
// splits a varspec into its name (group 1) and an optional modifier (group 2),
// which is either `*` or `:` followed by a length of one to four digits
const varspecRegex = /(.+?)(\*|:[1-9]\d{0,3})?$/

// Expansion behaviour of each operator; see the table in RFC 6570, Appendix A.
// - `first`: string that starts the expansion when any variable is defined
// - `sep`: separator between the expanded variables
// - `named`: whether values are written as `name=value`
// - `ifemp`: string that follows the name when a named value is empty
// - `allowReserved`: whether reserved characters are left unencoded
// The `undefined` key is the entry for expressions without an operator.
const table = {
    undefined: { first: '', sep: ',' },
    '+': { first: '', sep: ',', allowReserved: true },
    '.': { first: '.', sep: '.' },
    '/': { first: '/', sep: '/' },
    ';': { first: ';', sep: ';', named: true, ifemp: '' },
    '?': { first: '?', sep: '&', named: true, ifemp: '=' },
    '&': { first: '&', sep: '&', named: true, ifemp: '=' },
    // fragment expansion starts with "#" and separates values with ","
    '#': { first: '#', sep: ',', allowReserved: true },
}

// 2.4.1 Prefix Values, "Note that this numbering is in characters, not octets"
// Returns the first `maxLength` characters of `str`, or all of `str` if it is
// shorter. Characters are counted as Unicode code points, so a character
// outside the Basic Multilingual Plane is never cut in half.
// A `null` or `undefined` value yields an empty string.
const prefix = (maxLength, str) => {
    if (str == null) return ''
    // `Array.from` iterates a string by code points rather than UTF-16 code units
    return Array.from(String(str)).slice(0, maxLength).join('')
}

/**
 * Expands a URI Template.
 * Variables that are missing from `map`, or whose value is empty, are left
 * out of the expansion. An expression whose variables are all left out
 * expands to an empty string.
 * @param {string} str - the URI Template
 * @param {Map<string, string>} map - the values of the variables, by name
 * @returns {string} the expanded template
 */
export const replace = (str, map) => str.replace(regex, (_, operator, variableList) => {
    const { first, sep, named, allowReserved } = table[operator]
    // TODO: this isn't spec compliant
    const encode = allowReserved ? encodeURI : encodeURIComponent
    const parts = []
    for (const varspec of variableList.split(',')) {
        const match = varspec.match(varspecRegex)
        // skip malformed varspecs (e.g. the empty one in `{a,}`)
        if (!match) continue
        const [, name, modifier] = match
        let value = map?.get(name)
        // the prefix modifier can only be applied to a variable that has a value
        if (value != null && modifier?.startsWith(':'))
            value = prefix(Number.parseInt(modifier.slice(1), 10), value)
        if (!value) continue
        const encoded = encode(value)
        parts.push(named ? name + '=' + encoded : encoded)
    }
    return parts.length ? first + parts.join(sep) : ''
})

/**
 * Collects the names of all variables used in a URI Template, without their
 * modifiers. Malformed (e.g. empty) varspecs are ignored.
 * @param {string} str - the URI Template
 * @returns {Set<string>} the variable names, in order of first appearance
 */
export const getVariables = str => new Set(
    Array.from(str.matchAll(regex), ([,, variableList]) => variableList.split(','))
        .flat()
        .map(varspec => varspec.match(varspecRegex)?.[1])
        // a varspec that doesn't match has no name; don't report it as `undefined`
        .filter(name => name != null))

0 comments on commit ad5ec4d

Please sign in to comment.