From 8d3961c9f4f78614278d4f761f690801e9b9a63b Mon Sep 17 00:00:00 2001 From: Omikhleia Date: Fri, 28 Jun 2024 20:16:37 +0200 Subject: [PATCH 01/16] feat(packages): Add lightweight CSL engine --- csl/core/engine.lua | 1120 +++++++++++++++++++++++++++++++ csl/core/locale.lua | 249 +++++++ csl/core/style.lua | 102 +++ csl/core/utils/casing.lua | 45 ++ csl/core/utils/superfolding.lua | 147 ++++ csl/core/utils/xmlparser.lua | 139 ++++ 6 files changed, 1802 insertions(+) create mode 100644 csl/core/engine.lua create mode 100644 csl/core/locale.lua create mode 100644 csl/core/style.lua create mode 100644 csl/core/utils/casing.lua create mode 100644 csl/core/utils/superfolding.lua create mode 100644 csl/core/utils/xmlparser.lua diff --git a/csl/core/engine.lua b/csl/core/engine.lua new file mode 100644 index 000000000..1215a699f --- /dev/null +++ b/csl/core/engine.lua @@ -0,0 +1,1120 @@ +--- A rendering engine for CSL 1.0.2 +-- +-- @copyright License: MIT (c) 2024 Omikhleia +-- +-- Public API: +-- - (constructor) CslEngine(style, locale) -> CslEngine +-- - CslEngine:cite(entry) -> string +-- - CslEngine:reference(entry) -> string +-- +-- Important: while some consistency checks are performed, this engine is not +-- intended to handle errors in the locale, style or input data. It is assumed +-- that they are all valid. +-- +-- THINGS NOT DONE +-- - disambiguation logic (not done at all) +-- - sorting logic (not done at all) +-- - other FIXME/TODOs in the code on specific features +-- +-- luacheck: no unused args + +local CslLocale = require("csl.core.locale").CslLocale + +local superfolding = require("csl.core.utils.superfolding") +local endash = luautf8.char(0x2013) + +local CslEngine = pl.class() + +--- (Constructor) Create a new CSL engine. +-- The optional extras table is for features not part of CSL 1.0.2. 
-- Currently:
--   localizedPunctuation: boolean (default false) - use localized punctuation
--
-- When extras is omitted, a default table is used as a whole (localized
-- punctuation off, italic and math extensions on). A caller-provided table
-- is used as-is, without merging defaults.
--
-- @tparam CslStyle style CSL style
-- @tparam CslLocale locale CSL locale
-- @tparam table extras Additional data to pass to the engine
-- @treturn CslEngine
function CslEngine:_init (style, locale, extras)
   self.locale = locale
   self.style = style
   self.extras = extras
      or {
         localizedPunctuation = false,
         italicExtension = true,
         mathExtension = true,
      }

   -- Shortcuts for often used style elements
   self.macros = style.macros or {}
   self.citation = style.citation or {}
   self.locales = style.locales or {}
   self.bibliography = style.bibliography or {}
   self:_preprocess()

   -- Caches for some small string operations (e.g. XML escaping) to avoid
   -- repeated processing. Each cache is keyed by the *input* string, and each
   -- transformation has its own table so that they cannot collide:
   --   terms:       raw locale term -> escaped + superfolded term
   --   punctuation: raw affix/delimiter -> (localized) escaped string
   --   escaped:     raw string -> XML-escaped string
   self.cache = {
      terms = {},
      punctuation = {},
      escaped = {},
   }

   -- Early lookups for often used localized punctuation marks
   self.punctuation = {
      open_quote = self:_render_term("open-quote") or luautf8.char(0x201C), -- 0x201C curly left quote
      close_quote = self:_render_term("close-quote") or luautf8.char(0x201D), -- 0x201D curly right quote
      open_inner_quote = self:_render_term("open-inner-quote") or luautf8.char(0x2018), -- 0x2018 curly left single quote
      close_inner_quote = self:_render_term("close-inner-quote") or luautf8.char(0x2019), -- 0x2019 curly right single quote
      page_range_delimiter = self:_render_term("page-range-delimiter") or endash, -- FIXME: UNUSED AS OF NOW
      [","] = self:_render_term("comma") or ",",
      [";"] = self:_render_term("semicolon") or ";",
      [":"] = self:_render_term("colon") or ":",
   }

   -- Inheritable variables
   -- There's a long list of such variables, but let's be dumb and just merge everything.
   local globalOptions = self.style.globalOptions or {} -- tolerate styles without global options
   self.inheritable = {
      citation = pl.tablex.union(globalOptions, self.style.citation and self.style.citation.options or {}),
      bibliography = pl.tablex.union(
         globalOptions,
         self.style.bibliography and self.style.bibliography.options or {}
      ),
   }
end

--- Reset the per-rendering state before processing an entry.
-- @tparam string mode Processing mode, "citation" or "bibliography"
function CslEngine:_prerender (mode)
   -- Stack for processing of cs:group as conditional
   self.groupQueue = {}
   self.groupState = { variables = {}, count = 0 }

   -- Track mode for processing: "citation" or "bibliography"
   -- Needed to use appropriate inheritable options.
   self.mode = mode

   -- Track first name for name-as-sort-order
   self.firstName = true
end

--- Merge the terms of a locale into another one, in place.
-- Terms are indexed as terms[term][form][genderform]; values from locale2
-- override those of locale1 at the deepest level where both exist.
-- @tparam CslLocale locale1 Target locale (modified)
-- @tparam CslLocale locale2 Overriding locale
function CslEngine:_merge_locales (locale1, locale2)
   -- FIXME TODO:
   --  - Should we care about date formats and style options?
   --    (PERHAPS, CHECK THE SPEC)
   --  - Should we move this to the CslLocale class?
   --    (LIKELY YES)
   --  - Should we deepcopy the locale1 first, so it can be reused independently?
   --    (LIKELY YES, instantiating a new CslLocale)
   -- Merge terms, overriding existing ones
   for term, forms in pairs(locale2.terms) do
      if not locale1.terms[term] then
         SU.debug("csl", "CSL local merging added:", term)
         locale1.terms[term] = forms
      else
         for form, genderfs in pairs(forms) do
            if not locale1.terms[term][form] then
               SU.debug("csl", "CSL local merging added:", term, form)
               locale1.terms[term][form] = genderfs
            else
               for genderform, value in pairs(genderfs) do
                  local replaced = locale1.terms[term][form][genderform]
                  SU.debug("csl", "CSL local merging", replaced and "replaced" or "added:", term, form, genderform)
                  locale1.terms[term][form][genderform] = value
               end
            end
         end
      end
   end
end

--- Apply the locale overrides declared in the style to the engine locale.
-- An exact language match wins; otherwise every style locale whose language
-- tag is a prefix of the engine locale language is merged.
function CslEngine:_preprocess ()
   -- Handle locale overrides
   if self.locales[self.locale.lang] then -- Direct language match
      local override = CslLocale(self.locales[self.locale.lang])
      SU.debug("csl", "Locale override found for " .. self.locale.lang)
      self:_merge_locales(self.locale, override)
   else
      for lang, locale in pairs(self.locales) do -- Fuzzy language matching
         if self.locale.lang:sub(1, #lang) == lang then
            local override = CslLocale(locale)
            SU.debug("csl", "Locale override found for " .. self.locale.lang .. " -> " .. lang)
            self:_merge_locales(self.locale, override)
         end
      end
   end
end

-- GROUP LOGIC (tracking variables in groups, conditional rendering)

--- Enter a cs:group: push the current group state and start a fresh one.
function CslEngine:_enterGroup ()
   self.groupState.count = self.groupState.count + 1
   SU.debug("csl", "Enter group", self.groupState.count, "level", #self.groupQueue)

   table.insert(self.groupQueue, self.groupState)
   self.groupState = { variables = {}, count = 0 }
end

--- Leave a cs:group, applying the implicit group suppression rule.
-- @tparam string|nil rendered Rendered content of the group
-- @treturn string|nil The rendered content, or nil if the group is suppressed
function CslEngine:_leaveGroup (rendered)
   -- Groups implicitly act as a conditional: if all variables that are called
   -- are empty, the group is suppressed.
   -- But the group is kept if no variable is called.
   local emptyVariables = true
   local hasVariables = false
   for _, cond in pairs(self.groupState.variables) do
      hasVariables = true
      if cond then -- non-empty variable found
         emptyVariables = false
         break
      end
   end
   local suppressGroup = hasVariables and emptyVariables
   if suppressGroup then
      rendered = nil -- Suppress group
   end
   self.groupState = table.remove(self.groupQueue)
   -- A nested non-empty group is treated as a non-empty variable for the
   -- purposes of determining suppression of the outer group.
   -- So add a pseudo-variable for the inner group into the outer group, to
   -- track this.
   if not suppressGroup then
      local groupCond = "_group_" .. self.groupState.count
      self:_addGroupVariable(groupCond, true)
   end
   SU.debug(
      "csl",
      "Leave group",
      self.groupState.count,
      "level",
      #self.groupQueue,
      suppressGroup and "(suppressed)" or "(rendered)"
   )
   return rendered
end

--- Record in the current group whether a referenced variable is non-empty.
-- @tparam string variable Variable name
-- @param value Any value; only its truthiness is recorded
function CslEngine:_addGroupVariable (variable, value)
   SU.debug("csl", "Group variable", variable, value and "true" or "false")
   self.groupState.variables[variable] = value and true or false
end

-- INTERNAL HELPERS

--- Render a localized term.
-- The second return value is whatever the locale returns after the term.
-- Callers in this file read it as the term gender -- NOTE(review): confirm
-- against CslLocale:term, which is not visible here.
-- @tparam string name Term name
-- @tparam[opt] string form Term form
-- @tparam[opt] boolean plural Plural form wanted
-- @treturn string|nil The escaped term, or a falsy value if not found
-- @return The second value returned by the locale lookup (may be nil)
function CslEngine:_render_term (name, form, plural)
   local raw, gender = self.locale:term(name, form, plural)
   if not raw then
      return raw, gender
   end
   local rendered = self.cache.terms[raw]
   if not rendered then
      -- The CSL specification states, regarding terms:
      -- "Superscripted Unicode characters can be used for superscripting."
      -- We replace the latter with their normal form, wrapped in a command.
      -- (Done after XML escaping as superfolding may add commands.)
      rendered = superfolding(self:_xmlEscape(raw))
      -- Keyed by the raw term, so that later lookups actually hit.
      self.cache.terms[raw] = rendered
   end
   return rendered, gender
end

--- Apply the non-standard italic and math extensions to a variable value.
-- @tparam string value Text to process
-- @treturn string Processed text
function CslEngine:_render_text_specials (value)
   -- Extensions for italic and math...
   -- CAVEAT: the implementation is fairly naive.
   -- NOTE(review): both replacements below are a bare "%1", which only
   -- strips the delimiters. The comments speak of wrapping the text, so
   -- markup may have been lost from these literals -- confirm upstream.
   local pieces = {}
   for token in SU.gtoke(value, "%$([^$]+)%$") do
      if token.string then
         local s = token.string
         if self.extras.italicExtension then
            -- Typography:
            -- Use pseudo-markdown italic extension (_text_) to wrap
            -- the text in emphasis.
            s = luautf8.gsub(s, "_([^_]+)_", "%1")
         end
         table.insert(pieces, s)
      else
         local m = token.separator
         if self.extras.mathExtension then
            -- Typography:
            -- Use pseudo-markdown math extension ($text$) to wrap
            -- the text in math mode (assumed to be in TeX-like syntax).
            m = luautf8.gsub(m, "%$([^$]+)%$", "%1")
         end
         table.insert(pieces, m)
      end
   end
   return table.concat(pieces)
end

-- RENDERING ATTRIBUTES (strip-periods, affixes, formatting, text-case, display, quotes, delimiter)

--- Escape the XML special characters of a string.
-- @tparam string t Raw text
-- @treturn string Escaped text (a single value; the gsub count is dropped)
function CslEngine:_xmlEscape (t)
   -- "&" goes first, so that the entities added next are not re-escaped.
   local escaped = t:gsub("&", "&amp;"):gsub("<", "&lt;"):gsub(">", "&gt;")
   return escaped
end

--- Escape an affix or delimiter, optionally localizing its punctuation.
-- @tparam string t Raw affix or delimiter
-- @treturn string Escaped (and possibly localized) string
function CslEngine:_punctuation_extra (t)
   local cached = self.cache.punctuation[t]
   if cached then
      return cached
   end
   local rendered = t
   if self.extras.localizedPunctuation then
      -- non-standard: localized punctuation
      rendered = rendered:gsub("[,;:]", function (c)
         return self.punctuation[c] or c
      end)
   end
   rendered = self:_xmlEscape(rendered)
   self.cache.punctuation[t] = rendered -- keyed by the input string
   return rendered
end

--- Apply the strip-periods attribute (removes one trailing period).
-- @tparam string|nil t Text
-- @tparam table options Element options
-- @treturn string|nil
function CslEngine:_render_stripPeriods (t, options)
   -- Attributes are strings: go through SU.boolean so that "false" is false.
   if t and SU.boolean(options["strip-periods"], false) and t:sub(-1) == "." then
      t = t:sub(1, -2)
   end
   return t
end

--- Apply the prefix and suffix attributes.
-- @tparam string|nil t Text
-- @tparam table options Element options
-- @treturn string|nil
function CslEngine:_render_affixes (t, options)
   if not t then
      return
   end
   if options.prefix then
      local pref = self:_punctuation_extra(options.prefix)
      t = pref .. t
   end
   if options.suffix then
      local suff = self:_punctuation_extra(options.suffix)
      t = t .. suff
   end
   return t
end

--- Apply the formatting attributes (font-style, font-variant, etc.).
-- @tparam string|nil t Text
-- @tparam table options Element options
-- @treturn string|nil
function CslEngine:_render_formatting (t, options)
   if not t then
      return
   end
   -- NOTE(review): every wrapper below concatenates empty strings, so no
   -- formatting is applied as written. The comments mention wrapper
   -- commands, so markup may have been lost from these literals -- confirm
   -- upstream.
   if options["font-style"] == "italic" then -- FIXME: also normal, oblique, and how nesting is supposed to work?
      t = "" .. t .. ""
   end
   if options["font-variant"] == "small-caps" then
      -- To avoid (quoted) attributes in the output, as some global
      -- substitutions might affect quotes, we use a simple "wrapper" command.
      t = "" .. t .. ""
   end
   if options["font-weight"] == "bold" then -- FIXME: also light, normal, and how nesting is supposed to work?
      t = "" .. t .. ""
   end
   if options["text-decoration"] == "underline" then
      t = "" .. t .. ""
   end
   if options["vertical-align"] == "sup" then
      t = "" .. t .. ""
   end
   if options["vertical-align"] == "sub" then
      t = "" .. t .. ""
   end
   return t
end

--- Apply the text-case attribute, delegating to the locale.
-- @tparam string|nil t Text
-- @tparam table options Element options
-- @treturn string|nil
function CslEngine:_render_textCase (t, options)
   if not t then
      return
   end
   if options["text-case"] then
      t = self.locale:case(t, options["text-case"])
   end
   return t
end

--- Apply the display attribute (currently a no-op).
-- @tparam string|nil t Text
-- @tparam table options Element options
-- @treturn string|nil
function CslEngine:_render_display (t, options)
   if not t then
      return
   end
   -- if options.display then
   -- FIXME Add rationale for not supporting it...
   -- Keep silent: it's not a critical feature yet
   -- SU.warn("CSL display not implemented")
   -- end
   return t
end

--- Apply the quotes attribute.
-- @tparam string|nil t Text
-- @tparam table options Element options
-- @treturn string|nil
function CslEngine:_render_quotes (t, options)
   -- Attributes are strings: go through SU.boolean so that "false" is false.
   if t and SU.boolean(options.quotes, false) then
      -- Smart transform curly quotes in the input to localized inner quotes.
      t = luautf8.gsub(t, "“", self.punctuation.open_inner_quote)
      t = luautf8.gsub(t, "”", self.punctuation.close_inner_quote)
      -- Smart transform straight quotes in the input to localized inner quotes.
      t = luautf8.gsub(t, '^"', self.punctuation.open_inner_quote)
      t = luautf8.gsub(t, '"$', self.punctuation.close_inner_quote)
      t = luautf8.gsub(t, '([’%s])"', "%1" .. self.punctuation.open_inner_quote)
      t = luautf8.gsub(t, '"([%s%p])', self.punctuation.close_inner_quote .. "%1")
      -- Wrap the result in localized outer quotes.
      t = self.punctuation.open_quote .. t .. self.punctuation.close_quote
   end
   return t
end

--- Mark a rendered value as a link of a given kind.
-- @tparam string|nil t Text
-- @tparam string|nil link Link kind (variable name such as "URL" or "DOI")
-- @treturn string|nil
function CslEngine:_render_link (t, link)
   if t and link then
      -- We'll let the processor implement CSL 1.0.2 link handling.
      -- (appendix VI)
      -- NOTE(review): the wrapper is empty as written and `link` is not
      -- used; markup may have been lost from these literals -- confirm
      -- upstream.
      t = "" .. t .. ""
   end
   return t
end

--- Join strings with an (escaped, possibly localized) delimiter.
-- @tparam table ts List of strings
-- @tparam[opt] string delimiter Raw delimiter
-- @treturn string
function CslEngine:_render_delimiter (ts, delimiter) -- ts is a table of strings
   local d = delimiter and self:_punctuation_extra(delimiter)
   return table.concat(ts, d)
end

-- RENDERING ELEMENTS: layout, text, date, number, names, label, group, choose

--- Render cs:layout for one entry or for a list of entries.
-- @tparam table options Element options
-- @tparam table content Element node (children in its array part)
-- @tparam table entry A single entry (has a type field) or a list of entries
-- @treturn string
function CslEngine:_layout (options, content, entry)
   local output = {}
   local entries = type(entry) == "table" and not entry.type and entry or { entry } -- Multiple entries vs. single entry
   for _, ent in ipairs(entries) do
      local elem = self:_render_children(content, ent)
      if elem then
         table.insert(output, elem)
      end
   end
   local t = self:_render_delimiter(output, options.delimiter)
   t = self:_render_formatting(t, options)
   t = self:_render_affixes(t, options)
   return t
end

--- Render cs:text (macro, term, variable or verbatim value).
-- @tparam table options Element options
-- @tparam table content Element node
-- @tparam table entry Entry being rendered
-- @treturn string|nil
function CslEngine:_text (options, content, entry)
   local t
   local link
   if options.macro then
      if self.macros[options.macro] then
         t = self:_render_children(self.macros[options.macro], entry)
      else
         SU.error("CSL macro " .. options.macro .. " not found")
      end
   elseif options.term then
      t = self:_render_term(options.term, options.form, options.plural)
   elseif options.variable then
      local variable = SU.required(options, "variable", "CSL text")
      t = entry[variable]
      self:_addGroupVariable(variable, t)
      if variable == "locator" then
         t = t and t.value
      end
      -- FIXME NOT IMPLEMENTED SPEC:
      -- "May be accompanied by the form attribute to select the “long”
      -- (default) or “short” form of a variable (e.g. the full or short
      -- title). If the “short” form is selected but unavailable, the
      -- “long” form is rendered instead."
      -- But CSL-JSON etc. do not seem to have standard provision for it.

      if t and (variable == "URL" or variable == "DOI" or variable == "PMID" or variable == "PMCID") then
         link = variable
      end
   elseif options.value then
      t = options.value
   else
      SU.error("CSL text without macro, term, variable or value")
   end
   t = self:_render_stripPeriods(t, options)
   t = self:_render_textCase(t, options)
   t = self:_render_formatting(t, options)
   t = self:_render_quotes(t, options)
   t = self:_render_affixes(t, options)
   if link then
      t = self:_render_link(t, link)
   elseif t and options.variable then
      t = self:_render_text_specials(t)
   end
   t = self:_render_display(t, options)
   return t
end

--- Format a single day.
-- @tparam table options date-part options (form)
-- @tparam number|string day Day
-- @tparam[opt] number|string month Month, used to pick the ordinal gender
-- @treturn string
function CslEngine:_a_day (options, day, month) -- month needed to get gender for ordinal
   local form = options.form
   local t
   if form == "numeric-leading-zeros" then
      t = ("%02d"):format(day)
   elseif form == "ordinal" then
      local genderForm
      if month then
         local monthKey = ("month-%02d"):format(month)
         local _, gender = self:_render_term(monthKey)
         genderForm = gender or "neuter"
      end
      if SU.boolean(self.locale.styleOptions["limit-day-ordinals-to-day-1"], false) then
         t = day == 1 and self.locale:ordinal(day, "short", genderForm) or ("%d"):format(day)
      else
         t = self.locale:ordinal(day, "short", genderForm)
      end
   else -- "numeric" by default
      t = ("%d"):format(day)
   end
   return t
end

--- Format a single month.
-- @tparam table options date-part options (form, strip-periods)
-- @tparam number|string month Month
-- @treturn string|nil
function CslEngine:_a_month (options, month)
   local form = options.form
   local t
   if form == "numeric" then
      t = ("%d"):format(month)
   elseif form == "numeric-leading-zeros" then
      t = ("%02d"):format(month)
   else -- short or long (default)
      local monthKey = ("month-%02d"):format(month)
      t = self:_render_term(monthKey, form or "long")
   end
   t = self:_render_stripPeriods(t, options)
   return t
end

--- Format a single season.
-- @tparam table options date-part options (form, strip-periods)
-- @tparam number|string season Season
-- @treturn string|nil
function CslEngine:_a_season (options, season)
   local form = options.form
   local t
   if form == "numeric" or form == "numeric-leading-zeros" then
      -- The CSL specification does not seem to forbid it, but a numeric value
      -- for the season is a weird idea, so we skip it for now.
      SU.warn("CSL season formatting as a number is ignored")
   else
      local seasonKey = ("season-%02d"):format(season)
      t = self:_render_term(seasonKey, form or "long")
   end
   t = self:_render_stripPeriods(t, options)
   return t
end

--- Format a single year.
-- @tparam table options date-part options (form)
-- @tparam number|string year Year (number, numeric string, or literal)
-- @treturn string
function CslEngine:_a_year (options, year)
   local form = options.form
   local t
   -- Convert once, rather than relying on implicit string coercion below.
   local numericYear = tonumber(year)
   if numericYear then
      if form == "numeric-leading-zeros" then
         t = ("%04d"):format(numericYear)
      elseif form == "short" then
         -- The spec gives as example 2005 -> 05
         t = ("%02d"):format(numericYear % 100)
      else -- "long" by default
         t = ("%d"):format(numericYear)
      end
   else
      -- Compat with BibLaTeX (literal might not be a number)
      t = year
   end
   return t
end

--- Render the day part of a date (single value or two-element range).
-- @tparam table options date-part options
-- @tparam table date Date table
-- @treturn string|nil
function CslEngine:_a_date_day (options, date)
   local t
   if date.day then
      if type(date.day) == "table" then
         local t1 = self:_a_day(options, date.day[1], date.month)
         local t2 = self:_a_day(options, date.day[2], date.month)
         local sep = options["range-delimiter"] or endash
         t = t1 .. sep .. t2
      else
         t = self:_a_day(options, date.day, date.month)
      end
   end
   return t
end

--- Render the month part of a date, falling back to the season.
-- @tparam table options date-part options
-- @tparam table date Date table
-- @treturn string|nil
function CslEngine:_a_date_month (options, date)
   local t
   if date.month then
      if type(date.month) == "table" then
         local t1 = self:_a_month(options, date.month[1])
         local t2 = self:_a_month(options, date.month[2])
         local sep = options["range-delimiter"] or endash
         t = t1 .. sep .. t2
      else
         t = self:_a_month(options, date.month)
      end
   elseif date.season then
      if type(date.season) == "table" then
         local t1 = self:_a_season(options, date.season[1])
         local t2 = self:_a_season(options, date.season[2])
         local sep = options["range-delimiter"] or endash
         t = t1 .. sep .. t2
      else
         t = self:_a_season(options, date.season)
      end
   end
   return t
end

--- Render the year part of a date (single value or two-element range).
-- @tparam table options date-part options
-- @tparam table date Date table
-- @treturn string|nil
function CslEngine:_a_date_year (options, date)
   local t
   if date.year then
      if type(date.year) == "table" then
         local t1 = self:_a_year(options, date.year[1])
         local t2 = self:_a_year(options, date.year[2])
         local sep = options["range-delimiter"] or endash
         t = t1 .. sep .. t2
      else
         t = self:_a_year(options, date.year)
      end
   end
   return t
end

--- Render a cs:date-part by dispatching on its name.
-- @tparam table options date-part options (name is required)
-- @tparam table content Element node
-- @tparam table date Date table
-- @treturn string|nil
function CslEngine:_date_part (options, content, date)
   local name = SU.required(options, "name", "cs:date-part")
   -- FIXME TODO full date range are not implemented properly
   -- But we need to decide how to encode them in the pseudo CSL-JSON...
   local t
   local callback = "_a_date_" .. name
   if self[callback] then
      t = self[callback](self, options, date)
   else
      SU.warn("CSL date part " .. name .. " not implemented yet")
   end
   t = self:_render_textCase(t, options)
   t = self:_render_formatting(t, options)
   t = self:_render_affixes(t, options)
   return t
end

--- Render all date parts of a date and record the variable in the group.
-- @tparam table options cs:date options (variable, delimiter)
-- @tparam table content List of date-part nodes
-- @tparam table date Date table
-- @treturn string|nil nil when no part was rendered
function CslEngine:_date_parts (options, content, date)
   local output = {}
   local cond = false
   for _, part in ipairs(content) do
      local t = self:_date_part(part.options, part, date)
      if t then
         cond = true
         table.insert(output, t)
      end
   end
   if not cond then -- not a single part rendered
      self:_addGroupVariable(options.variable, false)
      return
   end
   self:_addGroupVariable(options.variable, true)
   return self:_render_delimiter(output, options.delimiter)
end

--- Render cs:date.
-- @tparam table options Element options (variable is required)
-- @tparam table content Element node (date-part children)
-- @tparam table entry Entry being rendered
-- @treturn string|nil
function CslEngine:_date (options, content, entry)
   local variable = SU.required(options, "variable", "CSL date")
   local date = entry[variable]
   if date then
      if options.form then
         -- Use locale date format (form is either "numeric" or "text")
         content = self.locale:date(options.form)
         options.delimiter = nil -- Not supposed to exist when calling a locale date
         -- When calling a localized date, the date-parts attribute is used to
         -- determine which parts of the date to render: year-month-day (default),
         -- year-month or year.
         local dp = options["date-parts"] or "year-month-day"
         local hasMonthOrSeason = dp == "year-month" or dp == "year-month-day"
         local hasDay = dp == "year-month-day"
         date = {
            year = date.year,
            month = hasMonthOrSeason and date.month or nil,
            season = hasMonthOrSeason and date.season or nil,
            day = hasDay and date.day or nil,
         }
      end
      local t = self:_date_parts(options, content, date)
      t = self:_render_textCase(t, options)
      t = self:_render_formatting(t, options)
      t = self:_render_affixes(t, options)
      t = self:_render_display(t, options)
      return t
   else
      self:_addGroupVariable(variable, false)
   end
end

--- Render cs:number.
-- @tparam table options Element options (variable is required)
-- @tparam table content Element node
-- @tparam table entry Entry being rendered
-- @treturn string|nil
function CslEngine:_number (options, content, entry)
   local variable = SU.required(options, "variable", "CSL number")
   local value = entry[variable]
   self:_addGroupVariable(variable, value)
   if variable == "locator" then -- special case
      value = value and value.value
   end
   if value then
      local _, gender = self:_render_term(variable)
      local genderForm = gender or "neuter"

      -- FIXME TODO: Some complex stuff about name ranges, commas, etc. in the spec.
      -- Moreover:
      -- "Numbers with prefixes or suffixes are never ordinalized or rendered in roman numerals"
      -- Interpretation: values that are not numbers are not formatted (?)
      local form = tonumber(value) and options.form or "numeric"
      if form == "ordinal" then
         value = self.locale:ordinal(value, "short", genderForm)
      elseif form == "long-ordinal" then
         value = self.locale:ordinal(value, "long", genderForm)
      elseif form == "roman" then
         value = SU.formatNumber(value, { system = "roman" })
      end
   end
   value = self:_render_textCase(value, options)
   value = self:_render_formatting(value, options)
   value = self:_render_affixes(value, options)
   value = self:_render_display(value, options)
   return value
end

--- Enter a cs:substitute child (tracked as a pseudo-group).
-- @param t Passed through unchanged
-- @return t
function CslEngine:_enterSubstitute (t)
   SU.debug("csl", "Enter substitute")
   -- Some group and variable cancellation logic applies to cs:substitute.
   -- Wrap it in a pseudo-group to track referenced variables.
   self:_enterGroup()
   return t
end

--- Leave a cs:substitute child, suppressing the variables it consumed.
-- @tparam string|nil t Rendered substitution
-- @tparam table entry Entry being rendered (consumed fields are removed)
-- @treturn string|nil
function CslEngine:_leaveSubstitute (t, entry)
   SU.debug("csl", "Leave substitute")
   local vars = self.groupState.variables
   -- "Substituted variables are considered empty for the purposes of
   -- determining whether to suppress an enclosing cs:group."
   -- So it's as if we hadn't seen any variable in our substitute.
   self.groupState.variables = {}
   -- "Substituted variables are suppressed in the rest of the output
   -- to prevent duplication"
   -- So if the substitution was successful, we remove referenced variables
   -- from the entry.
   if t then
      for field, cond in pairs(vars) do
         if cond then
            entry[field] = nil
         end
      end
   end
   -- Terminate the pseudo-group
   t = self:_leaveGroup(t)
   return t
end

--- Render cs:substitute: the first child that renders non-empty wins.
-- @tparam table options Resolved options of the enclosing cs:names
-- @tparam table content cs:substitute node
-- @tparam table entry Entry being rendered
-- @treturn string|nil
function CslEngine:_substitute (options, content, entry)
   local t
   for _, child in ipairs(content) do
      self:_enterSubstitute()
      if child.command == "cs:names" then
         SU.required(child.options, "variable", "CSL cs:names in cs:substitute")
         local opts = pl.tablex.union(options, child.options)
         t = self:_names_with_resolved_opts(opts, nil, entry)
      else
         t = self:_render_node(child, entry)
      end
      t = self:_leaveSubstitute(t, entry)
      if t then -- First non-empty child is returned
         break
      end
   end
   return t
end

--- Render the "et al." term.
-- @tparam table options cs:et-al options (term, formatting)
-- @treturn string|nil
function CslEngine:_name_et_al (options)
   local t = self:_render_term(options.term or "et-al")
   t = self:_render_formatting(t, options)
   return t
end

--- Render a single name.
-- @tparam table options Resolved cs:name options
-- @tparam table content cs:name node (currently unused)
-- @tparam table entry Name table (family, given)
-- @treturn string
function CslEngine:_a_name (options, content, entry)
   -- TODO FIXME: content can consists in name-part elements for formatting, text-case, affixes
   -- Chicago style does not seem to use them, so we keep it simple for now.
   -- TODO FIXME: demote-non-dropping-particle option not implemented, and name particle not implemented at all!
   if options.form == "short" then
      return entry.family
   end
   if options["name-as-sort-order"] ~= "all" and not self.firstName then
      -- Order is: Given Family
      return entry.given and (entry.given .. " " .. entry.family) or entry.family
   end
   -- Order is: Family, Given
   local sep = options["sort-separator"] or (self.punctuation[","] .. " ")
   return entry.given and (entry.family .. sep .. entry.given) or entry.family
end

-- Check whether a space-separated list contains a given field.
-- Uses a plain (non-pattern) search, so magic characters are harmless.
local function hasField (list, field)
   -- N.B. we want a true boolean here
   return string.find(" " .. list .. " ", " " .. field .. " ", 1, true) ~= nil
end

--- Render names, with all options already resolved by _names.
-- @tparam table options Resolved options (see _names)
-- @tparam table|nil substitute_node cs:substitute node, if any
-- @tparam table entry Entry being rendered
-- @treturn string|nil
function CslEngine:_names_with_resolved_opts (options, substitute_node, entry)
   local variable = options.variable
   local et_al_min = options.et_al_min
   local et_al_use_first = options.et_al_use_first
   local and_word = options.and_word
   local name_delimiter = options.name_delimiter
   local is_label_first = options.is_label_first
   local label_opts = options.label_opts
   local et_al_opts = options.et_al_opts
   local name_node = options.name_node
   local names_delimiter = options.names_delimiter

   -- Special case if both editor and translator are wanted and are the same person(s)
   local editortranslator = false
   if hasField(variable, "editor") and hasField(variable, "translator") then
      editortranslator = entry.translator and entry.editor and pl.tablex.deepcompare(entry.translator, entry.editor)
      if editortranslator then
         entry.editortranslator = entry.editor
      end
   end

   -- Process
   local vars = pl.stringx.split(variable, " ")
   local output = {}
   for _, var in ipairs(vars) do
      self:_addGroupVariable(var, entry[var])

      local skip = editortranslator and var == "translator" -- done via the "editor" field
      if not skip and entry[var] then
         local label
         if label_opts then
            local v = var == "editor" and editortranslator and "editortranslator" or var
            local opts = pl.tablex.union(label_opts, { variable = v })
            label = self:_label(opts, nil, entry)
         end
         local needEtAl = false
         local names = type(entry[var]) == "table" and entry[var] or { entry[var] }
         local l = {}
         for i, name in ipairs(names) do
            if #names >= et_al_min and i > et_al_use_first then
               needEtAl = true
               break
            end
            local t = self:_a_name(name_node.options, name_node, name)
            self.firstName = false
            table.insert(l, t)
         end
         local joined
         if needEtAl then
            -- TODO THINGS TO SUPPORT THAT MIGHT REQUIRE A REFACTOR
            -- They are not needed in Chicago style, so let's keep it simple for now.
            --   delimiter-precedes-et-al ("contextual" by default = hard-coded)
            --   et-al-use-last (default false, if true, the last is rendered as ", ... Name) instead of using et-al.
            local rendered_et_al = self:_name_et_al(et_al_opts)
            local sep_et_al = #l > 1 and name_delimiter or " "
            joined = table.concat(l, name_delimiter) .. sep_et_al .. rendered_et_al
         elseif #l == 1 then
            joined = l[1]
         elseif #l > 1 then
            local last = table.remove(l)
            joined = table.concat(l, name_delimiter) .. " " .. and_word .. " " .. last
         end
         -- An empty name list renders nothing, so that the substitution
         -- logic below can kick in instead of crashing on concatenation.
         if joined then
            if label then
               joined = is_label_first and (label .. joined) or (joined .. label)
            end
            table.insert(output, joined)
         end
      end
   end

   if #output == 0 and substitute_node then
      return self:_substitute(options, substitute_node, entry)
   end
   if #output == 0 then
      return nil
   end
   local t = self:_render_delimiter(output, names_delimiter)
   t = self:_render_formatting(t, options)
   t = self:_render_affixes(t, options)
   t = self:_render_display(t, options)
   return t
end

--- Render cs:names: resolve the options, then render.
-- The style AST is left untouched. Resolved options go into a copy of the
-- cs:name node, so that options inherited in one mode (citation) do not
-- leak into the other (bibliography) when a macro is shared.
-- @tparam table options Element options (variable is required)
-- @tparam table content Element node (name, label, et-al, substitute children)
-- @tparam table entry Entry being rendered
-- @treturn string|nil
function CslEngine:_names (options, content, entry)
   -- Extract needed elements and options from the content
   local name_node = nil
   local label_opts = nil
   local et_al_opts = {}
   local substitute = nil
   local is_label_first = false
   for _, child in ipairs(content) do
      if child.command == "cs:substitute" then
         substitute = child
      elseif child.command == "cs:et-al" then
         et_al_opts = child.options
      elseif child.command == "cs:label" then
         if not name_node then
            is_label_first = true
         end
         label_opts = child.options
      elseif child.command == "cs:name" then
         name_node = child
      end
   end
   if not name_node then
      name_node = { command = "cs:name", options = {} }
   end
   -- Build inherited options (pl.tablex.union returns a new table)
   local inherited_opts = pl.tablex.union(self.inheritable[self.mode], options)
   local name_opts = pl.tablex.union(inherited_opts, name_node.options)
   name_opts.form = name_opts.form or inherited_opts["name-form"]
   -- Shallow copy of the node carrying the resolved options
   local resolved_name_node = pl.tablex.copy(name_node)
   resolved_name_node.options = name_opts

   local et_al_min = tonumber(name_opts["et-al-min"]) or 4 -- No default in the spec, using Chicago's
   local et_al_use_first = tonumber(name_opts["et-al-use-first"]) or 1
   local and_opt = name_opts["and"] or "text"
   -- text by default; the symbol is emitted escaped, like all other output
   local and_word = and_opt == "symbol" and "&amp;" or self:_render_term("and")
   -- The inheritable option for the cs:name delimiter is "name-delimiter".
   -- "names-delimiter" is kept as a legacy fallback, and ", " is the CSL
   -- default, so that the delimiter is never nil.
   -- NOTE(review): name_opts.delimiter may still come from the cs:names
   -- delimiter via the union above -- confirm whether that is intended.
   local name_delimiter = name_opts.delimiter
      or inherited_opts["name-delimiter"]
      or inherited_opts["names-delimiter"]
      or ", "
   -- local delimiter_precedes_et_al = name_opts["delimiter-precedes-et-al"] -- TODO NOT IMPLEMENTED

   local escaped_delimiter = self.cache.escaped[name_delimiter]
   if not escaped_delimiter then
      escaped_delimiter = self:_xmlEscape(name_delimiter)
      self.cache.escaped[name_delimiter] = escaped_delimiter
   end

   local resolved = {
      variable = SU.required(name_opts, "variable", "CSL names"),
      et_al_min = et_al_min,
      et_al_use_first = et_al_use_first,
      and_word = and_word,
      name_delimiter = escaped_delimiter,
      is_label_first = is_label_first,
      label_opts = label_opts,
      et_al_opts = et_al_opts,
      name_node = resolved_name_node,
      names_delimiter = options.delimiter or inherited_opts["names-delimiter"],
   }
   resolved = pl.tablex.union(options, resolved)

   return self:_names_with_resolved_opts(resolved, substitute, entry)
end

--- Render cs:label.
-- @tparam table options Element options (variable is required)
-- @tparam table|nil content Element node
-- @tparam table entry Entry being rendered
-- @treturn string|nil
function CslEngine:_label (options, content, entry)
   local variable = SU.required(options, "variable", "CSL label")
   local value = entry[variable]
   self:_addGroupVariable(variable, value)
   if variable == "locator" then
      -- For locators, the term to render is the locator's own label.
      variable = value and value.label
      value = value and value.value
   end
   if value then
      local plural = options.plural
      if plural == "always" then
         plural = true
      elseif plural == "never" then
         plural = false
      else -- "contextual" by default
         if variable == "number-of-pages" or variable == "number-of-volumes" then
            local v = tonumber(value)
            plural = v and v > 1 or false
         else
            if type(value) == "table" then
               plural = #value > 1
            else
               local _, count = string.gsub(tostring(value), "%d+", "") -- naive count of numbers
               plural = count > 1
            end
         end
      end
      value = self:_render_term(variable, options.form or "long", plural)
      value = self:_render_stripPeriods(value, options)
      value = self:_render_textCase(value, options)
      value = self:_render_formatting(value, options)
      value = self:_render_affixes(value, options)
      return value
   end
   return value
end

--- Render cs:group.
-- @tparam table options Element options
-- @tparam table content Element node
-- @tparam table entry Entry being rendered
-- @treturn string|nil nil when the group is suppressed
function CslEngine:_group (options, content, entry)
   self:_enterGroup()

   local t = self:_render_children(content, entry, { delimiter = options.delimiter })
   t = self:_render_formatting(t, options)
   t = self:_render_affixes(t, options)
   t = self:_render_display(t, options)

   t = self:_leaveGroup(t) -- Takes care of group suppression
   return t
end

--- Evaluate a cs:if or cs:else-if, and render its content on match.
-- @tparam table options Element options (conditions and match)
-- @tparam table content Element node
-- @tparam table entry Entry being rendered
-- @treturn string|nil Rendered content (nil when not matching)
-- @treturn boolean Whether the conditions matched
function CslEngine:_if (options, content, entry)
   local match = options.match or "all"
   local conds = {}
   if options.variable then
      local vars = pl.stringx.split(options.variable, " ")
      for _, var in ipairs(vars) do
         local cond = entry[var] and true or false
         table.insert(conds, cond)
      end
   end
   if options.type then
      local types = pl.stringx.split(options.type, " ")
      local cond = false
      -- Different from other conditions:
      -- For types, Zeping Lee explained the matching is always "any".
      for _, typ in ipairs(types) do
         if entry.type == typ then
            cond = true
            break
         end
      end
      table.insert(conds, cond)
   end
   if options["is-numeric"] then
      for _, var in ipairs(pl.stringx.split(options["is-numeric"], " ")) do
         -- TODO FIXME NOT IMPLEMENTED FULLY
         -- Content is considered numeric if it solely consists of numbers.
         -- Numbers may have prefixes and suffixes (“D2”, “2b”, “L2d”), and may
         -- be separated by a comma, hyphen, or ampersand, with or without
         -- spaces (“2, 3”, “2-4”, “2 & 4”). For example, “2nd” tests “true” whereas
         -- “second” and “2nd edition” test “false”.
         local cond = tonumber(entry[var]) and true or false
         table.insert(conds, cond)
      end
   end
   if options["is-uncertain-date"] then
      for _, var in ipairs(pl.stringx.split(options["is-uncertain-date"], " ")) do
         local d = type(entry[var]) == "table" and entry[var]
         local cond = d and d.approximate and true or false
         table.insert(conds, cond)
      end
   end
   if options.locator then
      -- Must be a real boolean: inserting nil would silently drop the
      -- condition, and a missing locator would then count as a match.
      local cond = type(entry.locator) == "table" and entry.locator.label == options.locator
      table.insert(conds, cond)
   end
   -- FIXME TODO other conditions: position, disambiguate
   for _, v in ipairs({ "position", "disambiguate" }) do
      if options[v] then
         SU.warn("CSL if condition " .. v .. " not implemented yet")
      end
   end
   -- Apply match
   local matching = match ~= "any"
   for _, cond in ipairs(conds) do
      if match == "all" then
         if not cond then
            matching = false
            break
         end
      elseif match == "any" then
         if cond then
            matching = true
            break
         end
      elseif match == "none" then
         if cond then
            matching = false
            break
         end
      end
   end
   if matching then
      return self:_render_children(content, entry), true
      -- FIXME:
      -- The CSL specification says: "Delimiters from the nearest delimiters
      -- from the nearest ancestor delimiting element are applied within the
      -- output of cs:choose (i.e., the output of the matching cs:if,
      -- cs:else-if, or cs:else; see delimiter)."
      -- Ugh. This is rather obscure and not implemented yet (?)
   end
   return nil, false
end

--- Render cs:choose: the first matching branch is rendered.
-- @tparam table options Element options
-- @tparam table content Element node (if, else-if, else children)
-- @tparam table entry Entry being rendered
-- @treturn string|nil
function CslEngine:_choose (options, content, entry)
   for _, c in ipairs(content) do
      if c.command == "cs:if" or c.command == "cs:else-if" then
         local t, match = self:_if(c.options, c, entry)
         if match then
            return t
         end
      elseif c.command == "cs:else" then
         return self:_render_children(c, entry)
      end
   end
end

--- Render cs:sort (not implemented, renders nothing).
function CslEngine:_sort (options, content, entry)
   -- FIXME TODO
   -- Silent for now.
   -- SU.warn("CSL sort not implemented yet")
end

-- PROCESSING

--- Render a node by dispatching on its command ("cs:xxx" -> self:_xxx).
-- @tparam table node AST node (command, options, children)
-- @tparam table entry Entry being rendered
-- @treturn string|nil
function CslEngine:_render_node (node, entry)
   local callback = node.command:gsub("cs:", "_")
   if self[callback] then
      return self[callback](self, node.options, node, entry)
   else
      SU.warn("Unknown CSL element " .. node.command .. " (" .. callback .. ")")
   end
end

--- Render the children of a node and join them.
-- @tparam table|nil ast Node whose array part holds the children
-- @tparam table entry Entry being rendered
-- @tparam[opt] table context Rendering context (delimiter)
-- @treturn string|boolean|nil Joined output; false when nothing was
--   rendered; nil when ast is nil
function CslEngine:_render_children (ast, entry, context)
   if not ast then
      return
   end
   local ret = {}
   context = context or {}
   for _, content in ipairs(ast) do
      if type(content) == "table" and content.command then
         local r = self:_render_node(content, entry)
         if r then
            table.insert(ret, r)
         end
      else
         SU.error("CSL unexpected content") -- Should not happen
      end
   end

   return #ret > 0 and self:_render_delimiter(ret, context.delimiter)
end

--- Apply global typographic fixes to the rendered text.
-- @tparam string text Rendered text
-- @treturn string
function CslEngine:_postrender (text)
   local rdquote = self.punctuation.close_quote
   local ldquote = self.punctuation.open_quote
   local rsquote = self.punctuation.close_inner_quote
   local piquote = SU.boolean(self.locale.styleOptions["punctuation-in-quote"], false)

   -- Typography: Ensure there are no double straight quotes left from the input.
   text = luautf8.gsub(text, '^"', ldquote)
   text = luautf8.gsub(text, '"$', rdquote)
   text = luautf8.gsub(text, '([%s%p])"', "%1" .. ldquote)
   text = luautf8.gsub(text, '"([%s%p])', rdquote .. "%1")
   -- HACK: punctuation-in-quote is applied globally, not just to generated quotes.
   -- Not so sure it's the intended behavior from the specification?
   if piquote then
      -- move commas and periods before closing quotes
      text = luautf8.gsub(text, "([" .. rdquote .. rsquote .. "]+)%s*([.,])", "%2%1")
   end
   -- HACK: fix some double punctuation issues.
   -- Maybe some more robust way to handle affixes and delimiters would be better?
   text = luautf8.gsub(text, "%.%.", ".")
   -- Typography: Prefer to have commas and periods inside italics.
   -- (Better looking if italic automated corrections are applied.)
   -- NOTE(review): as written, the first capture is an empty "()" position
   -- capture. Given the comment above, it probably lost a literal
   -- closing-emphasis marker -- confirm upstream before relying on it.
   text = luautf8.gsub(text, "()([%.,])", "%2%1")
   -- HACK: remove extraneous periods after exclamation and question marks.
   -- (Follows the preceding rule to also account for moved periods.)
   text = luautf8.gsub(text, "([…!?])%.", "%1")
   if not piquote then
      -- HACK: remove extraneous periods after quotes.
      -- Opinionated, e.g. for French at least, some typographers wouldn't
      -- frown upon a period after a quote ending with an exclamation mark
      -- or a question mark. But it's ugly.
      text = luautf8.gsub(text, "([…!?%.]" .. rdquote .. ")%.", "%1")
   end
   return text
end

--- Process an entry (or a list of entries) in a given mode.
-- @tparam table entry Entry or list of entries
-- @tparam string mode "citation" or "bibliography"
-- @treturn string Rendered text (empty when nothing was rendered)
function CslEngine:_process (entry, mode)
   if mode ~= "citation" and mode ~= "bibliography" then
      SU.error("CSL processing mode must be 'citation' or 'bibliography'")
   end
   self:_prerender(mode)
   -- Deep copy the entry as cs:substitute may remove fields
   entry = pl.tablex.deepcopy(entry)
   local ast = self[mode]
   if not ast then
      SU.error("CSL style has no " .. mode .. " definition")
   end
   local res = self:_render_children(ast, entry)
   if not res then
      -- Nothing was rendered: there is no text to post-process.
      return ""
   end
   return self:_postrender(res)
end

--- Generate a citation string.
-- @tparam table entry List of CSL-JSON entries
-- @treturn string The citation string
function CslEngine:cite (entry)
   return self:_process(entry, "citation")
end

--- Generate a reference string.
+-- @tparam table entry CSL-JSON entry to render
+-- @treturn string The reference string
+function CslEngine:reference (entry)
+   return self:_process(entry, "bibliography")
+end
+
+return {
+   CslEngine = CslEngine,
+}
diff --git a/csl/core/locale.lua b/csl/core/locale.lua
new file mode 100644
index 000000000..a80a94417
--- /dev/null
+++ b/csl/core/locale.lua
@@ -0,0 +1,249 @@
+--- Reader for CSL 1.0.2 locale files
+--
+-- @copyright License: MIT (c) 2024 Omikhleia
+--
+-- Public API:
+-- - (static method) CslLocale.parse(doc) -> CslLocale
+-- - (static method) CslLocale.read(filename) -> CslLocale
+-- - CslLocale:date(form) -> table
+-- - CslLocale:term(name, form?, plural?) -> string, gender
+-- - CslLocale:ordinal(number, form?, gender-form?, plural?) -> string
+-- - CslLocale:case(text, textCase) -> string
+--
+
+local casing = require("csl.core.utils.casing")
+
+local xmlparser = require("csl.core.utils.xmlparser")
+local parse = xmlparser.parse
+local rules = {
+   prefix = "cs:",
+   skipEmptyStrings = true,
+   preserveEmptyStrings = {},
+   stripSpaces = true,
+   preserveSpaces = { text = true, title = true, id = true, term = true },
+}
+
+local CslLocale = pl.class()
+
+--- (Constructor) Build a locale object from a parsed locale AST.
+-- @tparam table locale AST of the locale document, as produced by the XML parser
+function CslLocale:_init (locale)
+   self.locale = locale
+   self.terms = {}
+   self.dates = {}
+   self.styleOptions = {}
+   self.lang = "und"
+   self:_preprocess()
+   self.locale = nil -- We don't need the AST anymore
+end
+
+-- Store items in more convenient structures and maps:
+-- self.terms[name][form][gender-form] -> cs:term node
+-- self.dates[form] -> cs:date-part nodes
+-- self.styleOptions -> attributes of cs:style-options
+function CslLocale:_preprocess ()
+   -- Keep the "und" default set by the constructor when xml:lang is absent
+   self.lang = self.locale.options["xml:lang"] or self.lang
+
+   for _, content in ipairs(self.locale) do
+      if content.command == "cs:terms" then
+         for _, term in ipairs(content) do
+            if term.command == "cs:term" then
+               local name = term.options.name
+               if not name then
+                  SU.error("CSL locale term without name")
+               end
+               local form = term.options.form or "long"
+               -- gender-form is only used for ordinal terms, but it's simpler
+               -- to just store it for all terms and have a consistent internal
+               -- representation
+               local genderf = term.options["gender-form"] or "neuter"
+
+               self.terms[name] = self.terms[name] or {}
+               self.terms[name][form] = self.terms[name][form] or {}
+               -- Whole term (not sub-content) for its attributes
+               self.terms[name][form][genderf] = term
+            end
+         end
+      elseif content.command == "cs:style-options" then
+         self.styleOptions = content.options
+      elseif content.command == "cs:date" then
+         local form = content.options.form
+         if not form then
+            SU.error("CSL locale date without form")
+         end
+         -- extract the cs:date-part sub-content
+         self.dates[form] = SU.ast.subContent(content)
+      end
+   end
+end
+
+-- Text value of a term node (or of its cs:single / cs:multiple child):
+-- the first child of the node.
+function CslLocale:_termvalue (term) -- luacheck: no unused args
+   return term[1]
+end
+
+-- Find the cs:term node for a name, form and gender-form.
+-- Missing forms fall back along verb-short -> verb -> long and
+-- symbol -> short -> long; missing gender-forms fall back to "neuter".
+-- Returns nil when nothing is found.
+function CslLocale:_lookupTerm (name, form, genderf)
+   local t = self.terms[name]
+   if not t then
+      return nil
+   end
+   form = form or "long"
+   local f = t[form]
+   if not f then
+      -- If not found, check for form fallbacks
+      if form == "long" then
+         return nil -- (No fallback)
+      end
+      if form == "verb-short" then
+         form = "verb"
+      elseif form == "symbol" then
+         form = "short"
+      elseif form == "verb" or form == "short" then
+         form = "long"
+      else
+         -- Unknown form: there is no fallback chain to follow, and retrying
+         -- with the same form would recurse forever.
+         return nil
+      end
+      return self:_lookupTerm(name, form, genderf)
+   end
+   genderf = genderf or "neuter"
+   local g = f[genderf]
+   if not g then
+      if genderf == "neuter" then
+         return nil -- (No fallback)
+      end
+      return self:_lookupTerm(name, form, "neuter")
+   end
+   SU.debug("csl", "Lookup term", name, form, genderf)
+   return g
+end
+
+-- Find the cs:term node holding the ordinal suffix for a number (0-99).
+-- Tries the exact "ordinal-NN" term, then (for 10-99) the term of the last
+-- digit when its match rule allows it, then the generic "ordinal" term.
+function CslLocale:_lookupShortOrdinal (number, genderf)
+   SU.debug("csl", "Lookup short-ordinal", number, genderf)
+   number = tonumber(number)
+   if not number then
+      SU.error("CSL ordinal term requires a number")
+   end
+
+   -- Case 0-9
+   if number < 10 then
+      local name = ("ordinal-%02d"):format(number)
+      local term = self:_lookupTerm(name, "long", genderf)
+      if term then -- direct match on 0-9
+         return term
+      end
+      return self:_lookupTerm("ordinal", "long", genderf)
+   end
+   -- Case 10-99
+   if number < 100 then
+      local name = ("ordinal-%02d"):format(number)
+      local term = self:_lookupTerm(name, "long", genderf)
+      if term then
+         return term
+      end
+      -- No direct match, try to match the last digit
+      local lastDigit = number % 10
+      local nameLastDigit = ("ordinal-%02d"):format(lastDigit)
+      local termLastDigit = self:_lookupTerm(nameLastDigit, "long", genderf)
+      if termLastDigit then
+         -- The match attribute lives in the node options (like gender).
+         -- Only the default "last-digit" rule applies to a two-digit number:
+         -- "last-two-digits" and "whole-number" both require more than the
+         -- last digit to agree.
+         local match = termLastDigit.options and termLastDigit.options.match
+         if match == nil or match == "last-digit" then
+            return termLastDigit
+         end
+      end
+      return self:_lookupTerm("ordinal", "long", genderf)
+   end
+   -- TODO FIXME: CSL specs do define rules for larger numbers, but is this really useful?
+   -- Not bothering for now!
+   SU.error("CSL ordinal beyond currently supported range")
+end
+
+-- PUBLIC METHODS
+
+--- Lookup a date format in the locale.
+-- @tparam string form The form of the date ('numeric' or 'text')
+-- @treturn table The date format as a table of cs:date-parts
+function CslLocale:date (form)
+   local d = self.dates[form]
+   if not d then
+      SU.error("CSL locale date format not found: " .. tostring(form))
+   end
+   return d
+end
+
+--- Lookup a term in the locale.
+-- Reserved for non-ordinal terms.
+-- @tparam string name The name of the term
+-- @tparam string form The form of the term (default: "long")
+-- @tparam boolean plural Whether to return the plural form (default: false)
+-- @treturn string,string The term (nil when not found), and the gender of the term (or nil)
+function CslLocale:term (name, form, plural)
+   local term = self:_lookupTerm(name, form)
+   if not term then
+      return nil
+   end
+   if type(term[1]) == "string" then
+      return self:_termvalue(term), term.options.gender
+   end
+   local sgpl = SU.ast.findInTree(term, plural and "cs:multiple" or "cs:single")
+   if not sgpl then
+      return SU.error("CSL term error for singular/multiple: " .. name)
+   end
+   return self:_termvalue(sgpl), term.options.gender
+end
+
+--- Lookup an ordinal term in the locale.
+-- Reserved for ordinal terms.
+-- @tparam number number The numeric value to be formatted
+-- @tparam string form The form of the term (default: "short")
+-- @tparam string genderf The gender-form of the term (default: "neuter")
+-- @tparam boolean plural Whether to return the plural form (default: false)
+-- @treturn string The number followed by its ordinal suffix
+function CslLocale:ordinal (number, form, genderf, plural)
+   if form == "long" then
+      -- TODO FIXME: Not sure this is widely used, not bothering for now
+      SU.warn("CSL long-ordinal term not implemented, fallback to short ordinals")
+   end
+   local term = self:_lookupShortOrdinal(number, genderf)
+   if not term then
+      SU.error("CSL ordinal term not found for ordinal: " .. tostring(number))
+   end
+   if type(term[1]) == "string" then
+      return number .. self:_termvalue(term)
+   end
+   -- Singular/plural variants of a term are cs:single and cs:multiple
+   -- (same element names as in CslLocale:term).
+   local sgpl = SU.ast.findInTree(term, plural and "cs:multiple" or "cs:single")
+   if not sgpl then
+      SU.error("CSL ordinal term not found for ordinal: " .. tostring(number))
+   end
+   return number .. self:_termvalue(sgpl)
+end
+
+--- Apply a text case transformation.
+-- @tparam string text Text to transform
+-- @tparam string textCase CSL case transformation
+-- @treturn string The transformed text (unchanged if the transformation is unknown)
+function CslLocale:case (text, textCase)
+   local lang = self.lang
+   if not casing[textCase] then
+      SU.warn("CSL locale case not found: " .. tostring(textCase))
+      return text
+   end
+   return casing[textCase](text, lang)
+end
+
+--- Parse a CSL locale file (static method).
+-- @tparam string doc The CSL locale file content
+-- @treturn CslLocale The locale object (or nil, error message on failure)
+function CslLocale.parse (doc)
+   local loc, err = parse(doc, rules)
+   if not loc then
+      return nil, err
+   end
+   return CslLocale(loc)
+end
+
+--- Read a CSL locale file (static method).
+-- @tparam string filename The resolved filename of the locale file
+-- @treturn CslLocale The locale object (or nil, error message on failure)
+function CslLocale.read (filename)
+   local file, err = io.open(filename)
+   if not file then
+      return nil, err
+   end
+   local doc = file:read("*a")
+   file:close()
+   return CslLocale.parse(doc)
+end
+
+return {
+   CslLocale = CslLocale,
+}
diff --git a/csl/core/style.lua b/csl/core/style.lua
new file mode 100644
index 000000000..e6ad261df
--- /dev/null
+++ b/csl/core/style.lua
@@ -0,0 +1,102 @@
+--- Reader for CSL 1.0.2 style files
+--
+-- @copyright License: MIT (c) 2024 Omikhleia
+--
+-- Public API:
+-- - (static method) CslStyle.parse(doc) -> CslStyle
+-- - (static method) CslStyle.read(filename) -> CslStyle
+--
+
+local xmlparser = require("csl.core.utils.xmlparser")
+local parse = xmlparser.parse
+local rules = {
+   prefix = "cs:",
+   skipEmptyStrings = true,
+   preserveEmptyStrings = {},
+   stripSpaces = true,
+   preserveSpaces = { text = true, title = true, id = true, term = true },
+}
+
+local CslStyle = pl.class()
+
+--- (Constructor) Build a style object from a parsed style AST.
+-- @tparam table csl AST of the style document, as produced by the XML parser
+function CslStyle:_init (csl)
+   self.csl = csl
+   self.macros = {}
+   self.locales = {}
+   self.bibliography = nil
+   self.citation = nil
+   self.globalOptions = {}
+   self:_preprocess()
+   self.csl = nil -- We don't need the AST anymore
+end
+
+-- Store items in more convenient structures and maps:
+-- self.macros[name] -> sub-content of the cs:macro
+-- self.locales[lang] -> whole cs:locale node
+-- self.citation, self.bibliography -> whole nodes (with their attributes)
+-- When an item is defined several times, a warning is issued and the last
+-- definition wins.
+function CslStyle:_preprocess ()
+   -- Global options and inheritable name options
+   self.globalOptions = self.csl.options
+
+   -- Extract macros, locale overrides, citation and bibliography
+   for _, content in ipairs(self.csl) do
+      if content.command == "cs:macro" then
+         local name = content.options and content.options.name
+         if not name then
+            SU.error("CSL macro without name")
+         end
+         if self.macros[name] then
+            SU.warn("CSL macro " .. name .. " has multiple definitions, using the last one")
+         end
+         self.macros[name] = SU.ast.subContent(content)
+      elseif content.command == "cs:locale" then
+         local lang = content.options and content.options["xml:lang"]
+         if not lang then
+            SU.error("CSL locale without xml:lang")
+         end
+         if self.locales[lang] then
+            SU.warn("CSL locale " .. lang .. " has multiple definitions, using the last one")
+         end
+         -- Don't subContent, so we have full locales here (overrides)
+         self.locales[lang] = content
+      elseif content.command == "cs:citation" then
+         if self.citation then
+            SU.warn("CSL has multiple citation definitions, using the last one")
+         end
+         -- Don't subContent, we want to keep the whole citation options (attributes)
+         self.citation = content
+      elseif content.command == "cs:bibliography" then
+         if self.bibliography then
+            SU.warn("CSL has multiple bibliography definitions, using the last one")
+         end
+         -- Don't subContent, we want to keep the whole bibliography options (attributes)
+         self.bibliography = content
+      end
+      -- We can ignore cs:info and don't expect other top-level elements
+   end
+end
+
+--- Parse a CSL style document (static method).
+-- @tparam string doc The CSL style document
+-- @treturn CslStyle The parsed CSL style object (or nil, error message on failure)
+function CslStyle.parse (doc)
+   local csl, err = parse(doc, rules)
+   if not csl then
+      return nil, err
+   end
+   return CslStyle(csl)
+end
+
+--- Read a CSL style file (static method).
+-- @tparam string filename The resolved filename of the CSL style file
+-- @treturn CslStyle The parsed CSL style object (or nil, error message on failure)
+function CslStyle.read (filename)
+   local file, err = io.open(filename)
+   if not file then
+      return nil, err
+   end
+   local doc = file:read("*a")
+   file:close()
+   return CslStyle.parse(doc)
+end
+
+return {
+   CslStyle = CslStyle,
+}
diff --git a/csl/core/utils/casing.lua b/csl/core/utils/casing.lua
new file mode 100644
index 000000000..501b80e93
--- /dev/null
+++ b/csl/core/utils/casing.lua
@@ -0,0 +1,45 @@
+--- Casing functions for CSL locales.
+--
+-- @copyright License: MIT (c) 2024 Omikhleia
+--
+-- Objectives: provide functions to handle text casing in CSL locales.
+--
+
+local icu = require("justenoughicu")
+-- N.B. We don't use the textcase package here:
+-- The language is a BCP47 identifier from the CSL locale.
+
+-- Uppercase the first character of the text, leaving the rest untouched.
+local capitalizeFirst = function (text, lang)
+   local first = luautf8.sub(text, 1, 1)
+   local rest = luautf8.sub(text, 2)
+   return icu.case(first, lang, "upper") .. rest
+end
+
+--- Text casing methods for CSL.
+-- @table casing methods for lower, upper, capitalize-first, capitalize-all, title, sentence
+local casing = {
+   -- Straightforward
+   ["lowercase"] = function (text, lang)
+      return icu.case(text, lang, "lower")
+   end,
+   ["uppercase"] = function (text, lang)
+      return icu.case(text, lang, "upper")
+   end,
+   ["capitalize-first"] = capitalizeFirst,
+
+   -- Opinionated: even ICU does not really handle this well.
+   -- It does not have good support for exceptions (small words, prepositions,
+   -- articles), etc. in most languages
+   -- So fallback to capitalize-first.
+   ["capitalize-all"] = capitalizeFirst,
+   ["title"] = capitalizeFirst,
+
+   -- Deprecated.
+   -- Let's not bother with it.
+   ["sentence"] = function (text, _)
+      SU.warn("Sentence case is deprecated in CSL 1.0.x (ignored)")
+      return text
+   end,
+}
+
+return casing
diff --git a/csl/core/utils/superfolding.lua b/csl/core/utils/superfolding.lua
new file mode 100644
index 000000000..6a33062f4
--- /dev/null
+++ b/csl/core/utils/superfolding.lua
@@ -0,0 +1,147 @@
+--- Superscript folding for CSL locales.
+--
+-- @copyright License: MIT (c) 2024 Omikhleia
+--
+-- Objectives: replace Unicode superscripted characters with their normal
+-- counterparts.
+--
+-- Based on Datafile for Unicode Technical Report #30
+-- http://unicode.org/reports/tr30/datafiles/SuperscriptFolding.txt
+-- Copyright (c) 1991-2004 Unicode, Inc.
+-- For terms of use, and documentation see http://www.unicode.org/reports/tr30/
+--
+-- Note that TR30 is not normative (and is currently suspended)
+-- Maybe we should use other sources, see:
+-- https://en.wikipedia.org/wiki/Unicode_subscripts_and_superscripts
+--
+
+local supersyms = {
+   -- "characters with compatibility decomposition in UnicodeData.txt"
+   ["ª"] = "a",
+   ["²"] = "2",
+   ["³"] = "3",
+   ["¹"] = "1",
+   ["º"] = "o",
+   ["ʰ"] = "h",
+   ["ʱ"] = "ɦ",
+   ["ʲ"] = "j",
+   ["ʳ"] = "r",
+   ["ʴ"] = "ɹ",
+   ["ʵ"] = "ɻ",
+   ["ʶ"] = "ʁ",
+   ["ʷ"] = "w",
+   ["ʸ"] = "y",
+   ["ˠ"] = "ɣ",
+   ["ˡ"] = "l",
+   ["ˢ"] = "s",
+   ["ˣ"] = "x",
+   ["ˤ"] = "ʕ",
+   ["ᴬ"] = "A",
+   ["ᴭ"] = "Æ",
+   ["ᴮ"] = "B",
+   ["ᴰ"] = "D",
+   ["ᴱ"] = "E",
+   ["ᴲ"] = "Ǝ",
+   ["ᴳ"] = "G",
+   ["ᴴ"] = "H",
+   ["ᴵ"] = "I",
+   ["ᴶ"] = "J",
+   ["ᴷ"] = "K",
+   ["ᴸ"] = "L",
+   ["ᴹ"] = "M",
+   ["ᴺ"] = "N",
+   ["ᴼ"] = "O",
+   ["ᴽ"] = "Ȣ",
+   ["ᴾ"] = "P",
+   ["ᴿ"] = "R",
+   ["ᵀ"] = "T",
+   ["ᵁ"] = "U",
+   ["ᵂ"] = "W",
+   ["ᵃ"] = "a",
+   ["ᵄ"] = "ɐ",
+   ["ᵅ"] = "ɑ",
+   ["ᵆ"] = "ᴂ",
+   ["ᵇ"] = "b",
+   ["ᵈ"] = "d",
+   ["ᵉ"] = "e",
+   ["ᵊ"] = "ə",
+   ["ᵋ"] = "ɛ",
+   ["ᵌ"] = "ɜ",
+   ["ᵍ"] = "g",
+   ["ᵏ"] = "k",
+   ["ᵐ"] = "m",
+   ["ᵑ"] = "ŋ",
+   ["ᵒ"] = "o",
+   ["ᵓ"] = "ɔ",
+   ["ᵔ"] = "ᴖ",
+   ["ᵕ"] = "ᴗ",
+   ["ᵖ"] = "p",
+   ["ᵗ"] = "t",
+   ["ᵘ"] = "u",
+   ["ᵙ"] = "ᴝ",
+   ["ᵚ"] = "ɯ",
+   ["ᵛ"] = "v",
+   ["ᵜ"] = "ᴥ",
+   ["ᵝ"] = "β",
+   ["ᵞ"] = "γ",
+   ["ᵟ"] = "δ",
+   ["ᵠ"] = "φ",
+   ["ᵡ"] = "χ",
+   ["⁰"] = "0",
+   ["ⁱ"] = "i",
+   ["⁴"] = "4",
+   ["⁵"] = "5",
+   ["⁶"] = "6",
+   ["⁷"] = "7",
+   ["⁸"] = "8",
+   ["⁹"] = "9",
+   ["⁺"] = "+",
+   ["⁻"] = "−",
+   ["⁼"] = "=",
+   ["⁽"] = "(",
+   ["⁾"] = ")",
+   ["ⁿ"] = "n",
+   -- ['℠'] = 'SM', -- Keep symbol
+   -- ['™'] = 'TM', -- Keep symbol
+   -- ['㆒'] = '一', -- Keep ideographic characters (?)
+   -- ['㆓'] = '二',
+   -- ['㆔'] = '三',
+   -- ['㆕'] = '四',
+   -- ['㆖'] = '上',
+   -- ['㆗'] = '中',
+   -- ['㆘'] = '下',
+   -- ['㆙'] = '甲',
+   -- ['㆚'] = '乙',
+   -- ['㆛'] = '丙',
+   -- ['㆜'] = '丁',
+   -- ['㆝'] = '天',
+   -- ['㆞'] = '地',
+   -- ['㆟'] = '人',
+
+   -- "other characters that are superscripted forms"
+   ["ˀ"] = "ʔ",
+   ["ˁ"] = "ʕ",
+   -- ['ۥ'] = 'و', -- Keep Arabic characters (combining?)
+   -- ['ۦ'] = 'ي',
+}
+
+-- pattern for groups of superscripted characters
+local vals = {}
+for k in pairs(supersyms) do
+   table.insert(vals, k)
+end
+local pat = "[" .. table.concat(vals) .. "]+"
+
+--- Replace Unicode superscripted characters with their normal counterparts.
+-- Each run of consecutive superscripted characters is processed as a group.
+-- @tparam string str The string to process.
+-- @treturn string The string with superscripted characters replaced.
+local function superfolding (str)
+   -- gsub also returns the number of substitutions: keep the string only, so
+   -- that callers forwarding the result (e.g. as a function argument) get
+   -- exactly one value, as documented.
+   local folded = luautf8.gsub(str, pat, function (group)
+      local replaced = luautf8.gsub(group, ".", function (char)
+         return supersyms[char]
+      end)
+      -- NOTE(review): the empty prefix and suffix look like markup delimiters
+      -- lost on the way (presumably an opening and closing superscript tag
+      -- around the folded group) -- confirm against the original file.
+      return "" .. replaced .. ""
+   end)
+   return folded
+end
+
+return superfolding
diff --git a/csl/core/utils/xmlparser.lua b/csl/core/utils/xmlparser.lua
new file mode 100644
index 000000000..51b7ab734
--- /dev/null
+++ b/csl/core/utils/xmlparser.lua
@@ -0,0 +1,139 @@
+--- Modified XML parser
+--
+-- MOSTLY ADAPTED FROM SILE's XML INPUTTER
+-- BUT WITH EXTRA FEATURES FOR NAMESPACING AND SPACES CLEANING.
+--
+-- It simplifies the processing a lot later...
+-- TODO FIXME: This could raise an interesting discussion about the supposedly
+-- generic XML support in SILE...
+
+local lxp = require("lxp")
+
+local defaultRules = {
+   -- NAMESPACING:
+   -- If defined, prefix is prepended to the tag name to create the SILE
+   -- command name.
+   -- This is a way to avoid conflicts between different XML formats and
+   -- SILE commands.
+   prefix = nil,
+   -- SPACES CLEANING:
+   -- Depending on the XML schema, some spaces may be irrelevant.
+   -- Some XML nodes are containers for other nodes. They may have spaces
+   -- in their content, due to the XML formatting and indentation.
+   -- Some XML nodes contain text that should be stripped of trailing and
+   -- leading spaces.
+   -- It is cumbersome to have to strip spaces in the SILE content later,
+   -- so we can define here the nodes for which we want to strip spaces.
+   -- skipEmptyStrings is either a boolean or a table with tags to skip
+   -- text strings composed only of spaces in elements.
+   -- When set to true, all elements are considered by default. In that
+   -- case, preserveEmptyStrings is used to keep empty strings in some
+   -- elements.
+   -- stripSpaces is either a boolean or a table with tags to strip the
+   -- leading and trailing spaces in text elements.
+   -- When set to true, all elements are considered by default. In that
+   -- case, preserveSpaces is used to keep spaces in some tags.
+   stripSpaces = false,
+   preserveSpaces = {},
+   skipEmptyStrings = false,
+   preserveEmptyStrings = {},
+}
+
+-- Tell whether leading and trailing spaces must be stripped from the text of
+-- the given tag. preserveSpaces[tag] always wins over stripSpaces.
+local function isStripSpaces (tag, rules)
+   if type(rules.stripSpaces) == "table" then
+      return rules.stripSpaces[tag] and not rules.preserveSpaces[tag]
+   end
+   return rules.stripSpaces and not rules.preserveSpaces[tag]
+end
+
+-- Tell whether space-only text must be dropped in the given tag.
+-- preserveEmptyStrings[tag] always wins over skipEmptyStrings.
+local function isSkipEmptyStrings (tag, rules)
+   if type(rules.skipEmptyStrings) == "table" then
+      return rules.skipEmptyStrings[tag] and not rules.preserveEmptyStrings[tag]
+   end
+   return rules.skipEmptyStrings and not rules.preserveEmptyStrings[tag]
+end
+
+-- StartElement callback: push a new, still empty, command on the stack.
+local function startcommand (parser, command, options)
+   local callback = parser:getcallbacks()
+   local stack = callback.stack
+   local lno, col, pos = parser:pos()
+   local position = { lno = lno, col = col, pos = pos }
+   -- create an empty command which content will be filled on closing tag
+   local element = SU.ast.createCommand(command, options, nil, position)
+   table.insert(stack, element)
+end
+
+-- EndElement callback: pop the finished command, apply the namespace prefix
+-- to its name and append it to its parent (the new top of the stack).
+local function endcommand (parser, command)
+   local callback = parser:getcallbacks()
+   local stack, rules = callback.stack, callback.rules
+   local element = table.remove(stack)
+   assert(element.command == command)
+   element.command = rules.prefix and (rules.prefix .. command) or command
+
+   local level = #stack
+   table.insert(stack[level], element)
+end
+
+-- CharacterData callback: add text to the command on top of the stack,
+-- after the space cleaning requested by the rules for that tag.
+local function text (parser, msg)
+   local callback = parser:getcallbacks()
+   local stack, rules = callback.stack, callback.rules
+   local element = stack[#stack]
+
+   -- The element is still open here, so its command is the bare tag name
+   -- (the prefix is only added by endcommand).
+   local stripSpaces = isStripSpaces(element.command, rules)
+   local skipEmptyStrings = isSkipEmptyStrings(element.command, rules)
+
+   -- The trimmed text is needed by both options: to detect space-only
+   -- strings, and as the stripped value.
+   local txt = (stripSpaces or skipEmptyStrings) and msg:gsub("^%s+", ""):gsub("%s+$", "") or msg
+   if skipEmptyStrings and txt == "" then
+      return
+   end
+   msg = stripSpaces and txt or msg
+
+   -- Merge with a preceding text chunk rather than adding a new child
+   local n = #element
+   if type(element[n]) == "string" then
+      element[n] = element[n] .. msg
+   else
+      table.insert(element, msg)
+   end
+end
+
+-- Parse an XML document into an AST of commands.
+-- doc is the XML document as a string (other input types raise an error).
+-- rules are the namespacing and space cleaning rules (defaultRules if omitted).
+-- Returns the root element, or nil and an error message on parsing failure.
+local function parse (doc, rules)
+   local content = {
+      StartElement = startcommand,
+      EndElement = endcommand,
+      CharacterData = text,
+      _nonstrict = true,
+      stack = { {} },
+      rules = rules or defaultRules,
+   }
+   local parser = lxp.new(content)
+   local status, err
+   if type(doc) == "string" then
+      status, err = parser:parse(doc)
+      if not status then
+         return nil, err
+      end
+   else
+      -- FIXME DUBIOUS CODE SMELL
+      -- SILE's XML parser has some code here, which seems wrong:
+      -- keys on pair()?
+      -- EDIT: Known issue:
+      -- https://github.com/sile-typesetter/sile/issues/980
+      SU.error("XML parser: only string input should be supported")
+      -- for element in pairs(doc) do
+      -- status, err = parser:parse(element)
+      -- if not status then
+      -- return nil, err
+      -- end
+      -- end
+   end
+   -- Called without data once all the input has been fed to the parser
+   status, err = parser:parse()
+   if not status then
+      return nil, err
+   end
+   parser:close()
+   -- The root element is the only child of the initial stack entry
+   return content.stack[1][1]
+end
+
+return {
+   parse = parse,
+}
From 6b1236530e4d4bc8540775c6f9818fc986652bda Mon Sep 17 00:00:00 2001
From: Omikhleia
Date: Fri, 28 Jun 2024 21:19:41 +0200
Subject: [PATCH 02/16] chore(packgase): Add a few CSL locales and styles for testing

Note that these files are licensed under CC-BY-SA 3.0 and are only included as a default minimal set for testing.
--- csl/locales/README.md | 7 + csl/locales/locales-en-US.xml | 774 ++++++++++++++++++++++++++ csl/locales/locales-fr-FR.xml | 701 +++++++++++++++++++++++ csl/styles/README.md | 7 + csl/styles/chicago-author-date-fr.csl | 766 +++++++++++++++++++++++++ csl/styles/chicago-author-date.csl | 704 +++++++++++++++++++++++ 6 files changed, 2959 insertions(+) create mode 100644 csl/locales/README.md create mode 100644 csl/locales/locales-en-US.xml create mode 100644 csl/locales/locales-fr-FR.xml create mode 100644 csl/styles/README.md create mode 100644 csl/styles/chicago-author-date-fr.csl create mode 100644 csl/styles/chicago-author-date.csl diff --git a/csl/locales/README.md b/csl/locales/README.md new file mode 100644 index 000000000..0de8a5735 --- /dev/null +++ b/csl/locales/README.md @@ -0,0 +1,7 @@ +The files in this directory are the locale files for CSL styles, from the Citation Style Language project (https://github.com/citation-style-language/locales) + +They are distributed under the Creative Commons Attribution-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-sa/3.0/). + +We are providing these files here for convenience, so that SILE has a default set of locales for testing its implementation of CSL. + +Please note that the CSL project may have newer versions of these files. diff --git a/csl/locales/locales-en-US.xml b/csl/locales/locales-en-US.xml new file mode 100644 index 000000000..3f9ebadd0 --- /dev/null +++ b/csl/locales/locales-en-US.xml @@ -0,0 +1,774 @@ + + + + + Andrew Dunning + + + Sebastian Karcher + + + Rintze M. Zelle + + + Denis Meier + + + Brenton M. Wiernik + + This work is licensed under a Creative Commons Attribution-ShareAlike 3.0 License + 2024-03-12T13:41:31+00:00 + + + + + + + + + + + + + + + accessed + advance online publication + album + and + and others + anonymous + anon. + at + audio recording + available at + by + circa + c. + cited + et al. + film + forthcoming + from + henceforth + ibid. 
+ in + in press + internet + letter + loc. cit. + no date + n.d. + no place + n.p. + no publisher + n.p. + on + online + op. cit. + original work published + personal communication + podcast + podcast episode + preprint + presented at the + radio broadcast + radio series + radio series episode + + reference + references + + + ref. + refs. + + retrieved + review of + rev. of + scale + special issue + special section + television broadcast + television series + television series episode + video + working paper + + + preprint + journal article + magazine article + newspaper article + bill + + broadcast + + classic + collection + dataset + document + entry + dictionary entry + encyclopedia entry + event + + graphic + hearing + interview + legal case + legislation + manuscript + map + video recording + musical score + pamphlet + conference paper + patent + performance + periodical + personal communication + post + blog post + regulation + report + review + book review + software + audio recording + presentation + standard + thesis + treaty + webpage + + + journal art. + mag. art. + newspaper art. + + + doc. + + graph. + interv. + MS + video rec. + rep. + rev. + bk. rev. + audio rec. 
+ + + + testimony of + review of + review of the book + + + AD + BC + BCE + CE + + + + + + + + : + , + ; + + + th + st + nd + rd + th + th + th + + + first + second + third + fourth + fifth + sixth + seventh + eighth + ninth + tenth + + + + act + acts + + + appendix + appendices + + + article + articles + + + book + books + + + canon + canons + + + chapter + chapters + + + column + columns + + + location + locations + + + equation + equations + + + figure + figures + + + folio + folios + + + number + numbers + + + line + lines + + + note + notes + + + opus + opera + + + page + pages + + + paragraph + paragraphs + + + part + parts + + + rule + rules + + + scene + scenes + + + section + sections + + + sub verbo + sub verbis + + + supplement + supplements + + + table + tables + + + + + + + title + titles + + + verse + verses + + + version + versions + + + volume + volumes + + + + + + app. + apps. + + + art. + arts. + + + bk. + bks. + + + c. + cc. + + + chap. + chaps. + + + col. + cols. + + + loc. + locs. + + + eq. + eqs. + + + fig. + figs. + + + fol. + fols. + + + no. + nos. + + + l. + ll. + + + n. + nn. + + + op. + opp. + + + p. + pp. + + + para. + paras. + + + pt. + pts. + + + r. + rr. + + + sc. + scs. + + + sec. + secs. + + + s.v. + s.vv. + + + supp. + supps. + + + tbl. + tbls. + + + tit. + tits. + + + v. + vv. + + + v. + v. + + + vol. + vols. + + + + + + ¶¶ + + + § + §§ + + + + + chapter + chapters + + + citation + citations + + + number + numbers + + + edition + editions + + + reference + references + + + number + numbers + + + page + pages + + + volume + volumes + + + page + pages + + + printing + printings + + + + + chap. + chaps. + + + cit. + cits. + + + no. + nos. + + + ed. + eds. + + + ref. + refs. + + + no. + nos. + + + p. + pp. + + + vol. + vols. + + + p. + pp. + + + print. + prints. 
+ + + + + + + chair + chairs + + + editor + editors + + + compiler + compilers + + + contributor + contributors + + + curator + curators + + + director + directors + + + editor + editors + + + editor & translator + editors & translators + + + editor + editors + + + executive producer + executive producers + + + guest + guests + + + host + hosts + + + illustrator + illustrators + + + narrator + narrators + + + organizer + organizers + + + performer + performers + + + producer + producers + + + writer + writers + + + series creator + series creators + + + translator + translators + + + + + + ed. + eds. + + + comp. + comps. + + + contrib. + contribs. + + + cur. + curs. + + + dir. + dirs. + + + ed. + eds. + + + ed. & tran. + eds. & trans. + + + ed. + eds. + + + exec. prod. + exec. prods. + + + ill. + ills. + + + narr. + narrs. + + + org. + orgs. + + + perf. + perfs. + + + prod. + prods. + + + writ. + writs. + + + cre. + cres. + + + tran. + trans. + + + + by + chaired by + edited by + compiled by + composed by + by + with + curated by + directed by + edited by + edited & translated by + edited by + executive produced by + with guest + hosted by + illustrated by + interview by + narrated by + organized by + by + performed by + produced by + to + by + written by + created by + translated by + + + + ed. by + comp. by + comp. by + w. + cur. by + dir. by + ed. by + ed. & trans. by + ed. by + exec. prod. by + w. guest + illus. by + narr. by + org. by + perf. by + prod. by + writ. by + cre. by + trans. by + + + January + February + March + April + May + June + July + August + September + October + November + December + + + Jan. + Feb. + Mar. + Apr. + May + Jun. + Jul. + Aug. + Sep. + Oct. + Nov. + Dec. 
+ + + Spring + Summer + Autumn + Winter + + diff --git a/csl/locales/locales-fr-FR.xml b/csl/locales/locales-fr-FR.xml new file mode 100644 index 000000000..23b6d80fa --- /dev/null +++ b/csl/locales/locales-fr-FR.xml @@ -0,0 +1,701 @@ + + + + + Grégoire Colly + + + Collectif Zotero francophone + + This work is licensed under a Creative Commons Attribution-ShareAlike 3.0 License + 2012-07-04T23:31:02+00:00 + + + + + + + + + + + + + + publication en ligne anticipée + album + enregistrement audio + film + désormais + loc. cit. + sans lieu + s. l. + sans nom + s. n. + sur + op. cit. + édition originale + communication personnelle + podcast + épisode de podcast + prépublication + émission de radio + série radiophonique + épisode de série radiophonique + numéro spécial + section spéciale + émission de télévision + série télévisée + épisode de série télévisée + vidéo + document de travail + consulté le + et + et autres + anonyme + anon. + sur + disponible sur + par + vers + v. + cité + + reference + references + + + number + numbers + + + édition + éditions + + + ref. + refs. + + + no. + nos. + + éd. + et al. + à paraître + à l'adresse + ibid. + in + sous presse + Internet + lettre + sans date + s. d. + en ligne + présenté à + + référence + références + + + réf. + réf. + + review of + rev. of + consulté + échelle + version + + + article + article de revue + article de magazine + article de presse + projet de loi + + émission + + classique + collection + jeu de données + document + entrée + entrée de dictionnaire + entrée d'encyclopédie + événement + + image + audience + entretien + affaire + acte juridique + manuscrit + carte + enregistrement vidéo + partition + pamphlet + article de colloque + brevet + interprétation + périodique + communication personnelle + billet + billet de blog + règlement + rapport + recension + recension de livre + logiciel + chanson + présentation + norme + thèse + traité + page web + + + art. de revue + art. de mag. + art. de presse + + + doc. 
+ + graph. + interv. + ms + enr. vidéo + rap. + recens. + recens. de liv. + enr. audio + + + + testimony of + recension de + recension du livre + + + apr. J.-C. + av. J.-C. + av. n. è. + n. è. + + + «  +  » + + + +  : + , +  ; + + + + ʳᵉ + ᵉʳ + + + premier + deuxième + troisième + quatrième + cinquième + sixième + septième + huitième + neuvième + dixième + + + + acte + actes + + + appendice + appendices + + + article + articles + + + canon + canons + + + emplacement + emplacements + + + équation + équations + + + règle + règles + + + scène + scènes + + + tableau + tableaux + + + + + + + titre + titres + + + livre + livres + + + chapitre + chapitres + + + colonne + colonnes + + + figure + figures + + + folio + folios + + + numéro + numéros + + + ligne + lignes + + + note + notes + + + opus + opus + + + page + pages + + + volume + volumes + + + page + pages + + + printing + printings + + + + chap. + chaps. + + + cit. + cits. + + + nᵒ + nᵒˢ + + + page + pages + + + paragraphe + paragraphes + + + partie + parties + + + section + sections + + + supplement + supplements + + + sub verbo + sub verbis + + + verset + versets + + + volume + volumes + + + + + append. + append. + + + art. + art. + + + emplact + emplact + + + eq. + eq. + + + règle + règles + + + sc. + sc. + + + tab. + tab. + + + + + + + tit. + tit. + + liv. + chap. + col. + fig. + + fᵒ + fᵒˢ + + + nᵒ + nᵒˢ + + l. + n. + op. + + p. + p. + + + vol. + vols. + + + p. + pp. + + + print. + prints. + + + + + p. + p. + + paragr. + part. + sect. + + supp. + supps. + + + s. v. + s. vv. + + + v. + v. + + + vol. + vol. + + + + + § + § + + + chapter + chapters + + + citation + citations + + + numéro + numéros + + + § + § + + + + + ed. + eds. 
+ + + président + présidents + + + compilateur + compilateurs + + + contributeur + contributeurs + + + commissaire + commissaires + + + producteur exécutif + producteurs exécutifs + + + invité + invités + + + hôte + hôtes + + + narrateur + narrateurs + + + organisateur + organisateurs + + + interprète + interprètes + + + producteur + producteurs + + + scénariste + scénaristes + + + créateur de série + créateurs de série + + + réalisateur + réalisateurs + + + éditeur + éditeurs + + + directeur + directeurs + + + illustrateur + illustrateurs + + + traducteur + traducteurs + + + éditeur et traducteur + éditeurs et traducteurs + + + + + compil. + compil. + + + contrib. + contrib. + + + commiss. + commiss. + + + prod. exé. + prod. exé. + + + narr. + narr. + + + org. + org. + + + interpr. + interpr. + + + prod. + prod. + + + scénar. + scénar. + + + créat. + créat. + + + réal. + réal. + + + éd. + éd. + + + dir. + dir. + + + ill. + ill. + + + trad. + trad. + + + éd. et trad. + éd. et trad. + + + + edited by + présidé par + compilé par + avec + organisé par + production exécutive par + avec pour invité + animé par + lu par + organisé par + interprété par + produit par + scénario de + créé par + par + réalisé par + édité par + sous la direction de + illustré par + entretien réalisé par + à + par + ed. by + traduit par + édité et traduit par + + + compil. par + ac + org. par + prod. exé. par + ac pr inv. + anim. par + lu par + org. par + interpr. par + prod. par + scénar. de + créé par + réal. par + éd. par + ss la dir. de + ill. par + trad. par + éd. et trad. par + + + janvier + février + mars + avril + mai + juin + juillet + août + septembre + octobre + novembre + décembre + + + janv. + févr. + mars + avr. + mai + juin + juill. + août + sept. + oct. + nov. + déc. 
+ + + printemps + été + automne + hiver + + diff --git a/csl/styles/README.md b/csl/styles/README.md new file mode 100644 index 000000000..ef701c6a9 --- /dev/null +++ b/csl/styles/README.md @@ -0,0 +1,7 @@ +The files in this directory are the style files for CSL, from the Citation Style Language project (https://github.com/citation-style-language/styles) + +They are distributed under the Creative Commons Attribution-ShareAlike 3.0 Unported License (http://creativecommons.org/licenses/by-sa/3.0/). + +We are providing these files here for convenience, so that SILE has a default set of styles for testing its implementation of CSL. + +Please note that the CSL project may have newer versions of these files. diff --git a/csl/styles/chicago-author-date-fr.csl b/csl/styles/chicago-author-date-fr.csl new file mode 100644 index 000000000..3ec4b2be4 --- /dev/null +++ b/csl/styles/chicago-author-date-fr.csl @@ -0,0 +1,766 @@ + + diff --git a/csl/styles/chicago-author-date.csl b/csl/styles/chicago-author-date.csl new file mode 100644 index 000000000..cb34afd4f --- /dev/null +++ b/csl/styles/chicago-author-date.csl @@ -0,0 +1,704 @@ + + From 808c6bbd8165b8b5d4fff11489fb2a5341d6b3dd Mon Sep 17 00:00:00 2001 From: Omikhleia Date: Sat, 29 Jun 2024 08:06:20 +0200 Subject: [PATCH 03/16] feat(packages): Use experimental CSL renderer for BibTeX --- packages/bibtex/init.lua | 261 +++++++++++++++++++++++++++- packages/bibtex/support/bib2csl.lua | 199 +++++++++++++++++++++ packages/bibtex/support/nbibtex.lua | 2 + 3 files changed, 459 insertions(+), 3 deletions(-) create mode 100644 packages/bibtex/support/bib2csl.lua diff --git a/packages/bibtex/init.lua b/packages/bibtex/init.lua index c27c79a8f..1f8999724 100644 --- a/packages/bibtex/init.lua +++ b/packages/bibtex/init.lua @@ -1,5 +1,39 @@ local base = require("packages.base") +local loadkit = require("loadkit") +local cslStyleLoader = loadkit.make_loader("csl") +local cslLocaleLoader = loadkit.make_loader("xml") + +local CslLocale = 
require("csl.core.locale").CslLocale +local CslStyle = require("csl.core.style").CslStyle +local CslEngine = require("csl.core.engine").CslEngine + +local function loadCslLocale (name) + local filename = SILE.resolveFile("csl/locales/locales-" .. name .. ".xml") + or cslLocaleLoader("csl.locales.locales-" .. name) + if not filename then + SU.error("Could not find CSL locale '" .. name .. "'") + end + local locale, err = CslLocale.read(filename) + if not locale then + SU.error("Could not open CSL locale '" .. name .. "': " .. err) + return + end + return locale +end +local function loadCslStyle (name) + local filename = SILE.resolveFile("csl/styles/" .. name .. ".csl") or cslStyleLoader("csl.styles." .. name) + if not filename then + SU.error("Could not find CSL style '" .. name .. "'") + end + local style, err = CslStyle.read(filename) + if not style then + SU.error("Could not open CSL style '" .. name .. "': " .. err) + return + end + return style +end + local package = pl.class(base) package._name = "bibtex" @@ -7,6 +41,7 @@ local epnf = require("epnf") local nbibtex = require("packages.bibtex.support.nbibtex") local namesplit, parse_name = nbibtex.namesplit, nbibtex.parse_name local isodatetime = require("packages.bibtex.support.isodatetime") +local bib2csl = require("packages.bibtex.support.bib2csl") local Bibliography @@ -241,10 +276,36 @@ local function crossrefAndXDataResolve (bib, entry) end end +function package:loadOptPackage (pack) + local ok, _ = pcall(function () + self:loadPackage(pack) + return true + end) + SU.debug("bibtex", "Optional package " .. pack .. (ok and " loaded" or " not loaded")) + return ok +end + function package:_init () base._init(self) SILE.scratch.bibtex = { bib = {} } Bibliography = require("packages.bibtex.bibliography") + -- For DOI, PMID, PMCID and URL support. 
+ self:loadPackage("url") + -- For underline styling support + self:loadPackage("rules") + -- For TeX-like math support (extension) + self:loadPackage("math") + -- For superscripting support in number formatting + -- Play fair: try to load 3rd-party optional textsubsuper package. + -- If not available, fallback to raiselower to implement textsuperscript + if not self:loadOptPackage("textsubsuper") then + self:loadPackage("raiselower") + self:registerCommand("textsuperscript", function (_, content) + SILE.call("raise", { height = "0.7ex" }, function () + SILE.call("font", { size = "1.5ex" }, content) + end) + end) + end end function package.declareSettings (_) @@ -262,6 +323,8 @@ function package:registerCommands () parseBibtex(file, SILE.scratch.bibtex.bib) end) + -- LEGACY COMMANDS + self:registerCommand("bibstyle", function (_, _) SU.deprecated("\\bibstyle", "\\set[parameter=bibtex.style]", "0.13.2", "0.14.0") end) @@ -309,6 +372,161 @@ function package:registerCommands () end SILE.processString(("%s"):format(cite), "xml") end) + + -- NEW CSL IMPLEMENTATION + + -- Internal commands for CSL processing + + self:registerCommand("bibSmallCaps", function (_, content) + -- To avoid attributes in the CSL-processed content + SILE.call("font", { features = "+smcp" }, content) + end) + + -- CSL 1.0.2 appendix VI + -- "If the bibliography entry for an item renders any of the following + -- identifiers, the identifier should be anchored as a link, with the + -- target of the link as follows: + -- url: output as is + -- doi: prepend with “https://doi.org/” + -- pmid: prepend with “https://www.ncbi.nlm.nih.gov/pubmed/” + -- pmcid: prepend with “https://www.ncbi.nlm.nih.gov/pmc/articles/” + -- NOT IMPLEMENTED: + -- "Citation processors should include an option flag for calling + -- applications to disable bibliography linking behavior." + -- (But users can redefine these commands to their liking...) 
+ self:registerCommand("bibLink", function (options, content) + SILE.call("href", { src = options.src }, { + SU.ast.createCommand("url", {}, { content[1] }), + }) + end) + self:registerCommand("bibURL", function (_, content) + local link = content[1] + if not link:match("^https?://") then + -- Play safe + link = "https://" .. link + end + SILE.call("bibLink", { src = link }, content) + end) + self:registerCommand("bibDOI", function (_, content) + local link = content[1] + if not link:match("^https?://") then + link = "https://doi.org/" .. link + end + SILE.call("bibLink", { src = link }, content) + end) + self:registerCommand("bibPMID", function (_, content) + local link = content[1] + if not link:match("^https?://") then + link = "https://www.ncbi.nlm.nih.gov/pubmed/" .. link + end + SILE.call("bibLink", { src = link }, content) + end) + self:registerCommand("bibPMCID", function (_, content) + local link = content[1] + if not link:match("^https?://") then + link = "https://www.ncbi.nlm.nih.gov/pmc/articles/" .. link + end + SILE.call("bibLink", { src = link }, content) + end) + + -- Style and locale loading + + self:registerCommand("bibliographystyle", function (options, _) + local sty = SU.required(options, "style", "bibliographystyle") + local style = loadCslStyle(sty) + -- FIXME: lang is mandatory until we can map document.lang to a resolved + -- BCP47 with region always present, as this is what CSL locales require. 
+ if not options.lang then + -- Pick the default locale from the style, if any + options.lang = style.globalOptions["default-locale"] + end + local lang = SU.required(options, "lang", "bibliographystyle") + local locale = loadCslLocale(lang) + SILE.scratch.bibtex.engine = CslEngine(style, locale, { + localizedPunctuation = SU.boolean(options.localizedPunctuation, false), + italicExtension = SU.boolean(options.italicExtension, true), + mathExtension = SU.boolean(options.mathExtension, true), + }) + end) + + self:registerCommand("csl:cite", function (options, content) + -- TODO: + -- - locator support + -- - multiple citation keys + if not SILE.scratch.bibtex.engine then + SILE.call("bibliographystyle", { lang = "en-US", style = "chicago-author-date" }) + -- SILE.call("bibliographystyle", { lang = "en-US", style = "chicago-fullnote-bibliography" }) + -- SILE.call("bibliographystyle", { lang = "en-US", style = "apa" }) + end + local engine = SILE.scratch.bibtex.engine + if not options.key then + options.key = SU.ast.contentToString(content) + end + local entry = SILE.scratch.bibtex.bib[options.key] + if not entry then + SU.warn("Unknown reference in citation " .. options.key) + return + end + if entry.type == "xdata" then + SU.warn("Skipped citation of @xdata entry " .. 
options.key) + return + end + crossrefAndXDataResolve(SILE.scratch.bibtex.bib, entry) + + local csljson = bib2csl(entry) + -- csljson.locator = { -- EXPERIMENTAL + -- label = "page", + -- value = "123-125" + -- } + local cite = engine:cite(csljson) + + SILE.processString(("%s"):format(cite), "xml") + end) + + self:registerCommand("csl:reference", function (options, content) + if not SILE.scratch.bibtex.engine then + SILE.call("bibliographystyle", { lang = "en-US", style = "chicago-author-date" }) + -- SILE.call("bibliographystyle", { lang = "en-US", style = "chicago-fullnote-bibliography" }) + -- SILE.call("bibliographystyle", { lang = "en-US", style = "apa" }) + end + local engine = SILE.scratch.bibtex.engine + if not options.key then + options.key = SU.ast.contentToString(content) + end + local entry = SILE.scratch.bibtex.bib[options.key] + if not entry then + SU.warn("Unknown reference in citation " .. options.key) + return + end + if entry.type == "xdata" then + SU.warn("Skipped citation of @xdata entry " .. options.key) + return + end + crossrefAndXDataResolve(SILE.scratch.bibtex.bib, entry) + + local cslentry = bib2csl(entry) + local cite = engine:reference(cslentry) + + SILE.processString(("%s"):format(cite), "xml") + end) + + self:registerCommand("printbibliography", function (_, _) + local bib = SILE.scratch.bibtex.bib + -- TEMP: until we implement proper sorting, let's sort by keys + -- for reproducibility. + local tkeys = {} + for k, entry in pairs(bib) do + if entry.type ~= "xdata" then table.insert(tkeys, k) end + end + table.sort(tkeys) + local count = 0 + for _, k in ipairs(tkeys) do + SILE.call("csl:reference", { key = k }) + SILE.call("par") + count = count + 1 + end + SILE.typesetter:typeset("¤ " .. count .. 
" references") + end) end package.documentation = [[ @@ -321,13 +539,48 @@ This experimental package allows SILE to read and process BibTeX \code{.bib} fil To load a BibTeX file, issue the command \autodoc:command{\loadbibliography[file=]} +\smallskip +\noindent +\em{Producing citations and references (legacy commands)} +\novbreak + +\indent To produce an inline citation, call \autodoc:command{\cite{}}, which will typeset something like “Jones 1982”. If you want to cite a particular page number, use \autodoc:command{\cite[page=22]{}}. To produce a full reference, use \autodoc:command{\reference{}}. -Currently, the only supported bibliography style is Chicago referencing, but other styles should be easy to implement. -Adapt \code{packages/bibtex/styles/chicago.lua} as necessary. +Currently, the only supported bibliography style is Chicago referencing. + +\smallskip +\noindent +\em{Producing citations and references (CSL implementation)} +\novbreak + +\indent +Although it is still an experimental work in progress, the CSL (Citation Style Language) implementation is more powerful and flexible than the legacy commands. + +You must first invoke \autodoc:command{\bibliographystyle[style=