Skip to content

Commit

Permalink
feat(math): Support the MathML operator dictionary and many TeX-like …
Browse files Browse the repository at this point in the history
…aliases
  • Loading branch information
Omikhleia authored and Didier Willis committed Nov 14, 2024
1 parent 6bad0e7 commit 847e209
Show file tree
Hide file tree
Showing 7 changed files with 4,346 additions and 2,639 deletions.
38 changes: 38 additions & 0 deletions packages/math/atoms.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
-- Numeric identifiers of the math atom types.
-- The values are plain integers; other math modules obtain them through the
-- table returned by this file.
local atomType = {
   ordinary = 0,
   bigOperator = 1,
   binaryOperator = 2,
   relationalOperator = 3,
   openingSymbol = 4,
   closeSymbol = 5,
   punctuationSymbol = 6,
   inner = 7,
   overSymbol = 8,
   underSymbol = 9,
   accentSymbol = 10,
   radicalSymbol = 11,
   vcenter = 12,
}

-- Short name -> full atom type name.
-- The short names are the ones accepted by the `atom` command option and used
-- in the unicode symbols table / operator dictionary.
-- NOTE(review): the XSLT generator also emits a "botaccent" short name (for
-- diacritics with combining class 220); it has no entry here, so looking it
-- up yields nil. Confirm whether that is intended.
local shortToFullName = {
   ord = "ordinary",
   big = "bigOperator",
   bin = "binaryOperator",
   rel = "relationalOperator",
   open = "openingSymbol",
   close = "closeSymbol",
   punct = "punctuationSymbol",
   inner = "inner",
   over = "overSymbol",
   under = "underSymbol",
   accent = "accentSymbol",
   radical = "radicalSymbol",
   vcenter = "vcenter",
}

-- Shorthands for atom types, resolved to the same numeric values as atomType
local atomTypeShort = {}
for shortName, fullName in pairs(shortToFullName) do
   atomTypeShort[shortName] = atomType[fullName]
end

return {
atomType = atomType,
atomTypeShort = atomTypeShort,
}
13 changes: 5 additions & 8 deletions packages/math/base-elements.lua
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
local nodefactory = require("types.node")
local hb = require("justenoughharfbuzz")
local ot = require("core.opentype-parser")
local syms = require("packages.math.unicode-symbols")
local atoms = require("packages.math.atoms")
local mathvariants = require("packages.math.unicode-mathvariants")
local convertMathVariantScript = mathvariants.convertMathVariantScript

local atomType = syms.atomType
local symbolDefaults = syms.symbolDefaults
local atomType = atoms.atomType

local elements = {}

Expand Down Expand Up @@ -423,7 +422,7 @@ function elements.stackbox:shape ()
end
-- Handle stretchy operators
for _, elt in ipairs(self.children) do
if elt.is_a(elements.text) and elt.kind == "operator" and SU.boolean(elt.stretchy, false) then
if elt:is_a(elements.text) and elt.kind == "operator" and SU.boolean(elt.stretchy, false) then
elt:_vertStretchyReshape(self.depth, self.height)
end
end
Expand Down Expand Up @@ -694,14 +693,14 @@ function elements.underOver:_stretchyReshapeToBase (part)
-- MathML3 "complex1" torture test: Maxwell's Equations (vectors in fractions)
if #part.children == 0 then
local elt = part
if elt.is_a(elements.text) and elt.kind == "operator" and SU.boolean(elt.stretchy, false) then
if elt:is_a(elements.text) and elt.kind == "operator" and SU.boolean(elt.stretchy, false) then
elt:_horizStretchyReshape(self.base.width)
end
elseif part:is_a(elements.underOver) then
-- Big assumption here: only considering one level of stacked under/over.
local hasStretched = false
for _, elt in ipairs(part.children) do
if elt.is_a(elements.text) and elt.kind == "operator" and SU.boolean(elt.stretchy, false) then
if elt:is_a(elements.text) and elt.kind == "operator" and SU.boolean(elt.stretchy, false) then
local stretched = elt:_horizStretchyReshape(self.base.width)
if stretched then
hasStretched = true
Expand Down Expand Up @@ -1652,8 +1651,6 @@ function elements.bevelledFraction:output (x, y, line)
end

elements.mathMode = mathMode
elements.atomType = atomType
elements.symbolDefaults = symbolDefaults
elements.newSubscript = newSubscript
elements.newUnderOver = newUnderOver

Expand Down
42 changes: 30 additions & 12 deletions packages/math/texlike.lua
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
local atoms = require("packages.math.atoms")
local syms = require("packages.math.unicode-symbols")
local bits = require("core.parserbits")

local epnf = require("epnf")
local lpeg = require("lpeg")

local atomType = syms.atomType
local symbolDefaults = syms.symbolDefaults
local atomType = atoms.atomType
local atomTypeShort = atoms.atomTypeShort
local operatorDict = syms.operatorDict
local symbols = syms.symbols

-- Grammar to parse TeX-like math
Expand Down Expand Up @@ -260,7 +262,7 @@ local compileToStr = function (argEnv, mathlist)
end
end

local function isOperatorKind (tree, typeOfAtom, typeOfSymbol)
local function isOperatorKind (tree, typeOfAtom)
if not tree then
return false -- safeguard
end
Expand All @@ -274,8 +276,8 @@ local function isOperatorKind (tree, typeOfAtom, typeOfSymbol)
end
-- Case \mo{ops} where ops is registered with the requested type
-- E.g. \mo{∑} or \sum
if tree[1] and symbolDefaults[tree[1]] and symbolDefaults[tree[1]].atom == typeOfSymbol then
return true
if tree[1] and operatorDict[tree[1]] and operatorDict[tree[1]].atom then
return operatorDict[tree[1]].atom == atomTypeShort[typeOfAtom]
end
return false
end
Expand All @@ -287,20 +289,32 @@ local function isMoveableLimits (tree)
if tree.options and SU.boolean(tree.options.movablelimits, false) then
return true
end
if tree[1] and symbolDefaults[tree[1]] and SU.boolean(symbolDefaults[tree[1]].movablelimits, false) then
return true
if tree[1] and operatorDict[tree[1]] and operatorDict[tree[1]].forms then
-- Leap of faith: We have no idea yet which form the operator will take
-- in the final MathML.
-- In the MathML operator dictionary, some operators have a movablelimits
-- in some forms and not in others.
-- Ex. \Join (U+2A1D) and \bigtriangleleft (U+2A1E) have it in their prefix
-- form but not in their infix form, for some unspecified reason (?).
-- Assume that if at least one form has movablelimits, the operator is
-- considered to have movablelimits "in general".
for _, form in pairs(operatorDict[tree[1]].forms) do
if SU.boolean(form.movablelimits, false) then
return true
end
end
end
return false
end
--- Tell whether a tree node is a closing operator.
-- Delegates to isOperatorKind with the "close" short atom name; the former
-- third argument (a numeric atom type) is no longer part of that function's
-- signature, so only the short name is passed.
-- @tparam table tree Tree node to inspect (may be nil)
-- @return Result of isOperatorKind for the "close" kind
local function isCloseOperator (tree)
   return isOperatorKind(tree, "close")
end
--- Tell whether a tree node is an opening operator.
-- Delegates to isOperatorKind with the "open" short atom name; the former
-- third argument (a numeric atom type) is no longer part of that function's
-- signature, so only the short name is passed.
-- @tparam table tree Tree node to inspect (may be nil)
-- @return Result of isOperatorKind for the "open" kind
local function isOpeningOperator (tree)
   return isOperatorKind(tree, "open")
end

--- Tell whether a symbol is registered as an accent in the operator dictionary.
-- Only the operator dictionary is consulted (the older symbolDefaults lookup
-- is superseded).
-- @tparam string symbol Key to look up in operatorDict
-- @return A truthy value when the symbol has an operatorDict entry whose atom
--   is atomType.accentSymbol; nil when there is no entry, false otherwise
local function isAccentSymbol (symbol)
   return operatorDict[symbol] and operatorDict[symbol].atom == atomType.accentSymbol
end

local function compileToMathML_aux (_, arg_env, tree)
Expand Down Expand Up @@ -666,8 +680,12 @@ compileToMathML(
\def{mathtt}{\mi[mathvariant=monospace]{#1}}
% Modulus operator forms
\def{bmod}{\mo{mod}}
\def{pmod}{\quad(\mo{mod} #1)}
% See Michael Downes & Barbara Beeton, "Short Math Guide for LaTeX"
% American Mathematical Society (v2.0, 2017), §7.1 p. 18
\def{bmod}{\mo[atom=bin]{mod}}
\def{pmod}{\quad(\mo[atom=ord]{mod}\>#1)}
\def{mod}{\quad \mo[atom=ord]{mod}\>#1}
\def{pod}{\quad(#1)}
% Phantom commands from TeX/LaTeX
\def{phantom}{\mphantom{#1}}
Expand Down
201 changes: 201 additions & 0 deletions packages/math/tools/unicode-xml-to-sile.xsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!--
Stylesheet to convert the unicode.xml file to a SILE Lua file:
xsltproc unicode-xml-to-sile.xsl unicode.xml > ../packages/math/unicode-symbols-generated.lua
Where unicode.xml is:
https://raw.githubusercontent.com/w3c/xml-entities/gh-pages/unicode.xml
-->
<xsl:output method="text" indent="no"/>

<!--
  Write an attribute value as a Lua literal.
  Values that compare equal to their own floor (integers) and the strings
  'true' / 'false' are written bare; anything else is wrapped in double quotes.
-->
<xsl:template name="format-value">
  <xsl:param name="value" />
  <!-- True when the value can be written as-is (integer or boolean) -->
  <xsl:variable name="bare" select="floor($value) = $value or $value = 'true' or $value = 'false'" />
  <xsl:if test="not($bare)"><xsl:text>"</xsl:text></xsl:if>
  <xsl:value-of select="$value" />
  <xsl:if test="not($bare)"><xsl:text>"</xsl:text></xsl:if>
</xsl:template>

<!--
  Write the Lua call U(0x..., 0x..., ...) for a character id.
  The id is UXXXX for a single codepoint, or UXXXX-YYYY(-ZZZZ...) for a
  sequence: the leading U is dropped and every dash-separated hexadecimal
  field becomes one 0x-prefixed argument. Any number of fields is supported.
-->
<xsl:template name="format-codepoint">
  <xsl:param name="codepoint" />
  <xsl:text>U(</xsl:text>
  <xsl:call-template name="format-codepoint-list">
    <!-- Codepoint is UXXXX, remove the U -->
    <xsl:with-param name="list" select="substring($codepoint, 2)" />
  </xsl:call-template>
  <xsl:text>)</xsl:text>
</xsl:template>

<!--
  Helper for format-codepoint: write a dash-separated list of hexadecimal
  fields as comma-separated 0x-prefixed numbers, recursing on the remainder.
-->
<xsl:template name="format-codepoint-list">
  <xsl:param name="list" />
  <xsl:choose>
    <xsl:when test="contains($list, '-')">
      <!-- First field, then the rest of the list -->
      <xsl:value-of select="concat('0x', substring-before($list, '-'), ', ')" />
      <xsl:call-template name="format-codepoint-list">
        <xsl:with-param name="list" select="substring-after($list, '-')" />
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <!-- Last (or only) field -->
      <xsl:value-of select="concat('0x', $list)" />
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>

<!--
Map a math class letter to the short atom name written in the generated
addSymbol call.
Parameters:
  class       : math class letter (N, A, B, C, D, F, G, L, O, P, R, S, U, V)
  combclass   : combining class, only consulted for class D (diacritics)
  description : character description, only consulted for class L (large)
Unknown or missing classes fall back to 'ord'.
NOTE(review): 'botaccent' is produced below for class D with combining
class 220, but the atomTypeShort table in packages/math/atoms.lua has no
such key, so the generated code would resolve it to nil. Confirm whether
that is intended.
-->
<xsl:template name="format-class">
<xsl:param name="class" />
<xsl:param name="combclass" />
<xsl:param name="description" />
<xsl:choose>
<xsl:when test="$class = 'N'">ord</xsl:when><!-- Normal = mathord = atomType.ordinary -->
<xsl:when test="$class = 'A'">ord</xsl:when><!-- Alphabetic = mathalpha = atomType.ordinary -->
<xsl:when test="$class = 'B'">bin</xsl:when><!-- Binary = mathbin = atomType.binaryOperator -->
<xsl:when test="$class = 'C'">close</xsl:when><!-- Closing = mathclose = atomType.closeSymbol -->
<xsl:when test="$class = 'D'"><!-- Diacritic -->
<xsl:choose>
<xsl:when test="$combclass = '220'">botaccent</xsl:when>
<xsl:when test="$combclass = '230'">accent</xsl:when>
<xsl:otherwise>ord</xsl:otherwise><!-- assuming atomType.ordinary -->
</xsl:choose>
</xsl:when>
<xsl:when test="$class = 'F'">ord</xsl:when><!-- Fence = mathfence = atomType.ordinary -->
<xsl:when test="$class = 'G'">ord</xsl:when><!-- Glyph Part = assuming atomType.ordinary -->
<xsl:when test="$class = 'L'"><!-- Large -->
<xsl:choose>
<!-- SILE uses the atom for spacing currently (ignoring lspace and rspace) -->
<!-- HACK: integral signs are NOT considered as big for spacing purposes -->
<xsl:when test="contains($description,'INTEGRAL')">ord</xsl:when>
<xsl:otherwise>big</xsl:otherwise><!-- mathop = atomType.bigOperator -->
</xsl:choose>
</xsl:when>
<xsl:when test="$class = 'O'">open</xsl:when><!-- Opening = mathopen = atomType.openingSymbol -->
<xsl:when test="$class = 'P'">punct</xsl:when><!-- Punctuation = mathpunct = atomType.punctuationSymbol -->
<xsl:when test="$class = 'R'">rel</xsl:when><!-- Relation = mathrel = atomType.relationalOperator -->
<xsl:when test="$class = 'S'">ord</xsl:when><!-- Space = assuming atomType.ordinary -->
<xsl:when test="$class = 'U'">ord</xsl:when><!-- Unary = mathord = atomType.ordinary -->
<xsl:when test="$class = 'V'">bin</xsl:when><!-- Vary = assume mathbin = atomType.binaryOperator -->
<xsl:otherwise>ord</xsl:otherwise><!-- assuming atomType.ordinary if not specified -->
</xsl:choose>
</xsl:template>

<!--
  Write the TeX-like name as a quoted Lua string, or nil when there is none.
  The first character of the name is dropped (presumably the leading
  backslash of the unicode-math command name; confirm against unicode.xml).
-->
<xsl:template name="format-mathlatex">
  <xsl:param name="mathlatex" />
  <xsl:choose>
    <xsl:when test="not($mathlatex)">nil</xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="concat('&quot;', substring($mathlatex, 2), '&quot;')" />
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>

<!--
  Root template. Everything below that is not an xsl element is copied
  verbatim into the generated Lua file: a fixed prologue (header, helpers,
  addSymbol), then one addSymbol call per charlist/character element, then
  the table returned by the generated module.
-->
<xsl:template match="unicode">--- GENERATED FILE, DO NOT EDIT MANUALLY
--
-- Operator dictionary for unicode characters
--
-- Extracted from https://raw.githubusercontent.com/w3c/xml-entities/gh-pages/unicode.xml
-- (https://github.com/w3c/xml-entities)
-- Copyright David Carlisle 1999-2024
-- Use and distribution of this code are permitted under the terms of the
-- W3C Software Notice and License.
-- http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231.html
-- This file is a collection of information about how to map Unicode entities to LaTeX,
-- and various SGML/XML entity sets (ISO and MathML/HTML).
-- A Unicode character may be mapped to several entities.
-- Originally designed by Sebastian Rahtz in conjunction with Barbara Beeton for the STIX project
--

-- Dotted module name, as in the other math modules, so that the atoms module
-- is registered under a single key in package.loaded.
local atoms = require("packages.math.atoms")
local atomTypeShort = atoms.atomTypeShort

--- Transform a list of codepoints into a string
-- @tparam number ... Unicode codepoints
-- @treturn string Concatenation of the encoded codepoints
local function U (...)
   local t = { ... }
   local str = ""
   for i = 1, #t do
      str = str .. luautf8.char(t[i])
   end
   return str
end

-- TeX-like name to symbol string
local symbols = {}
-- Symbol string to { atom = ..., forms = { [form] = properties } }
local operatorDict = {}

--- Register a symbol
-- Always records an operatorDict entry for the symbol; additionally records
-- the TeX-like name in the symbols table when one is given.
-- @tparam string str String representation of the symbol
-- @tparam string shortatom Short atom type
-- @tparam string mathlatex TeX-like name of the symbol (from unicode-math), or nil
-- @tparam string _ Unicode name of the symbol (informative)
-- @tparam table ops List of operator forms and their properties, or nil
local function addSymbol (str, shortatom, mathlatex, _, ops)
   if mathlatex then
      SU.debug("math.symbols", "Registering symbol", str, "as", mathlatex)
      symbols[mathlatex] = str
   end
   local op = { atom = atomTypeShort[shortatom] }
   if ops then
      op.forms = {}
      -- ops is a plain sequence: walk it in order
      for _, v in ipairs(ops) do
         if v.form then
            -- Spacing is turned into lengths in mu, defaulting to 0mu (the entry is updated in place)
            v.lspace = SILE.types.length(v.lspace and ("%smu"):format(v.lspace) or "0mu")
            v.rspace = SILE.types.length(v.rspace and ("%smu"):format(v.rspace) or "0mu")
            op.forms[v.form] = v
         else
            SU.warn("No form for operator " .. str .. " (operator dictionary is probably incomplete)")
         end
      end
   end
   operatorDict[str] = op
end

<xsl:apply-templates select="charlist/character" />

return {
   operatorDict = operatorDict,
   symbols = symbols
}
</xsl:template>

<!--
Emit one addSymbol(...) call for a character element.
A character is written only when its computed atom is not 'ord', or it has a
unicode-math LaTeX name, or it has at least one operator-dictionary child.
Arguments written, in order: the U(...) codepoints, the quoted short atom
name, the quoted LaTeX name or nil, the quoted description, and either a
table of operator-dictionary entries (sorted by descending numeric priority)
or nil.
NOTE(review): the description is written between double quotes without any
escaping; this assumes descriptions never contain a double quote or a
backslash. Confirm against the input data.
-->
<xsl:template match="character">
<xsl:variable name="mathclass" select="unicodedata/@mathclass" />
<xsl:variable name="mathlatex" select="mathlatex[@set='unicode-math']/text()" />
<xsl:variable name="combclass" select="unicodedata/@combclass" />
<!-- Short atom name computed by the format-class template -->
<xsl:variable name="atom">
<xsl:call-template name="format-class">
<xsl:with-param name="class" select="$mathclass" />
<xsl:with-param name="combclass" select="$combclass" />
<xsl:with-param name="description" select="description" />
</xsl:call-template>
</xsl:variable>
<!-- Skip plain ordinary characters that have neither a LaTeX name nor operator properties -->
<xsl:if test="$atom != 'ord' or $mathlatex or operator-dictionary">
<xsl:text>
addSymbol(</xsl:text>
<!-- Codepoints -->
<xsl:call-template name="format-codepoint">
<xsl:with-param name="codepoint" select="@id" />
</xsl:call-template>
<!-- Atom type -->
<xsl:text>, "</xsl:text><xsl:value-of select="$atom" /><xsl:text>", </xsl:text>
<!-- Math latex name or nil -->
<xsl:call-template name="format-mathlatex">
<xsl:with-param name="mathlatex" select="$mathlatex" />
</xsl:call-template>
<!-- Description -->
<xsl:text>, "</xsl:text><xsl:value-of select="description" /><xsl:text>"</xsl:text>
<!-- Operator dictionary or nil -->
<xsl:choose>
<xsl:when test="operator-dictionary">
<xsl:text>, {</xsl:text>
<xsl:apply-templates select="operator-dictionary">
<xsl:sort select="@priority" data-type="number" order="descending" /><!-- sort by @priority -->
</xsl:apply-templates>
<xsl:text>}</xsl:text>
</xsl:when>
<xsl:otherwise><xsl:text>, nil</xsl:text></xsl:otherwise>
</xsl:choose>
<xsl:text>)</xsl:text>
</xsl:if>
</xsl:template>

<!--
Emit one Lua table for an operator-dictionary element, on its own line:
every attribute of the element is written as "name = value", in attribute
name order, with the value formatted by the format-value template.
A comma follows each table except the last one of the current node list.
-->
<xsl:template match="operator-dictionary">
{ <xsl:for-each select="@*">
<xsl:sort select="name()" />
<xsl:value-of select="name()" /> = <xsl:call-template name="format-value">
<xsl:with-param name="value" select="." />
</xsl:call-template><xsl:if test="position() != last()">, </xsl:if>
</xsl:for-each> }<xsl:if test="position() != last()">,</xsl:if>
</xsl:template>

</xsl:stylesheet>
Loading

0 comments on commit 847e209

Please sign in to comment.