Skip to content

Commit

Permalink
Merge pull request #2187 from Omikhleia/math-accent-redo
Browse files Browse the repository at this point in the history
feat(math): Minimal support for accents in MathML and TeX-like commands
  • Loading branch information
alerque authored Dec 5, 2024
2 parents cdc3ba9 + dd0b1b5 commit c9756a2
Show file tree
Hide file tree
Showing 7 changed files with 352 additions and 42 deletions.
2 changes: 1 addition & 1 deletion packages/math/atoms.lua
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ local atomType = {
over = 8, -- Unused for now (used for overlines etc. in The TeXbook)
under = 9, -- Unused for now (used for underlines etc. in The TeXbook)
accent = 10,
botaccent = 11, -- Unused for now but botaccent is encoded in our dictionary
botaccent = 11,
}

return { types = atomType }
105 changes: 78 additions & 27 deletions packages/math/base-elements.lua
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,18 @@ local function isNotEmpty (element)
return element and (element:is_a(elements.terminal) or #element.children > 0)
end

--- Compute the math mode to apply to an accent script.
-- The size is left unchanged, but display modes are mapped to their
-- text counterparts (see MathML Core §3.4.3).
-- @param mode Current math mode (one of the mathMode values)
-- @return The text (or cramped text) mode when given a display mode,
--   otherwise the mode unchanged
local function getAccentMode (mode)
   if mode == mathMode.display then
      return mathMode.text
   elseif mode == mathMode.displayCramped then
      return mathMode.textCramped
   end
   -- Any non-display mode is passed through as-is.
   return mode
end

local function unwrapSingleElementMrow (elt)
-- CODE SMELL.
-- For \overset or \underset in LaTeX, MathML would use <mover> or <munder>.
Expand All @@ -748,10 +760,13 @@ local function unwrapSingleElementMrow (elt)
end
end

function elements.underOver:_init (base, sub, sup)
function elements.underOver:_init (attributes, base, sub, sup)
elements.mbox._init(self)
base = unwrapSingleElementMrow(base)
self.atom = base.atom
self.attributes = attributes or {}
self.attributes.accent = SU.boolean(self.attributes.accent, false)
self.attributes.accentunder = SU.boolean(self.attributes.accentunder, false)
self.base = base
self.sub = isNotEmpty(sub) and sub or nil
self.sup = isNotEmpty(sup) and sup or nil
Expand All @@ -771,10 +786,10 @@ function elements.underOver:styleChildren ()
self.base.mode = self.mode
end
if self.sub then
self.sub.mode = getSubscriptMode(self.mode)
self.sub.mode = self.attributes.accentunder and getAccentMode(self.mode) or getSubscriptMode(self.mode)
end
if self.sup then
self.sup.mode = getSuperscriptMode(self.mode)
self.sup.mode = self.attributes.accent and getAccentMode(self.mode) or getSuperscriptMode(self.mode)
end
end

Expand Down Expand Up @@ -816,7 +831,10 @@ function elements.underOver:_stretchyReshapeToBase (part)
end

function elements.underOver:shape ()
local constants = self:getMathMetrics().constants
local scaleDown = self:getScaleDown()
local isMovableLimits = SU.boolean(self.base and self.base.movablelimits, false)
local itCorr = self:calculateItalicsCorrection() * scaleDown
if not (self.mode == mathMode.display or self.mode == mathMode.displayCramped) and isMovableLimits then
-- When the base is a movable limit, the under/over scripts are not placed under/over the base,
-- but rather to the right of it, when display mode is not used.
Expand All @@ -827,32 +845,54 @@ function elements.underOver:shape ()
elements.subscript.shape(self)
return
end
local constants = self:getMathMetrics().constants
local scaleDown = self:getScaleDown()
-- Determine relative Ys
if self.base then
self.base.relY = SILE.types.length(0)
end
if self.sub then
self:_stretchyReshapeToBase(self.sub)
self.sub.relY = self.base.depth
+ SILE.types.length(
math.max(
(self.sub.height + constants.lowerLimitGapMin * scaleDown):tonumber(),
constants.lowerLimitBaselineDropMin * scaleDown
-- TODO These rules are incomplete and even wrong if we were to fully implement MathML Core.
if self.attributes.accentunder then
self.sub.relY = self.base.depth
+ SILE.types.length(
(self.sub.height + constants.lowerLimitGapMin * scaleDown):tonumber()
-- We assume that the accent is aligned on the base.
)
)
else
self.sub.relY = self.base.depth
+ SILE.types.length(
math.max(
(self.sub.height + constants.lowerLimitGapMin * scaleDown):tonumber(),
constants.lowerLimitBaselineDropMin * scaleDown
)
)
end
end
if self.sup then
self:_stretchyReshapeToBase(self.sup)
self.sup.relY = 0
- self.base.height
- SILE.types.length(
math.max(
(constants.upperLimitGapMin * scaleDown + self.sup.depth):tonumber(),
constants.upperLimitBaselineRiseMin * scaleDown
-- TODO These rules are incomplete if we were to fully implement MathML Core.
if self.attributes.accent then
self.sup.relY = 0 - self.base.height
-- MathML Core wants to align on the accentBaseHeight...
local overShift = math.max(0, constants.accentBaseHeight * scaleDown - self.base.height:tonumber())
self.sup.relY = self.sup.relY - SILE.types.length(overShift)
-- HACK: .... but improperly dimensioned accents can overshoot the base glyph.
-- So we try some guesswork to correct this.
-- Typically some non-combining symbols are in this case...
local heuristics = 0.5 * constants.flattenedAccentBaseHeight + 0.5 * constants.accentBaseHeight
if self.sup.height > SILE.types.length(heuristics * scaleDown) then
self.sup.relY = self.sup.relY + SILE.types.length(constants.accentBaseHeight * scaleDown)
end
else
self.sup.relY = 0
- self.base.height
- SILE.types.length(
math.max(
(constants.upperLimitGapMin * scaleDown + self.sup.depth):tonumber(),
constants.upperLimitBaselineRiseMin * scaleDown
)
)
)
end
end
-- Determine relative Xs based on widest symbol
local widest, a, b
Expand Down Expand Up @@ -893,7 +933,6 @@ function elements.underOver:shape ()
if b then
b.relX = c - b.width / 2
end
local itCorr = self:calculateItalicsCorrection() * scaleDown
if self.sup then
self.sup.relX = self.sup.relX + itCorr / 2
end
Expand Down Expand Up @@ -1201,7 +1240,10 @@ end
function elements.text:_vertStretchyReshape (depth, height)
local hasStretched = self:_stretchyReshape(depth + height, true)
if hasStretched then
-- HACK: see output routine
-- RESCALING HACK: see output routine
-- We only do it if the scaling logic found constructions on the vertical block axis.
-- It's a dirty hack until we properly implement assembly of glyphs in the case we couldn't
-- find a big enough variant.
self.vertExpectedSz = height + depth
self.vertScalingRatio = (depth + height):tonumber() / (self.height:tonumber() + self.depth:tonumber())
self.height = height
Expand All @@ -1212,12 +1254,21 @@ end

function elements.text:_horizStretchyReshape (width)
local hasStretched = self:_stretchyReshape(width, false)
if hasStretched then
-- HACK: see output routine
self.horizScalingRatio = width:tonumber() / self.width:tonumber()
self.width = width
end
return hasStretched
if not hasStretched and width:tonumber() < self.width:tonumber() then
-- Never shrink glyphs, it looks ugly
return false
end
-- But if stretching couldn't be done, it will be ugly anyway, so we will force
-- a re-scaling of the glyph.
-- (So it is slightly different from the vertical case, 'cause MathML just has one stretchy
-- attribute, whether for stretching on the vertical (block) or horizontal (inline) axis,
-- and we cannot know which axis is meant unless we implement yet another mapping table
-- as the one in the MathML Core appendices. Frankly, how many non-normative appendices
-- do we need to implement MathML correctly?)
-- RESCALING HACK: see output routine
self.horizScalingRatio = width:tonumber() / self.width:tonumber()
self.width = width
return true
end

function elements.text:output (x, y, line)
Expand Down Expand Up @@ -1356,7 +1407,7 @@ local function newSubscript (spec)
end

local function newUnderOver (spec)
return elements.underOver(spec.base, spec.sub, spec.sup)
return elements.underOver(spec.attributes, spec.base, spec.sub, spec.sup)
end

-- TODO replace with penlight equivalent
Expand Down
2 changes: 1 addition & 1 deletion packages/math/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ The \code{counter} or the direct value \code{number} is passed as a parameter to
\paragraph{Missing features}
This package still lacks support for some mathematical constructs, but hopefully we’ll get there.
Among unsupported constructs are: decorating symbols with so-called accents, such as arrows or hats, “over” or “under” braces, and line breaking inside a formula.
Among unsupported features, we can mention line breaking inside a formula.
\font:remove-fallback
\end{document}
Expand Down
122 changes: 113 additions & 9 deletions packages/math/texlike.lua
Original file line number Diff line number Diff line change
Expand Up @@ -396,9 +396,24 @@ local function isOperatorKind (tree, typeOfAtom)
return false
end

local function isMoveableLimits (tree)
local function isMoveableLimitsOrAlwaysStacked (tree)
if not tree then
return false -- safeguard
end
if tree.is_always_stacked then
-- We use an internal flag to mark commands that are always stacking
-- their sup/sub arguments, such as brace-like commands.
return true
end
if tree.command ~= "mo" then
return false
-- On the recursion:
-- MathML allows movablelimits on <mo> elements, but "embellished operators"
-- can be other elements inheriting the property from their "core operator",
-- see MathML Core §3.2.4.1, which is full of intricacies so we are probably
-- not even doing the right thing here.
-- On the hack:
-- See variant commands for limits further down.
return SU.boolean(tree.is_hacked_movablelimits, false) or isMoveableLimitsOrAlwaysStacked(tree[1])
end
if tree.options and SU.boolean(tree.options.movablelimits, false) then
return true
Expand Down Expand Up @@ -430,6 +445,9 @@ end
--- Check whether a symbol is registered as a (top) accent atom.
-- @param symbol Key looked up in the operator dictionary
-- @return true/false when the symbol has a dictionary entry, otherwise
--   the falsy lookup result (nil for an unknown symbol)
local function isAccentSymbol (symbol)
   local entry = operatorDict[symbol]
   return entry and entry.atom == atoms.types.accent
end
--- Check whether a symbol is registered as a bottom accent atom.
-- @param symbol Key looked up in the operator dictionary
-- @return true/false when the symbol has a dictionary entry, otherwise
--   the falsy lookup result (nil for an unknown symbol)
local function isBottomAccentSymbol (symbol)
   local entry = operatorDict[symbol]
   return entry and entry.atom == atoms.types.botaccent
end

local function compileToMathML_aux (_, arg_env, tree)
if type(tree) == "string" then
Expand Down Expand Up @@ -565,14 +583,15 @@ local function compileToMathML_aux (_, arg_env, tree)
end
tree.options = {}
-- Translate TeX-like sub/superscripts to `munderover` or `msubsup`,
-- depending on whether the base is an operator with moveable limits.
elseif tree.id == "sup" and isMoveableLimits(tree[1]) then
-- depending on whether the base is an operator with moveable limits,
-- or a brace-like command.
elseif tree.id == "sup" and isMoveableLimitsOrAlwaysStacked(tree[1]) then
tree.command = "mover"
elseif tree.id == "sub" and isMoveableLimits(tree[1]) then
elseif tree.id == "sub" and isMoveableLimitsOrAlwaysStacked(tree[1]) then
tree.command = "munder"
elseif tree.id == "subsup" and isMoveableLimits(tree[1]) then
elseif tree.id == "subsup" and isMoveableLimitsOrAlwaysStacked(tree[1]) then
tree.command = "munderover"
elseif tree.id == "supsub" and isMoveableLimits(tree[1]) then
elseif tree.id == "supsub" and isMoveableLimitsOrAlwaysStacked(tree[1]) then
tree.command = "munderover"
local tmp = tree[2]
tree[2] = tree[3]
Expand Down Expand Up @@ -638,7 +657,7 @@ local function compileToMathML_aux (_, arg_env, tree)
elseif tree.id == "command" and symbols[tree.command] then
local atom = { id = "atom", [1] = symbols[tree.command] }
if isAccentSymbol(symbols[tree.command]) and #tree > 0 then
-- LaTeX-style accents \vec{v} = <mover accent="true"><mi>v</mi><mo></mo></mover>
-- LaTeX-style accents \overrightarrow{v} = <mover accent="true"><mi>v</mi><mo>&#x20D7;</mo></mover>
local accent = {
id = "command",
command = "mover",
Expand All @@ -649,6 +668,18 @@ local function compileToMathML_aux (_, arg_env, tree)
accent[1] = compileToMathML_aux(nil, arg_env, tree[1])
accent[2] = compileToMathML_aux(nil, arg_env, atom)
tree = accent
elseif isBottomAccentSymbol(symbols[tree.command]) and #tree > 0 then
-- LaTeX-style bottom accents \underleftarrow{v} = <munder accentunder="true"><mi>v</mi><mo>&#x20EE;</mo></munder>
local accent = {
id = "command",
command = "munder",
options = {
accentunder = "true",
},
}
accent[1] = compileToMathML_aux(nil, arg_env, tree[1])
accent[2] = compileToMathML_aux(nil, arg_env, atom)
tree = accent
elseif #tree > 0 then
-- Play cool with LaTeX-style commands that don't take arguments:
-- Edge case for non-accent symbols so we don't lose bracketed groups
Expand Down Expand Up @@ -728,6 +759,80 @@ registerCommand("mn", { [1] = objType.str }, function (x)
return x
end)

-- Register a limit-like variant command.
-- The variants of superior, inferior, projective and injective limits are
-- special: their sub/sup must behave as with a movablelimits operator, yet
-- they also carry a symbol stacked on the limit text, and that symbol must
-- not be movable.
-- The MathML specification does not seem to offer a proper way to express
-- this: MathML Core appears to only allow movablelimits on <mo> elements,
-- with <mover>/<munder> possibly inheriting it from their "core operator",
-- whereas here only the limit sup/sub should move, not the accent.
-- Hence a hack based on an internal flag. "\def" is also avoided here, so as
-- to prevent unwanted mrows.
-- NOTE(review): the accent <mo> is given accentunder="true" whatever the
-- outer command is (mover included) -- confirm this is intended.
-- @tparam string name TeX command name
-- @tparam string command MathML command (mover or munder)
-- @tparam number symbol Unicode codepoint for the accent symbol
-- @tparam string text Text representation
local function registerVarLimits (name, command, symbol, text)
   registerCommand(name, {}, function ()
      -- The outer element is flagged as an accent on the relevant side.
      local outerOptions
      if command == "mover" then
         outerOptions = { accent = "true" }
      else
         outerOptions = { accentunder = "true" }
      end
      -- Base: the limit text as an operator, explicitly not movable by itself.
      local limitOperator = {
         command = "mo",
         options = { atom = "op", movablelimits = false },
         text,
      }
      -- Script: the accent symbol stacked on the limit text.
      local accentOperator = {
         command = "mo",
         options = { accentunder = "true" },
         luautf8.char(symbol),
      }
      return {
         command = command,
         is_hacked_movablelimits = true, -- Internal flag to mark this as a hack
         options = outerOptions,
         limitOperator,
         accentOperator,
      }
   end)
end
registerVarLimits("varlimsup", "mover", 0x203E, "lim") -- U+203E OVERLINE
registerVarLimits("varliminf", "munder", 0x203E, "lim") -- U+203E OVERLINE
registerVarLimits("varprojlim", "munder", 0x2190, "lim") -- U+2190 LEFTWARDS ARROW
registerVarLimits("varinjlim", "munder", 0x2192, "lim") -- U+2192 RIGHTWARDS ARROW

-- Register a brace-like command.
-- Such symbols are accents per se in MathML, and are non-combining in Unicode,
-- but TeX treats them as "pseudo-accent" stretchy symbols.
-- Moreover, they accept a sub/sup which is always stacked, and not movable,
-- so the resulting tree is marked with an internal flag.
-- "\def" is also avoided here, to prevent unwanted mrows resulting from the
-- compilation of the argument.
-- @tparam string name TeX command name
-- @tparam string command MathML command (mover or munder)
-- @tparam number symbol Unicode codepoint for the brace symbol
local function registerBraceLikeCommands (name, command, symbol)
   local parameters = { [1] = objType.tree }
   registerCommand(name, parameters, function (tree)
      -- The outer element is flagged as an accent on the relevant side.
      local outerOptions
      if command == "mover" then
         outerOptions = { accent = "true" }
      else
         outerOptions = { accentunder = "true" }
      end
      -- Base: the first element of the command argument.
      local base = tree[1]
      -- Script: the brace symbol, as a stretchy operator.
      local brace = {
         command = "mo",
         options = { stretchy = "true" },
         luautf8.char(symbol),
      }
      return {
         command = command,
         is_always_stacked = true, -- Internal flag to mark this as a brace-like command
         options = outerOptions,
         base,
         brace,
      }
   end)
end
-- Note: the following overrides the default commands from xml-entities / unicode-math.
registerBraceLikeCommands("overbrace", "mover", 0x23DE) -- U+23DE TOP CURLY BRACKET
registerBraceLikeCommands("underbrace", "munder", 0x23DF) -- U+23DF BOTTOM CURLY BRACKET
registerBraceLikeCommands("overparen", "mover", 0x23DC) -- U+23DC TOP PARENTHESIS
registerBraceLikeCommands("underparen", "munder", 0x23DD) -- U+23DD BOTTOM PARENTHESIS
registerBraceLikeCommands("overbracket", "mover", 0x23B4) -- U+23B4 TOP SQUARE BRACKET
registerBraceLikeCommands("underbracket", "munder", 0x23B5) -- U+23B5 BOTTOM SQUARE BRACKET

compileToMathML(
nil,
{},
Expand All @@ -737,7 +842,6 @@ compileToMathML(
\def{sqrt}{\msqrt{#1}}
\def{bi}{\mi[mathvariant=bold-italic]{#1}}
\def{dsi}{\mi[mathvariant=double-struck]{#1}}
\def{vec}{\mover[accent=true]{#1}{\rightarrow}}
% From amsmath:
\def{to}{\mo[atom=bin]{→}}
Expand Down
Loading

0 comments on commit c9756a2

Please sign in to comment.