Skip to content

Commit

Permalink
Merge pull request #2113 from Omikhleia/fix-ast-differences
Browse files Browse the repository at this point in the history
  • Loading branch information
alerque authored Oct 3, 2024
2 parents 17371f3 + ea58f4c commit 7830833
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 55 deletions.
2 changes: 1 addition & 1 deletion core/sile.lua
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ function SILE.process (ast)
content()
elseif SILE.Commands[content.command] then
SILE.call(content.command, content.options, content)
elseif content.id == "content" or (not content.command and not content.id) then
elseif not content.command and not content.id then
local pId = SILE.traceStack:pushContent(content, "content")
SILE.process(content)
SILE.traceStack:pop(pId)
Expand Down
4 changes: 2 additions & 2 deletions core/utilities/ast.lua
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ function ast.debug (tree, level)
local out = string.rep(" ", 1 + level)
if level == 0 then
SU.debug("ast", function ()
return "[" .. SILE.currentlyProcessingFile
return "[" .. (SILE.currentlyProcessingFile or "<nowhere>")
end)
end
if type(tree) == "function" then
Expand All @@ -36,7 +36,7 @@ function ast.debug (tree, level)
if #content >= 1 then
ast.debug(content, level + 1)
end
elseif content.id == "content" or (not content.command and not content.id) then
elseif not content.command and not content.id then
ast.debug(content, level + 1)
else
SU.debug("ast", function ()
Expand Down
4 changes: 4 additions & 0 deletions inputters/base.lua
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ end
--- Parse a document and hand its first syntax tree to SILE for processing.
-- @param doc Input passed unchanged to `self:parse`.
-- @return Whatever `SILE.process` returns for the selected tree.
function inputter:process (doc)
   -- parse() already returns a list of ASTs; for now only the first one is used.
   local parsed = self:parse(doc)
   local root = parsed[1]
   -- The raw dump is only emitted when both debug flags are enabled.
   local wantsDump = SU.debugging("inputter") and SU.debugging("ast")
   if wantsDump then
      SU.debug("inputter", "Dumping AST tree before processing...\n")
      SU.dump(root)
   end
   self:requireClass(root)
   return SILE.process(root)
end
Expand Down
104 changes: 77 additions & 27 deletions inputters/sil.lua
Original file line number Diff line number Diff line change
Expand Up @@ -80,49 +80,96 @@ local function getline (str, pos)
return lno, col
end

local function massage_ast (tree, doc)
local function ast_from_parse_tree (tree, doc, depth)
if type(tree) == "string" then
return tree
end

if tree.pos then
tree.lno, tree.col = getline(doc, tree.pos)
tree.pos = nil
end
SU.debug("inputter", "Processing ID:", tree.id)
if false or tree.id == "comment" then
SU.debug("inputter", "Discarding comment:", pl.stringx.strip(tree[1]))
return {}

local sep -- luacheck: ignore 211
if SU.debugging("inputter") then
depth = depth + 1
sep = (" "):rep(depth)
end
SU.debug("inputter", sep and (sep .. "Processing ID:"), tree.id)

local res
if tree.id == "comment" then
-- Drop comments
SU.debug("inputter", sep and (sep .. "Discarding comment"))
res = {}
elseif
false
or tree.id == "document"
or tree.id == "braced_content"
or tree.id == "passthrough_content"
or tree.id == "braced_passthrough_content"
or tree.id == "env_passthrough_content"
then
SU.debug("inputter", "Re-massage subtree", tree.id)
return massage_ast(tree[1], doc)
elseif
false
or tree.id == "text"
or tree.id == "passthrough_text"
or tree.id == "braced_passthrough_text"
or tree.id == "env_passthrough_text"
then
SU.debug("inputter", " - Collapse subtree")
return tree[1]
elseif false or tree.id == "content" or tree.id == "environment" or tree.id == "command" then
SU.debug("inputter", " - Massage in place", tree.id)
for key, val in ipairs(tree) do
SU.debug("inputter", " -", val.id)
if val.id == "content" then
SU.splice(tree, key, key, massage_ast(val, doc))
elseif val.id then -- requiring an id discards nodes with no content such as comments
tree[key] = massage_ast(val, doc)
-- These nodes have only one child, which needs recursion.
SU.debug("inputter", sep and (sep .. "Massaging a node"))
res = ast_from_parse_tree(tree[1], doc, depth)
--res = #res > 1 and not res.id and res or res[1]
elseif false or tree.id == "environment" or tree.id == "command" then
-- These nodes have multiple children, which need recursion.
SU.debug("inputter", sep and (sep .. "Processing command"), tree.command, #tree, "subtrees")
local newtree = { -- I don't think we can avoid a shallow copy here
command = tree.command,
options = tree.options,
id = tree.id,
lno = tree.lno,
col = tree.col,
}
for _, node in ipairs(tree) do
if type(node) == "table" then
SU.debug("inputter", sep and (sep .. " -"), node.id or "table")
local ast_node = ast_from_parse_tree(node, doc, depth)
if type(ast_node) == "table" and not ast_node.id then
SU.debug("inputter", sep and (sep .. " -"), "Collapsing subtree")
-- Comments can yield an empty table, skip them
if #ast_node > 0 then
-- Simplify the tree if it's just a plain list
for _, child in ipairs(ast_node) do
if type(child) ~= "table" or child.id or #child > 0 then
table.insert(newtree, child)
end
end
end
else
table.insert(newtree, ast_node)
end
end
-- Non table nodes are skipped (e.g. extraneous text from 'raw' commands)
end
return tree
res = newtree
elseif tree.id == "content" then
-- This node has multiple children, which need recursion
-- And the node itself needs to be replaced with its children
SU.debug("inputter", sep and (sep .. "Massage content node"), #tree, "subtrees")
local newtree = {} -- I don't think we can avoid a shallow copy here
for i, node in ipairs(tree) do
SU.debug("inputter", sep and (sep .. " -"), node.id)
newtree[i] = ast_from_parse_tree(node, doc, depth)
end
-- Simplify the tree if it has only one child
res = #newtree == 1 and not newtree.id and newtree[1] or newtree
elseif tree.id then
-- Shouldn't happen, or we missed something
SU.error("Unknown node type: " .. tree.id)
else
SU.debug("inputter", sep and (sep .. "Table node"), #tree, "subtrees")
res = #tree == 1 and tree[1] or tree
end
SU.debug("inputter", sep and (sep .. "Returning a"), type(res) == "table" and res.id or "string")
return res
end

function inputter:parse (doc)
Expand All @@ -138,24 +185,27 @@ function inputter:parse (doc)
thrown from document beginning.]]):format(pl.stringx.indent(result, 6)))
end
resetCache()
local top = massage_ast(result[1], doc)
local top = ast_from_parse_tree(result[1], doc, 0)
local tree
-- Content not part of a tagged command could either be part of a document
-- fragment or junk (e.g. comments, whitespace) outside of a document tag. We
-- need to either capture the document tag only or decide this is a fragment
-- and wrap it in a document tag.
for _, leaf in ipairs(top) do
if leaf.command and (leaf.command == "document" or leaf.command == "sile") then
tree = leaf
break
if top.command == "document" or top.command == "sile" then
tree = top
elseif type(top) == "table" then
for _, leaf in ipairs(top) do
if leaf.command and (leaf.command == "document" or leaf.command == "sile") then
tree = leaf
break
end
end
end
-- In the event we didn't isolate a top level document tag above, assume this
-- is a fragment and wrap it in one.
if not tree then
tree = { top, command = "document" }
end
-- SU.dump(tree)
return { tree }
end

Expand Down
48 changes: 29 additions & 19 deletions inputters/sil_spec.lua
Original file line number Diff line number Diff line change
Expand Up @@ -8,68 +8,78 @@ describe("#SIL #inputter", function ()

describe("should parse", function ()
it("commands with content", function ()
local t = inputter:parse([[\foo{bar}]])[1][1][1]
local t = inputter:parse([[\foo{bar}]])[1][1]
assert.is.equal("foo", t.command)
assert.is.equal("bar", t[1][1])
assert.is.equal("bar", t[1])
end)

it("commands without content", function ()
local t = inputter:parse([[\foo{\foo bar}]])[1][1][1]
local t = inputter:parse([[\foo{\foo bar}]])[1][1]
assert.is.equal("foo", t.command)
assert.is.equal("foo", t[1][1].command)
assert.is.equal(" bar", t[1][2])
assert.is.equal(nil, t[1][1][1])
assert.is.equal("foo", t[1].command)
assert.is.equal(" bar", t[2])
assert.is.equal(nil, t[1][1])
end)

it("commands with arg", function ()
local t = inputter:parse([[\foo[baz=qiz]{bar}]])[1][1][1]
local t = inputter:parse([[\foo[baz=qiz]{bar}]])[1][1]
assert.is.equal("foo", t.command)
assert.is.equal("qiz", t.options.baz)
assert.is.equal("bar", t[1][1])
assert.is.equal("bar", t[1])
end)

it("commands with multiple args", function ()
local t = inputter:parse([[\foo[baz=qiz,qiz=baz]{bar}]])[1][1][1]
local t = inputter:parse([[\foo[baz=qiz,qiz=baz]{bar}]])[1][1]
assert.is.equal("foo", t.command)
assert.is.equal("qiz", t.options.baz)
assert.is.equal("baz", t.options.qiz)
assert.is.equal("bar", t[1][1])
assert.is.equal("bar", t[1])
end)

it("commands with quoted arg", function ()
local t = inputter:parse([[\foo[baz="qiz qiz"]{bar}]])[1][1][1]
local t = inputter:parse([[\foo[baz="qiz qiz"]{bar}]])[1][1]
assert.is.equal("foo", t.command)
assert.is.equal("qiz qiz", t.options.baz)
assert.is.equal("bar", t[1][1])
assert.is.equal("bar", t[1])
end)

it("commands with space around args and values", function ()
local t = inputter:parse([[\foo[ baz = qiz qiz ]{bar}]])[1][1][1]
local t = inputter:parse([[\foo[ baz = qiz qiz ]{bar}]])[1][1]
assert.is.equal("foo", t.command)
assert.is.equal("qiz qiz", t.options.baz)
end)

it("commands with multiple quoted args", function ()
local t = inputter:parse([[\foo[baz="qiz, qiz",qiz="baz, baz"]{bar}]])[1][1][1]
local t = inputter:parse([[\foo[baz="qiz, qiz",qiz="baz, baz"]{bar}]])[1][1]
assert.is.equal("foo", t.command)
assert.is.equal("qiz, qiz", t.options.baz)
assert.is.equal("baz, baz", t.options.qiz)
assert.is.equal("bar", t[1][1])
assert.is.equal("bar", t[1])
end)

it("commands with quoted arg with escape", function ()
local t = inputter:parse([[\foo[baz="qiz \"qiz\""]{bar}]])[1][1][1]
local t = inputter:parse([[\foo[baz="qiz \"qiz\""]{bar}]])[1][1]
assert.is.equal("foo", t.command)
assert.is.equal('qiz "qiz"', t.options.baz)
assert.is.equal("bar", t[1][1])
assert.is.equal("bar", t[1])
end)

it("fragments with multiple top level nodes", function ()
local t = inputter:parse([[foo \bar{bar}]])[1]
local t = inputter:parse([[foo \bar{baz}]])[1]
assert.is.equal("document", t.command)
assert.is.equal("foo ", t[1][1])
assert.is.equal("bar", t[1][2].command)
assert.is.equal("bar", t[1][2][1][1])
assert.is.equal("baz", t[1][2][1])
end)

it("commands and environments to equivalent syntax trees", function ()
local t1 = inputter:parse([[\document{\em{emphasis}}]])[1]
local t2 = inputter:parse([[\begin{document}\begin{em}emphasis\end{em}\end{document}]])[1]
-- The "col" positions will differ, and we don't care about them
-- The "id" will differ, make it identical for comparison
local s1 = pl.pretty.write(t1, ""):gsub("col=%d+", "col=N")
local s2 = pl.pretty.write(t2, ""):gsub('id="environment"', 'id="command"'):gsub("col=%d+", "col=N")
assert.is.equal(s1, s2)
end)
end)

Expand Down
8 changes: 2 additions & 6 deletions packages/autodoc/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,8 @@ local function typesetAST (options, content)
else
seenCommandWithoutArg = true
end
elseif ast.id == "content" or (not ast.command and not ast.id) then
-- Due to the way it is implemented, the SILE-inputter may generate such
-- nodes in the AST. It's poorly documented, so it's not clear why they
-- are even kept there (esp. the "content" nodes), but anyhow, as
-- far as autodoc is concerned for presentation purposes, just
-- recurse into them.
elseif not ast.command and not ast.id then
-- Mere list of nodes
typesetAST(options, ast)
else
SU.error("Unrecognized AST element, type " .. type(ast))
Expand Down

0 comments on commit 7830833

Please sign in to comment.