diff --git a/core/sile.lua b/core/sile.lua index a503aa6da..9954b8ba3 100644 --- a/core/sile.lua +++ b/core/sile.lua @@ -415,7 +415,7 @@ function SILE.process (ast) content() elseif SILE.Commands[content.command] then SILE.call(content.command, content.options, content) - elseif content.id == "content" or (not content.command and not content.id) then + elseif not content.command and not content.id then local pId = SILE.traceStack:pushContent(content, "content") SILE.process(content) SILE.traceStack:pop(pId) diff --git a/core/utilities/ast.lua b/core/utilities/ast.lua index bbfe0a669..d78fc9e10 100644 --- a/core/utilities/ast.lua +++ b/core/utilities/ast.lua @@ -15,7 +15,7 @@ function ast.debug (tree, level) local out = string.rep(" ", 1 + level) if level == 0 then SU.debug("ast", function () - return "[" .. SILE.currentlyProcessingFile + return "[" .. (SILE.currentlyProcessingFile or "") end) end if type(tree) == "function" then @@ -36,7 +36,7 @@ function ast.debug (tree, level) if #content >= 1 then ast.debug(content, level + 1) end - elseif content.id == "content" or (not content.command and not content.id) then + elseif not content.command and not content.id then ast.debug(content, level + 1) else SU.debug("ast", function () diff --git a/inputters/base.lua b/inputters/base.lua index 981b430a3..d8d684b2c 100644 --- a/inputters/base.lua +++ b/inputters/base.lua @@ -49,6 +49,10 @@ end function inputter:process (doc) -- Input parsers can already return multiple ASTs, but so far we only process one local tree = self:parse(doc)[1] + if SU.debugging("inputter") and SU.debugging("ast") then + SU.debug("inputter", "Dumping AST tree before processing...\n") + SU.dump(tree) + end self:requireClass(tree) return SILE.process(tree) end diff --git a/inputters/sil.lua b/inputters/sil.lua index 3ff06c0cd..a0e8b636a 100644 --- a/inputters/sil.lua +++ b/inputters/sil.lua @@ -80,18 +80,28 @@ local function getline (str, pos) return lno, col end -local function massage_ast (tree, 
doc) +local function ast_from_parse_tree (tree, doc, depth) if type(tree) == "string" then return tree end + if tree.pos then tree.lno, tree.col = getline(doc, tree.pos) tree.pos = nil end - SU.debug("inputter", "Processing ID:", tree.id) - if false or tree.id == "comment" then - SU.debug("inputter", "Discarding comment:", pl.stringx.strip(tree[1])) - return {} + + local sep -- luacheck: ignore 211 + if SU.debugging("inputter") then + depth = depth + 1 + sep = (" "):rep(depth) + end + SU.debug("inputter", sep and (sep .. "Processing ID:"), tree.id) + + local res + if tree.id == "comment" then + -- Drop comments + SU.debug("inputter", sep and (sep .. "Discarding comment")) + res = {} elseif false or tree.id == "document" @@ -99,30 +109,67 @@ local function massage_ast (tree, doc) or tree.id == "passthrough_content" or tree.id == "braced_passthrough_content" or tree.id == "env_passthrough_content" - then - SU.debug("inputter", "Re-massage subtree", tree.id) - return massage_ast(tree[1], doc) - elseif - false or tree.id == "text" or tree.id == "passthrough_text" or tree.id == "braced_passthrough_text" or tree.id == "env_passthrough_text" then - SU.debug("inputter", " - Collapse subtree") - return tree[1] - elseif false or tree.id == "content" or tree.id == "environment" or tree.id == "command" then - SU.debug("inputter", " - Massage in place", tree.id) - for key, val in ipairs(tree) do - SU.debug("inputter", " -", val.id) - if val.id == "content" then - SU.splice(tree, key, key, massage_ast(val, doc)) - elseif val.id then -- requiring an id discards nodes with no content such as comments - tree[key] = massage_ast(val, doc) + -- These nodes have only one child, which needs recursion. + SU.debug("inputter", sep and (sep .. 
"Massaging a node")) + res = ast_from_parse_tree(tree[1], doc, depth) + --res = #res > 1 and not res.id and res or res[1] + elseif false or tree.id == "environment" or tree.id == "command" then + -- These nodes have multiple children, which need recursion. + SU.debug("inputter", sep and (sep .. "Processing command"), tree.command, #tree, "subtrees") + local newtree = { -- I don't think we can avoid a shallow copy here + command = tree.command, + options = tree.options, + id = tree.id, + lno = tree.lno, + col = tree.col, + } + for _, node in ipairs(tree) do + if type(node) == "table" then + SU.debug("inputter", sep and (sep .. " -"), node.id or "table") + local ast_node = ast_from_parse_tree(node, doc, depth) + if type(ast_node) == "table" and not ast_node.id then + SU.debug("inputter", sep and (sep .. " -"), "Collapsing subtree") + -- Comments can be an empty table, skip them + if #ast_node > 0 then + -- Simplify the tree if it's just a plain list + for _, child in ipairs(ast_node) do + if type(child) ~= "table" or child.id or #child > 0 then + table.insert(newtree, child) + end + end + end + else + table.insert(newtree, ast_node) + end end + -- Non table nodes are skipped (e.g. extraneous text from 'raw' commands) end - return tree + res = newtree + elseif tree.id == "content" then + -- This node has multiple children, which need recursion + -- And the node itself needs to be replaced with its children + SU.debug("inputter", sep and (sep .. "Massage content node"), #tree, "subtrees") + local newtree = {} -- I don't think we can avoid a shallow copy here + for i, node in ipairs(tree) do + SU.debug("inputter", sep and (sep .. " -"), node.id) + newtree[i] = ast_from_parse_tree(node, doc, depth) + end + -- Simplify the tree if it has only one child + res = #newtree == 1 and not newtree.id and newtree[1] or newtree + elseif tree.id then + -- Shouldn't happen, or we missed something + SU.error("Unknown node type: " .. tree.id) + else + SU.debug("inputter", sep and (sep ..
"Table node"), #tree, "subtrees") + res = #tree == 1 and tree[1] or tree end + SU.debug("inputter", sep and (sep .. "Returning a"), type(res) == "table" and res.id or "string") + return res end function inputter:parse (doc) @@ -138,16 +185,20 @@ function inputter:parse (doc) thrown from document beginning.]]):format(pl.stringx.indent(result, 6))) end resetCache() - local top = massage_ast(result[1], doc) + local top = ast_from_parse_tree(result[1], doc, 0) local tree -- Content not part of a tagged command could either be part of a document -- fragment or junk (e.g. comments, whitespace) outside of a document tag. We -- need to either capture the document tag only or decide this is a fragment -- and wrap it in a document tag. - for _, leaf in ipairs(top) do - if leaf.command and (leaf.command == "document" or leaf.command == "sile") then - tree = leaf - break + if top.command == "document" or top.command == "sile" then + tree = top + elseif type(top) == "table" then + for _, leaf in ipairs(top) do + if leaf.command and (leaf.command == "document" or leaf.command == "sile") then + tree = leaf + break + end end end -- In the event we didn't isolate a top level document tag above, assume this @@ -155,7 +206,6 @@ function inputter:parse (doc) if not tree then tree = { top, command = "document" } end - -- SU.dump(tree) return { tree } end diff --git a/inputters/sil_spec.lua b/inputters/sil_spec.lua index 2ce98bd0d..78fdf4785 100644 --- a/inputters/sil_spec.lua +++ b/inputters/sil_spec.lua @@ -8,68 +8,78 @@ describe("#SIL #inputter", function () describe("should parse", function () it("commands with content", function () - local t = inputter:parse([[\foo{bar}]])[1][1][1] + local t = inputter:parse([[\foo{bar}]])[1][1] assert.is.equal("foo", t.command) - assert.is.equal("bar", t[1][1]) + assert.is.equal("bar", t[1]) end) it("commands without content", function () - local t = inputter:parse([[\foo{\foo bar}]])[1][1][1] + local t = inputter:parse([[\foo{\foo bar}]])[1][1] 
assert.is.equal("foo", t.command) - assert.is.equal("foo", t[1][1].command) - assert.is.equal(" bar", t[1][2]) - assert.is.equal(nil, t[1][1][1]) + assert.is.equal("foo", t[1].command) + assert.is.equal(" bar", t[2]) + assert.is.equal(nil, t[1][1]) end) it("commands with arg", function () - local t = inputter:parse([[\foo[baz=qiz]{bar}]])[1][1][1] + local t = inputter:parse([[\foo[baz=qiz]{bar}]])[1][1] assert.is.equal("foo", t.command) assert.is.equal("qiz", t.options.baz) - assert.is.equal("bar", t[1][1]) + assert.is.equal("bar", t[1]) end) it("commands with multiple args", function () - local t = inputter:parse([[\foo[baz=qiz,qiz=baz]{bar}]])[1][1][1] + local t = inputter:parse([[\foo[baz=qiz,qiz=baz]{bar}]])[1][1] assert.is.equal("foo", t.command) assert.is.equal("qiz", t.options.baz) assert.is.equal("baz", t.options.qiz) - assert.is.equal("bar", t[1][1]) + assert.is.equal("bar", t[1]) end) it("commands with quoted arg", function () - local t = inputter:parse([[\foo[baz="qiz qiz"]{bar}]])[1][1][1] + local t = inputter:parse([[\foo[baz="qiz qiz"]{bar}]])[1][1] assert.is.equal("foo", t.command) assert.is.equal("qiz qiz", t.options.baz) - assert.is.equal("bar", t[1][1]) + assert.is.equal("bar", t[1]) end) it("commands with space around args and values", function () - local t = inputter:parse([[\foo[ baz = qiz qiz ]{bar}]])[1][1][1] + local t = inputter:parse([[\foo[ baz = qiz qiz ]{bar}]])[1][1] assert.is.equal("foo", t.command) assert.is.equal("qiz qiz", t.options.baz) end) it("commands with multiple quoted args", function () - local t = inputter:parse([[\foo[baz="qiz, qiz",qiz="baz, baz"]{bar}]])[1][1][1] + local t = inputter:parse([[\foo[baz="qiz, qiz",qiz="baz, baz"]{bar}]])[1][1] assert.is.equal("foo", t.command) assert.is.equal("qiz, qiz", t.options.baz) assert.is.equal("baz, baz", t.options.qiz) - assert.is.equal("bar", t[1][1]) + assert.is.equal("bar", t[1]) end) it("commands with quoted arg with escape", function () - local t = 
inputter:parse([[\foo[baz="qiz \"qiz\""]{bar}]])[1][1][1] + local t = inputter:parse([[\foo[baz="qiz \"qiz\""]{bar}]])[1][1] assert.is.equal("foo", t.command) assert.is.equal('qiz "qiz"', t.options.baz) - assert.is.equal("bar", t[1][1]) + assert.is.equal("bar", t[1]) end) it("fragments with multiple top level nodes", function () - local t = inputter:parse([[foo \bar{bar}]])[1] + local t = inputter:parse([[foo \bar{baz}]])[1] assert.is.equal("document", t.command) assert.is.equal("foo ", t[1][1]) assert.is.equal("bar", t[1][2].command) - assert.is.equal("bar", t[1][2][1][1]) + assert.is.equal("baz", t[1][2][1]) + end) + + it("commands and environments to equivalent syntax trees", function () + local t1 = inputter:parse([[\document{\em{emphasis}}]])[1] + local t2 = inputter:parse([[\begin{document}\begin{em}emphasis\end{em}\end{document}]])[1] + -- The "col" positions will differ, and we don't care about them + -- The "id" will differ, make it identical for comparison + local s1 = pl.pretty.write(t1, ""):gsub("col=%d+", "col=N") + local s2 = pl.pretty.write(t2, ""):gsub('id="environment"', 'id="command"'):gsub("col=%d+", "col=N") + assert.is.equal(s1, s2) end) end) diff --git a/packages/autodoc/init.lua b/packages/autodoc/init.lua index 8a33d476b..a9c9d8677 100644 --- a/packages/autodoc/init.lua +++ b/packages/autodoc/init.lua @@ -101,12 +101,8 @@ local function typesetAST (options, content) else seenCommandWithoutArg = true end - elseif ast.id == "content" or (not ast.command and not ast.id) then - -- Due to the way it is implemented, the SILE-inputter may generate such - -- nodes in the AST. It's poorly documented, so it's not clear why they - -- are even kept there (esp. the "content" nodes), but anyhow, as - -- far as autodoc is concerned for presentation purposes, just - -- recurse into them. + elseif not ast.command and not ast.id then + -- Mere list of nodes typesetAST(options, ast) else SU.error("Unrecognized AST element, type " .. type(ast))