Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: Use non-lazy AST traversal for filters when possible #11052

Draft
wants to merge 41 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
bf4ea8a
Add `jog` Lua module
tarleb Sep 13, 2024
a06432b
Use jog instead of walk
tarleb Sep 13, 2024
2fefec0
Force use of pandoc walk for some filters
tarleb Sep 15, 2024
6e725bf
Add mechanism to check `jog` results
tarleb Oct 13, 2024
7d4f51d
Make jog more forgiving
tarleb Oct 15, 2024
35d1bbc
Add attribute checks
tarleb Oct 23, 2024
756a355
Rely less on pandoc's type implicit conversions
tarleb Oct 24, 2024
85eab47
Prevent unintended side-effects of filter functions
tarleb Nov 18, 2024
5f9f89b
table-rawhtml: destructively modify the input list
tarleb Nov 19, 2024
2df4c1b
lightbox: Restart imgCount each time the filter is invoked.
tarleb Nov 19, 2024
73ff5aa
Allow checks for inadvertent AST modifications
tarleb Nov 19, 2024
53fd9f8
Always use jog, except for user-filters
tarleb Nov 19, 2024
e19df02
Use jog for more filters
tarleb Nov 19, 2024
cc7d692
[REVIEW REQUIRED] Return modified meta object from init filter
tarleb Nov 19, 2024
51c8800
[WIP] Cleanup parsefiguredivs filters
tarleb Nov 19, 2024
7bbcf7c
Make `jog` more robust when traversing topdown
tarleb Nov 19, 2024
260accd
Ensure proper types in Meta objects
tarleb Nov 19, 2024
5b66cab
Use proper metadata types when resolving dependencies
tarleb Nov 19, 2024
139faac
Better type checking for meta values
tarleb Nov 19, 2024
d7dfc72
Enable attribute checks only if QUARTO_JOG_CHECK is set
tarleb Nov 20, 2024
13de165
Enable "jog" debugging in smoke tests
tarleb Oct 13, 2024
2b98677
Ensure that the internal Meta copy is a true, deep copy
tarleb Nov 20, 2024
c23f32d
More MetaValue typecheck fixes
tarleb Nov 20, 2024
488a7cf
Allow to pass extra info to conversion warner
tarleb Nov 20, 2024
4643a27
Fix MetaValue conversions for empty Lists
tarleb Nov 20, 2024
bb711e0
[FIXME] This fixes things. I have no idea why
tarleb Nov 20, 2024
598180c
Don't add number values to the metadata object
tarleb Nov 20, 2024
56898c1
attribcheck: improve robustness of warn function
tarleb Nov 20, 2024
57ea8f3
Fix crash when checking Meta values for mutations
tarleb Nov 20, 2024
dac3b25
Custom nodes: cleanup code for slots/forwarder
tarleb Nov 21, 2024
5c4c6d0
Fix type of return value in typst filter function
tarleb Nov 21, 2024
e94c194
Attribcheck: Export type checker function
tarleb Nov 21, 2024
759d275
Jog check: Validate Meta types after each emulated filter
tarleb Nov 21, 2024
af3e1b0
Fix type confusion in crossref code
tarleb Nov 21, 2024
9e3a4a2
Add type-checking for some Cell attributes
tarleb Nov 21, 2024
b1f23d2
Fix copy table
tarleb Nov 21, 2024
c52787c
Error out on failed attribute type checks
tarleb Nov 21, 2024
0ef5b59
Accomodate jog and attrcheck to pandoc dev version
tarleb Nov 22, 2024
307aff4
Fix type in Caption
tarleb Nov 22, 2024
4fb1c0a
Fix more caption types
tarleb Nov 22, 2024
8b4d18a
[FIXME] Just want to try if this works
tarleb Nov 22, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/test-smokes.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ jobs:
- os: windows-latest
time-test: true
runs-on: ${{ matrix.os }}
env:
QUARTO_JOG_CHECK: true
steps:
- name: Checkout Repo
uses: actions/checkout@v4
Expand Down
48 changes: 32 additions & 16 deletions src/resources/filters/ast/customnodes.lua
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ function is_regular_node(node, name)
return node
end

function run_emulated_filter(doc, filter)
function run_emulated_filter(doc, filter, force_use_walk)
if doc == nil then
return nil
end
Expand Down Expand Up @@ -73,7 +73,10 @@ function run_emulated_filter(doc, filter)
-- luacov: enable
end
end
return node:walk(filter_param)
_QUARTO_USE_WALK = force_use_walk
local result = _quarto.modules.jog(node, filter_param)
_QUARTO_USE_WALK = false
return result
end

-- performance: if filter is empty, do nothing
Expand Down Expand Up @@ -240,6 +243,11 @@ function create_custom_node_scaffold(t, context)
return result
end


local ptype = pandoc.utils.type
local InlinesMT = getmetatable(pandoc.Inlines{})
local BlocksMT = getmetatable(pandoc.Blocks{})

function create_emulated_node(t, tbl, context, forwarder)
local result = create_custom_node_scaffold(t, context)
tbl.t = t -- set t always to custom ast type
Expand Down Expand Up @@ -337,21 +345,27 @@ _quarto.ast = {
end
local node = node_accessor(table)
local t = pandoc.utils.type(value)
-- FIXME this is broken; that can only be "Block", "Inline", etc
if t == "Div" or t == "Span" then
local custom_data, t, kind = _quarto.ast.resolve_custom_data(value)
if custom_data ~= nil then
value = custom_data
end
end
quarto_assert(t ~= 'Div' and t ~= 'Span', "")
if index > #node.content then
_quarto.ast.grow_scaffold(node, index)
end
local pt = pandoc.utils.type(value)
if pt == "Block" or pt == "Inline" then
node.content[index].content = {value}
local inner_node = node.content[index]
quarto_assert(inner_node ~= nil)
if t == "Block" then
inner_node.content = quarto.utils.as_blocks(value)
elseif t == "Inline" then
inner_node.content = quarto.utils.as_inlines(value)
else
node.content[index].content = value
local innertype = pandoc.utils.type(inner_node)
if innertype == 'Block' then
inner_node.content = quarto.utils.as_blocks(value)
elseif innertype == 'Inline' then
inner_node.content = quarto.utils.as_inlines(value)
else
warn('Cannot find the right content type for value ' .. t)
warn(debug.traceback())
inner_node.content = value
end
end
end
}
Expand Down Expand Up @@ -422,13 +436,15 @@ _quarto.ast = {
-- luacov: enable
end

local forwarder = { }
local forwarder
if tisarray(handler.slots) then
forwarder = pandoc.List{}
for i, slot in ipairs(handler.slots) do
forwarder[slot] = i
end
else
forwarder = handler.slots
elseif handler.slots ~= nil then
warn('Expected `slots` to be either an array or nil, got ' ..
tostring(handler.slots))
end

quarto[handler.ast_name] = function(params)
Expand Down
3 changes: 3 additions & 0 deletions src/resources/filters/ast/emulatedfilter.lua
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,9 @@ inject_user_filters_at_entry_points = function(filter_list)
end
local filter = {
name = entry_point .. "-user-" .. tostring(entry_point_counts[entry_point]),
-- The filter might not work as expected when doing a non-lazy jog, so
-- make sure it is processed with the default 'walk' function.
force_pandoc_walk = true,
}
if is_many_filters then
filter.filters = wrapped
Expand Down
104 changes: 102 additions & 2 deletions src/resources/filters/ast/runemulation.lua
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,97 @@ local function remove_vault(doc)
end
end

--- Create a deep copy of a table.
local function copy_table (tbl, depth, seen)
local tp = type(tbl)
if tp == 'table' then
local copy = {}
-- Iterate 'raw' pairs, i.e., without using metamethods
for key, value in next, tbl, nil do
if depth == 'shallow' then
copy[key] = value
else
copy[copy_table(key)] = copy_table(value)
end
end
return setmetatable(copy, getmetatable(tbl))
elseif tp == 'userdata' then
return tbl:clone()
else -- number, string, boolean, etc
return tbl
end
end

--- Checks if two tables are equal
function equals(o1, o2)
if o1 == o2 then
return true
end
local o1type = type(o1)
local o2type = type(o2)
if o1type ~= o2type or o1type ~= 'table' then
return false
end

local keys = {}

for key1, value1 in pairs(o1) do
local value2 = o2[key1]
if value2 == nil or equals(value1, value2) == false then
return false
end
keys[key1] = true
end

for key2 in pairs(o2) do
if not keys[key2] then return false end
end
return true
end

--- Checks if a filter follows the "nondestructive" property.
-- The nondestructive property is fulfilled if filter functions returns
-- an explicit object, or if it returns `nil` while leaving the passed
-- in object unmodified.
--
-- An error is raised if the property is violated.
--
-- Only filters with this property can use jog safely, without
-- unintended consequences.
local function check_nondestructive_property (namedfilter)
for name, fn in pairs(namedfilter.filter) do
if type(fn) == 'function' then
local copy = function (x)
local tp = type(x)
return tp ~= 'table' and x:clone() or
(pandoc.utils.type(x) == 'Meta' and pandoc.Meta(x) or copy_table(x))
end
namedfilter.filter[name] = function (obj, context)
local orig = copy(obj)
local result, descend = fn(obj, context)
if result == nil then
if type(obj) ~= 'table' and not equals(obj, orig) then
warn(
"\nFunction '" .. name .. "' in filter '" .. namedfilter.name ..
"' returned `nil`, but modified the input."
)
end
-- elseif result.t == obj.t and not rawequal(result, obj) then
-- warn(
-- "\nFunction '" .. name .. "' in filter '" .. namedfilter.name ..
-- "' returned a new object instead of passing the original one through."
-- )
end
return result, descend
end
end
end
return namedfilter
end

local function run_emulated_filter_chain(doc, filters, afterFilterPass, profiling)
init_trace(doc)
local compare_jog_and_walk = os.getenv 'QUARTO_JOG_CHECK'
for i, v in ipairs(filters) do
local function callback()
if v.flags then
Expand Down Expand Up @@ -79,7 +168,18 @@ local function run_emulated_filter_chain(doc, filters, afterFilterPass, profilin
print(pandoc.write(doc, "native"))
else
_quarto.ast._current_doc = doc
doc = run_emulated_filter(doc, v.filter)

if compare_jog_and_walk and not v.force_pandoc_walk then
v = check_nondestructive_property(v)
io.stderr:write('Running: ' .. v.name .. '\n')
end
doc = run_emulated_filter(doc, v.filter, v.force_pandoc_walk)

if compare_jog_and_walk and not v.force_pandoc_walk then
-- Types of meta values are only check on assignment.
doc.meta = doc.meta
end

ensure_vault(doc)

add_trace(doc, v.name)
Expand Down Expand Up @@ -204,4 +304,4 @@ function run_as_extended_ast(specTable)
end

return pandocFilterList
end
end
11 changes: 9 additions & 2 deletions src/resources/filters/common/error.lua
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,15 @@ function fail(message, level)
end
end

function internal_error()
fail("This is an internal error. Please file a bug report at https://github.com/quarto-dev/quarto-cli/", 5)
function internal_error(msg, level)
fail((msg and (msg .. '\n') or '') ..
"This is an internal error. Please file a bug report at https://github.com/quarto-dev/quarto-cli/", level or 5)
end

function quarto_assert (test, msg, level)
if not test then
internal_error(msg, level or 6)
end
end

function currentFile()
Expand Down
2 changes: 1 addition & 1 deletion src/resources/filters/common/layout.lua
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ end
-- we often wrap a table in a div, unwrap it
function tableFromLayoutCell(cell)
local tbl
cell:walk({
_quarto.modules.jog(cell, {
Table = function(t)
tbl = t
end
Expand Down
5 changes: 3 additions & 2 deletions src/resources/filters/common/log.lua
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,14 @@ function warn(message, offset)
io.stderr:write(lunacolors.yellow("WARNING (" .. caller_info(offset) .. ") " .. message .. "\n"))
end

local orig_error = error
function error(message, offset)
io.stderr:write(lunacolors.red("ERROR (" .. caller_info(offset) .. ") " .. message .. "\n"))
end

function fatal(message, offset)
io.stderr:write(lunacolors.red("FATAL (" .. caller_info(offset) .. ") " ..message .. "\n"))
-- TODO write stack trace into log, and then exit.
crash_with_stack_trace()
orig_error('FATAL QUARTO ERROR', offset)
end
-- luacov: enable
-- luacov: enable
6 changes: 3 additions & 3 deletions src/resources/filters/common/pandoc.lua
Original file line number Diff line number Diff line change
Expand Up @@ -217,14 +217,14 @@ function string_to_quarto_ast_blocks(text, opts)

-- run the whole normalization pipeline here to get extended AST nodes, etc.
for _, filter in ipairs(quarto_ast_pipeline()) do
doc = doc:walk(filter.filter)
doc = _quarto.modules.jog(doc, filter.filter)
end

-- compute flags so we don't skip filters that depend on them
doc:walk(compute_flags())
_quarto.modules.jog(doc, compute_flags())
return doc.blocks
end

function string_to_quarto_ast_inlines(text, sep)
return pandoc.utils.blocks_to_inlines(string_to_quarto_ast_blocks(text), sep)
end
end
4 changes: 2 additions & 2 deletions src/resources/filters/common/wrapped-filter.lua
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ function makeWrappedJsonFilter(scriptFile, filterHandler)
path = quarto.utils.resolve_path_relative_to_document(scriptFile)
local custom_node_map = {}
local has_custom_nodes = false
doc = doc:walk({
doc = _quarto.modules.jog(doc, {
-- FIXME: This is broken with new AST. Needs to go through Custom node instead.
RawInline = function(raw)
local custom_node, t, kind = _quarto.ast.resolve_custom_data(raw)
Expand Down Expand Up @@ -130,7 +130,7 @@ function makeWrappedJsonFilter(scriptFile, filterHandler)
return nil
end
if has_custom_nodes then
doc:walk({
_quarto.modules.jog(doc, {
Meta = function(meta)
_quarto.ast.reset_custom_tbl(meta["quarto-custom-nodes"])
end
Expand Down
2 changes: 1 addition & 1 deletion src/resources/filters/crossref/equations.lua
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ function process_equations(blockEl)
end

local mathInlines = nil
local targetInlines = pandoc.List()
local targetInlines = pandoc.Inlines{}

for i, el in ipairs(inlines) do

Expand Down
2 changes: 1 addition & 1 deletion src/resources/filters/crossref/index.lua
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ end
-- add an entry to the index
function indexAddEntry(label, parent, order, caption, appendix)
if caption ~= nil then
caption = pandoc.List(caption)
caption = pandoc.List{caption}
else
caption = pandoc.List({})
end
Expand Down
2 changes: 1 addition & 1 deletion src/resources/filters/crossref/preprocess.lua
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ function crossref_mark_subfloats()
return {
traverse = "topdown",
FloatRefTarget = function(float)
float.content = _quarto.ast.walk(float.content, {
float.content = _quarto.ast.walk(float.content or pandoc.Blocks{}, {
FloatRefTarget = function(subfloat)
float.has_subfloats = true
crossref.subfloats[subfloat.identifier] = {
Expand Down
9 changes: 8 additions & 1 deletion src/resources/filters/customnodes/callout.lua
Original file line number Diff line number Diff line change
Expand Up @@ -260,11 +260,18 @@ function _callout_main()
icon_color = "brand-color." .. callout_theme_color_map[callout.type]
end
end
local function is_nonempty_node (node)
return not not
(node or
(node.content and next(node.content)) or
next(node))
end
if callout.attr.identifier == "" then
return _quarto.format.typst.function_call("callout", {
{ "body", _quarto.format.typst.as_typst_content(callout.content) },
{ "title", _quarto.format.typst.as_typst_content(
callout.title or pandoc.Plain(_quarto.modules.callouts.displayName(callout.type))
(not is_nonempty_node(callout.title) and callout.title) or
pandoc.Plain(_quarto.modules.callouts.displayName(callout.type))
)},
{ "background_color", pandoc.RawInline("typst", background_color) },
{ "icon_color", pandoc.RawInline("typst", icon_color) },
Expand Down
3 changes: 2 additions & 1 deletion src/resources/filters/customnodes/content-hidden.lua
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ local _content_hidden_meta = nil
function content_hidden_meta(meta)
-- return {
-- Meta = function(meta)
_content_hidden_meta = meta
-- The call to `pandoc.Meta` ensures that we hold a copy.
_content_hidden_meta = pandoc.Meta(meta)
-- end
-- }
end
Expand Down
4 changes: 2 additions & 2 deletions src/resources/filters/customnodes/floatreftarget.lua
Original file line number Diff line number Diff line change
Expand Up @@ -767,7 +767,7 @@ function float_reftarget_render_html_figure(float)
local float_content = pandoc.Div(_quarto.ast.walk(float.content, {
-- strip image captions
Image = function(image)
image.caption = {}
image.caption = pandoc.Inlines{}
return image
end
}) or pandoc.Div({})) -- this should never happen but the lua analyzer doesn't know it
Expand Down Expand Up @@ -1098,4 +1098,4 @@ end, function(float)
return pandoc.Para({im})
end)

global_table_guid_id = 0
global_table_guid_id = 0
Loading
Loading