From 3de4b4a70a451067431ee5febca420e937f3cd07 Mon Sep 17 00:00:00 2001 From: Chris Fenner Date: Wed, 11 Sep 2024 00:13:54 +0000 Subject: [PATCH 1/2] Implement lua mermaid filter with stable filenames The JS mermaid filter uses randomly generated tmp filenames, which means that diffs think that mermaid diagrams are constantly changing. Implementing our own mermaid filter (just wrapping mmdc) allows us to use stable filenames and cache the mermaid output across invocations. --- Dockerfile | 2 +- build.sh | 19 ++-------- filter/mermaid-code-class-pre.lua | 22 ----------- filter/mermaid-filter.lua | 63 +++++++++++++++++++++++++++++++ 4 files changed, 68 insertions(+), 38 deletions(-) delete mode 100644 filter/mermaid-code-class-pre.lua create mode 100644 filter/mermaid-filter.lua diff --git a/Dockerfile b/Dockerfile index a3735dd..3f62332 100644 --- a/Dockerfile +++ b/Dockerfile @@ -143,7 +143,7 @@ RUN apt install -y \ ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true \ PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium-browser -RUN npm install --global --unsafe-perm puppeteer@23.2.1 imgur@2.4.2 mermaid-filter@1.4.7 typescript@5.5.4 pandiff@0.6.0 +RUN npm install --global --unsafe-perm puppeteer@23.2.1 imgur@2.4.2 @mermaid-js/mermaid-cli@11.1.1 typescript@5.5.4 pandiff@0.6.0 # Important: /usr/local/texlive/bin/ paths come before other paths. We want to use the texlive we # built above, not any that happen to have come along with our base image. 
diff --git a/build.sh b/build.sh index 445faa5..9396f9b 100755 --- a/build.sh +++ b/build.sh @@ -519,8 +519,6 @@ echo "Date (English): ${DATE_ENGLISH}" FROM="markdown+gfm_auto_identifiers+fenced_divs+implicit_figures+multiline_tables+grid_tables+table_captions-markdown_in_html_blocks" cp /resources/filters/mermaid-config.json .mermaid-config.json -export MERMAID_FILTER_FORMAT="pdf" -export MERMAID_FILTER_BACKGROUND="transparent" # The Mermaid filter loses track of the web browser it uses to render diagrams # sometimes (maybe 5% of the time or so). @@ -582,8 +580,7 @@ do_latex() { --standalone --no-highlight --template=tcg.tex - --lua-filter=mermaid-code-class-pre.lua - --filter=mermaid-filter + --lua-filter=mermaid-filter.lua --lua-filter=informative-sections.lua --lua-filter=convert-images.lua --lua-filter=center-images.lua @@ -655,6 +652,7 @@ do_pdf() { cp *.upb "${SOURCE_DIR}" 2>/dev/null # Copy converted images so they can be cached as well. cp *.convert.pdf "${SOURCE_DIR}" 2>/dev/null + cp *.mermaid.pdf "${SOURCE_DIR}" 2>/dev/null echo "Elapsed time: $(($end-$start)) seconds" # Write any LaTeX errors to stderr. >&2 grep -A 5 "! 
" "${logfile}" @@ -688,8 +686,7 @@ do_docx() { cmd=(pandoc --embed-resources --standalone - --lua-filter=mermaid-code-class-pre.lua - --filter=mermaid-filter + --lua-filter=mermaid-filter.lua --lua-filter=convert-images.lua --lua-filter=parse-html.lua --lua-filter=apply-classes-to-tables.lua @@ -728,8 +725,7 @@ do_html() { -V toccolor=blue --embed-resources --standalone - --lua-filter=mermaid-code-class-pre.lua - --filter=mermaid-filter + --lua-filter=mermaid-filter.lua --lua-filter=parse-html.lua --lua-filter=apply-classes-to-tables.lua --lua-filter=landscape-pages.lua @@ -790,12 +786,6 @@ if [ -n "${DOCX_OUTPUT}" ]; then do_docx "${BUILD_DIR}/${INPUT_FILE}" "${SOURCE_DIR}/${DOCX_OUTPUT}" fi -# Generate the html output -export MERMAID_FILTER_FORMAT="svg" -if [ -n "${HTML_OUTPUT}" ]; then - do_html "${BUILD_DIR}/${INPUT_FILE}" "${SOURCE_DIR}/${HTML_OUTPUT}" -fi - # Diffs may fail in some circumstances. Do not fail the entire workflow here. PRE_DIFFING_FAILED="${FAILED}" @@ -804,7 +794,6 @@ PRE_DIFFING_FAILED="${FAILED}" readonly TEMP_DIFFBASE_TEX_FILE="${BUILD_DIR}/${INPUT_FILE}.diffbase.tex" readonly TEMP_DIFF_TEX_FILE="${BUILD_DIR}/${INPUT_FILE}.diff.tex" readonly TEMP_LATEXDIFF_LOG="${BUILD_DIR}/latexdiff.log" -export MERMAID_FILTER_FORMAT="pdf" if [ -n "${DIFFPDF_OUTPUT}" -o -n "${DIFFTEX_OUTPUT}" ]; then git fetch --unshallow --quiet 2>/dev/null git reset --hard ${DIFFBASE} diff --git a/filter/mermaid-code-class-pre.lua b/filter/mermaid-code-class-pre.lua deleted file mode 100644 index 0f4bdc9..0000000 --- a/filter/mermaid-code-class-pre.lua +++ /dev/null @@ -1,22 +0,0 @@ --- Mermaid-filter doesn't support arbitrary classes on the code block. --- Preprocess Mermaid diagram code blocks by enclosing them in figures. 
- -function CodeBlock(el) - local isMermaid = false - local figure_classes = pandoc.List({}) - for i, class in ipairs(el.classes) do - if class == 'mermaid' then - isMermaid = true - else - figure_classes:insert(class) - end - end - if isMermaid then - local caption = {long = pandoc.Plain(pandoc.Str(el.attributes.caption))} - local attrs = pandoc.Attr(el.identifier, figure_classes) - el.identifier = nil - el.classes = {'mermaid'} - return pandoc.Figure(el, caption, attrs) - end - return el -end diff --git a/filter/mermaid-filter.lua b/filter/mermaid-filter.lua new file mode 100644 index 0000000..efe76d8 --- /dev/null +++ b/filter/mermaid-filter.lua @@ -0,0 +1,92 @@
+-- Turn mermaid-classed code blocks into figures, retaining other classes on the
+-- code block as classes on the figure.
+--
+-- Diagrams are rendered with mmdc into files named after a hash of the diagram,
+-- so an already-rendered diagram is reused instead of being rendered again.
+
+-- Run a shell command, writing `input` to its standard input.
+-- Returns true only if the command was started and reported success on close.
+local function runCommandWithInput(command, input)
+    local pipe = io.popen(command, "w")
+    if not pipe then
+        return false
+    end
+    pipe:write(input)
+    pipe:flush()
+    -- Closing a popen handle waits for the command to finish and reports its
+    -- status; treat anything other than `true` as failure.
+    local ok = pipe:close()
+    return ok == true
+end
+
+-- Return the first 10 characters of the SHA-1 digest of `contents`.
+local function getContentsHash(contents)
+    return pandoc.sha1(contents):sub(1,10)
+end
+
+-- Return true if `file` can be opened for reading.
+local function fileExists(file)
+    local f = io.open(file)
+    if f then
+        f:close()
+        return true
+    end
+    return false
+end
+
+-- Render Mermaid source to a PDF, unless it was rendered before, and wrap the
+-- result in a figure.
+--   code:    Mermaid diagram source text.
+--   caption: caption for the figure (also passed to the image).
+--   attrs:   attributes for the figure.
+-- Returns the figure, or false if the diagram could not be rendered.
+local function mermaidFigure(code, caption, attrs)
+    local filename = getContentsHash('code=' .. code .. 'caption=' .. pandoc.utils.stringify(caption) .. 'attrs=' .. pandoc.utils.stringify(attrs)) .. '.mermaid.pdf'
+    if fileExists(filename) then
+        print(string.format('%s already exists; not re-rendering it', filename))
+    else
+        print(string.format('rendering %s using Mermaid...', filename))
+        local rendered = runCommandWithInput(string.format(
+            "mmdc --configFile /resources/filters/mermaid-config.json --puppeteerConfigFile ./.puppeteer.json --width 2000 --height 2000 --backgroundColor transparent --pdfFit --input - --output %s 2>&1", filename), code)
+        if not (rendered and fileExists(filename)) then
+            -- Remove any partial output so that it is not mistaken for a
+            -- finished rendering on the next run.
+            os.remove(filename)
+            print(string.format('failed to render %s using Mermaid', filename))
+            return false
+        end
+    end
+
+    local img = pandoc.Image(caption, filename)
+    return pandoc.Figure(img, caption, attrs)
+end
+
+-- Replace a code block carrying the 'mermaid' class with a figure of the
+-- rendered diagram. The block's identifier and its other classes move to the
+-- figure; its 'caption' attribute, if present, becomes the caption text.
+-- Code blocks without the 'mermaid' class are returned unchanged.
+function CodeBlock(el)
+    local isMermaid = false
+    local figure_classes = pandoc.List({})
+    for _, class in ipairs(el.classes) do
+        if class == 'mermaid' then
+            isMermaid = true
+        else
+            figure_classes:insert(class)
+        end
+    end
+    if not isMermaid then
+        return el
+    end
+
+    -- A code block without a caption attribute gets an empty caption, so that
+    -- nil is never passed to pandoc.Str.
+    local caption = {long = pandoc.Plain(pandoc.Str(el.attributes.caption or ''))}
+    local attrs = pandoc.Attr(el.identifier, figure_classes)
+    local figure = mermaidFigure(el.text, caption, attrs)
+    if not figure then
+        -- Stop with a clear message rather than returning a boolean to pandoc.
+        error('failed to render Mermaid diagram; see the messages above')
+    end
+    return figure
+end
From bab40853d260efe38f62140fa75b520c6611f7c6 Mon Sep 17 00:00:00 2001 From: Chris Fenner Date: Wed, 11 Sep 2024 00:15:40 +0000 Subject: [PATCH 2/2] cache mermaid files --- .github/workflows/render.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/render.yml b/.github/workflows/render.yml index 59132ba..7545290 100644 --- a/.github/workflows/render.yml +++ b/.github/workflows/render.yml @@ -71,7 +71,8 @@ jobs: *.toc *.upa *.upb - media/*.convert.pdf + *.convert.pdf + *.mermaid.pdf key: latex-${{ inputs.input }}-${{ inputs.container-version }}-${{ github.run_id }} restore-keys: latex-${{ inputs.input }}-${{ inputs.container-version }}