diff --git a/package-lock.json b/package-lock.json
index 17f7bf5ca..2c9079800 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -13996,6 +13996,7 @@
       "version": "3.1.1",
       "resolved": "https://registry.npmjs.org/unist-util-remove/-/unist-util-remove-3.1.1.tgz",
       "integrity": "sha512-kfCqZK5YVY5yEa89tvpl7KnBBHu2c6CzMkqHUrlOqaRgGOMp0sMvwWOVrbAtj03KhovQB7i96Gda72v/EFE0vw==",
+      "license": "MIT",
       "dependencies": {
         "@types/unist": "^2.0.0",
         "unist-util-is": "^5.0.0",
@@ -15733,6 +15734,7 @@
         "myst-cli-utils": "^2.0.11",
         "myst-common": "^1.7.2",
         "node-fetch": "^3.3.0",
+        "unist-util-remove": "^3.1.1",
         "unist-util-select": "^4.0.3",
         "vfile": "^5.3.7",
         "which": "^4.0.0"
diff --git a/packages/myst-cli/src/process/mdast.ts b/packages/myst-cli/src/process/mdast.ts
index fb50a03cd..c400669c1 100644
--- a/packages/myst-cli/src/process/mdast.ts
+++ b/packages/myst-cli/src/process/mdast.ts
@@ -68,6 +68,7 @@ import {
   transformImagesToDisk,
   transformFilterOutputStreams,
   transformLiftCodeBlocksInJupytext,
+  transformMarkdownOutputs,
   transformMystXRefs,
 } from '../transforms/index.js';
 import type { ImageExtensions } from '../utils/resolveExtension.js';
@@ -239,6 +240,9 @@ export async function transformMdast(
       log: session.log,
     });
   }
+  await transformMarkdownOutputs(mdast, {
+    parser: (content: string) => parseMyst(session, content, file),
+  });
   transformRenderInlineExpressions(mdast, vfile);
   await transformOutputsToCache(session, mdast, kind, { minifyMaxCharacters });
   transformFilterOutputStreams(mdast, vfile, frontmatter.settings);
diff --git a/packages/myst-cli/src/process/notebook.ts b/packages/myst-cli/src/process/notebook.ts
index ce8323f39..9f7775111 100644
--- a/packages/myst-cli/src/process/notebook.ts
+++ b/packages/myst-cli/src/process/notebook.ts
@@ -165,17 +165,17 @@ export async function processNotebookFull(
           value: ensureString(cell.source),
         };
 
-        // Embed outputs in an output block
-        const output: { type: 'output'; id: string; data: IOutput[] } = {
-          type: 'output',
-          id: nanoid(),
-          data: [],
-        };
-
-        if (cell.outputs && (cell.outputs as IOutput[]).length > 0) {
-          output.data = cell.outputs as IOutput[];
-        }
-        return acc.concat(blockParent(cell, [code, output]));
+        // A cell without `outputs` yields no output nodes rather than throwing
+        const outputs = ((cell.outputs as IOutput[] | undefined) ?? []).map((output) => {
+          // TODO: output-refactoring -- embed this in an `outputs node` in future
+          const result: { type: 'output'; id: string; data: IOutput[] } = {
+            type: 'output',
+            id: nanoid(),
+            data: [output],
+          };
+          return result;
+        });
+        return acc.concat(blockParent(cell, [code, ...outputs]));
       }
       return acc;
     },
diff --git a/packages/myst-cli/src/transforms/crossReferences.ts b/packages/myst-cli/src/transforms/crossReferences.ts
index 470c1a956..929a9d22a 100644
--- a/packages/myst-cli/src/transforms/crossReferences.ts
+++ b/packages/myst-cli/src/transforms/crossReferences.ts
@@ -1,7 +1,8 @@
 import type { VFile } from 'vfile';
 import { selectAll } from 'unist-util-select';
+import { visit, SKIP } from 'unist-util-visit';
 import type { FrontmatterParts, GenericNode, GenericParent, References } from 'myst-common';
-import { RuleId, fileWarn, plural, selectMdastNodes } from 'myst-common';
+import { RuleId, fileWarn, plural, selectMdastNodes, liftChildren } from 'myst-common';
 import { computeHash, tic } from 'myst-cli-utils';
 import { addChildrenFromTargetNode } from 'myst-transforms';
 import type { PageFrontmatter } from 'myst-frontmatter';
@@ -9,6 +10,7 @@ import type { CrossReference, Dependency, Link, SourceFileKind } from 'myst-spec
 import type { ISession } from '../session/types.js';
 import { loadFromCache, writeToCache } from '../session/cache.js';
 import type { SiteAction, SiteExport } from 'myst-config';
+import type { IOutput } from '@jupyterlab/nbformat';
 
 export const XREF_MAX_AGE = 1; // in days
 
@@ -32,6 +34,104 @@ export type MystData = {
   references?: References;
 };
 
+/**
+ * Convert between MyST AST versions either side of the `output` refactoring work
+ *
+ * "past" AST is upgraded to "contemporary" AST.
+ * "future" AST is downgraded to "contemporary" AST.
+ *
+ * where "contemporary" AST is the AST immediately after #1661 merges.
+ *
+ * These two changes allow us to continue to publish AST that will mostly work with old mystmd/myst-theme deployments, whilst being ready for a future breaking change that we can anticipate.
+ *
+ * After these upgrades/downgrades, we ensure that we have the following pseudo-schema:
+ *
+ * type CodeBlock = {
+ *   type: "block";
+ *   kind: "notebook-code",
+ *   children: [
+ *     Code,
+ *     Output,
+ *     ...,
+ *     Output
+ *   ]
+ * }
+ * type Output = {
+ *   type: "output";
+ *   children: GenericNode[];
+ *   visibility: ...;
+ *   data: IOutput[1];
+ * }
+ *
+ */
+function upgradeAndDowngradeMystData(data: MystData): MystData {
+  const makeUniqueLabel = (label: string | undefined, index: number): string | undefined => {
+    if (label === undefined) {
+      return undefined;
+    }
+    if (index === 0) {
+      return label;
+    } else {
+      return `${label}_${index}`;
+    }
+  };
+
+  // TODO: output-refactoring -- rewrite this function
+  visit(
+    data.mdast as any,
+    'output',
+    (node: GenericNode, index: number | null, parent: GenericParent | null) => {
+      // Case 1: "old" schema with >1 data per Output
+      // Upgrade: replace the node with sibling outputs holding exactly one data entry each
+      if (parent && index !== null && node.data && node.data.length > 1) {
+        const outputs = node.data.map((outputData: IOutput, idx: number) => {
+          // Take the unique ID from the first node
+          const auxData = {
+            identifier: makeUniqueLabel(node.identifier, idx),
+            html_id: makeUniqueLabel(node.html_id, idx),
+            id: makeUniqueLabel(node.id, idx),
+          };
+          return {
+            type: 'output',
+            visibility: node.visibility,
+            data: [outputData],
+            children: [], // FIXME: ignoring children here
+            ...auxData,
+          };
+        });
+        parent.children.splice(index, 1, ...outputs);
+        return SKIP;
+      }
+      // Case 2: "future" AST
+      // 1. delete new `jupyter_output` field of `Output`
+      // 2. restore `Output.data`
+      // 3. duplicate `Outputs.visibility` to `Output`, with `Output` taking precedence
+      // 4. erase `Outputs` type
+      else if (parent && parent.type === 'outputs' && 'jupyter_output' in node) {
+        // Erase the `Outputs` node
+        parent.type = '__lift__';
+
+        // Downgrade `jupyter_output` (1) and (2)
+        node.data = [node.jupyter_output];
+        node.jupyter_output = undefined;
+
+        // Duplicate `visibility` onto `Output` children (3)
+        node.visibility = node.visibility ?? parent.visibility;
+
+        // Take unique ID from parent
+        if (index === 0) {
+          node.identifier = parent.identifier;
+          node.html_id = parent.html_id;
+        }
+      }
+    },
+  );
+
+  // Erase lifted outputs
+  liftChildren(data.mdast as any, '__lift__');
+  return data;
+}
+
 async function fetchMystData(
   session: ISession,
   dataUrl: string | undefined,
@@ -48,7 +148,8 @@ async function fetchMystData(
   try {
     const resp = await session.fetch(dataUrl);
     if (resp.ok) {
-      const data = (await resp.json()) as MystData;
+      const data = upgradeAndDowngradeMystData((await resp.json()) as MystData);
+
       writeToCache(session, filename, JSON.stringify(data));
       return data;
     }
diff --git a/packages/myst-cli/src/transforms/outputs.ts b/packages/myst-cli/src/transforms/outputs.ts
index ea014c752..316e52f85 100644
--- a/packages/myst-cli/src/transforms/outputs.ts
+++ b/packages/myst-cli/src/transforms/outputs.ts
@@ -25,6 +25,66 @@ function getWriteDestination(hash: string, contentType: string, writeFolder: str
   return join(writeFolder, getFilename(hash, contentType));
 }
 
+const MARKDOWN_MIME_TYPE = 'text/markdown';
+
+/**
+ * Parse a Markdown MIME type to identify the Markdown flavour
+ *
+ * @param mimeType - markdown MIME type e.g. text/markdown;variant=myst
+ * @returns the value of the `variant` parameter, or undefined if there is none
+ */
+function parseVariant(mimeType: string): string | undefined {
+  // Each match is [fullMatch, name, value]; index 0 is the whole `;name=value` text
+  const [variant] = Array.from(mimeType.matchAll(/;([^;]+)=([^;]+)/g))
+    .filter(([, name]) => name.trim() === 'variant')
+    .map(([, , value]) => value.trim());
+  return variant;
+}
+
+/**
+ * Parse the Markdown content in each output, and embed the AST.
+ *
+ * This routine may introduce nodes that affect the global state,
+ * e.g. references. If other MIME keys take renderer precedence,
+ * it might confuse the reader to see e.g. enumeration jump from
+ * Fig 1. to Fig 3.
+ *
+ * @param mdast - existing AST containing output nodes
+ * @param opts - options to control parse result
+ */
+export async function transformMarkdownOutputs(
+  mdast: GenericParent,
+  opts: {
+    parser: (content: string) => GenericParent;
+  },
+) {
+  const outputs = selectAll('output', mdast) as GenericNode[];
+  outputs.forEach((output) => {
+    // Output nodes may have empty data (e.g. from the code-cell directive); skip those
+    const [rawOutput] = (output.data ?? []) as IOutput[];
+    switch (rawOutput?.output_type) {
+      case 'display_data':
+      case 'execute_result': {
+        // TODO: output-refactoring -- drop to single output in future
+        const mimeBundle = rawOutput.data as Record<string, unknown>;
+        // Find the most MyST-like Markdown (if any)
+        const [bestEntry] = Object.entries(mimeBundle)
+          .filter(([mimeType]) => mimeType.startsWith(MARKDOWN_MIME_TYPE))
+          .map(([mimeType, data]) => [parseVariant(mimeType), data])
+          .filter(([variant]) => variant === undefined || variant === 'myst')
+          .sort(([left], [right]) => Number(left === undefined) - Number(right === undefined));
+
+        // Process Markdown
+        if (bestEntry !== undefined) {
+          const data = bestEntry[1];
+          const outputMdast = opts.parser(data as string);
+          output.children = outputMdast.children;
+        }
+      }
+    }
+  });
+}
+
 /**
  * Traverse all output nodes, minify their content, and cache on the session
  */
@@ -41,6 +101,7 @@ export async function transformOutputsToCache(
     outputs
       .filter((output) => output.visibility !== 'remove')
       .map(async (output) => {
+        // TODO: output-refactoring -- drop to single output in future
         output.data = await minifyCellOutput(output.data as IOutput[], cache.$outputs, {
           computeHash,
           maxCharacters: opts?.minifyMaxCharacters,
@@ -77,6 +138,7 @@ export function transformFilterOutputStreams(
     const outputs = selectAll('output', block) as GenericNode[];
     // There should be only one output in the block
     outputs.forEach((output) => {
+      // TODO: output-refactoring -- drop to single output in future
       output.data = output.data.filter((data: IStream | MinifiedMimeOutput) => {
         if (
           (stderr !== 'show' || blockRemoveStderr) &&
@@ -193,6 +255,7 @@ export function transformOutputsToFile(
   const cache = castSession(session);
 
   outputs.forEach((node) => {
+    // TODO: output-refactoring -- drop to single output in future
     walkOutputs(node.data, (obj) => {
       const { hash } = obj;
       if (!hash || !cache.$outputs[hash]) return undefined;
@@ -236,6 +299,7 @@ export function reduceOutputs(
   const outputs = selectAll('output', mdast) as GenericNode[];
   const cache = castSession(session);
   outputs.forEach((node) => {
+    // TODO: output-refactoring -- drop to single output in future
     if (!node.data?.length && !node.children?.length) {
       node.type = '__delete__';
       return;
@@ -243,6 +307,7 @@ export function reduceOutputs(
     node.type = '__lift__';
     if (node.children?.length) return;
     const selectedOutputs: { content_type: string; hash: string }[] = [];
+    // TODO: output-refactoring -- drop to single output in future
     node.data.forEach((output: MinifiedOutput) => {
       let selectedOutput: { content_type: string; hash: string } | undefined;
       walkOutputs([output], (obj: any) => {
diff --git a/packages/myst-directives/src/code.ts b/packages/myst-directives/src/code.ts
index b7da9b0f6..6cca7c4e5 100644
--- a/packages/myst-directives/src/code.ts
+++ b/packages/myst-directives/src/code.ts
@@ -212,7 +212,7 @@ export const codeCellDirective: DirectiveSpec = {
     };
     const output = {
       type: 'output',
-      id: nanoid(),
+      children: [],
       data: [],
     };
     const block: GenericNode = {
diff --git a/packages/myst-execute/package.json b/packages/myst-execute/package.json
index 32793817d..f638ae35d 100644
--- a/packages/myst-execute/package.json
+++ b/packages/myst-execute/package.json
@@ -37,6 +37,7 @@
     "myst-cli-utils": "^2.0.11",
     "myst-common": "^1.7.2",
     "node-fetch": "^3.3.0",
+    "unist-util-remove": "^3.1.1",
     "unist-util-select": "^4.0.3",
     "vfile": "^5.3.7",
     "which": "^4.0.0"
diff --git a/packages/myst-execute/src/execute.ts b/packages/myst-execute/src/execute.ts
index 5f14286c8..c3ca2ac08 100644
--- a/packages/myst-execute/src/execute.ts
+++ b/packages/myst-execute/src/execute.ts
@@ -1,4 +1,5 @@
 import { select, selectAll } from 'unist-util-select';
+import { remove } from 'unist-util-remove';
 import type { Logger } from 'myst-cli-utils';
 import type { PageFrontmatter, KernelSpec } from 'myst-frontmatter';
 import type { Kernel, KernelMessage, Session, SessionManager } from '@jupyterlab/services';
@@ -282,10 +283,19 @@ function applyComputedOutputsToNodes(
     const thisResult = computedResult.shift();
 
     if (isCellBlock(matchedNode)) {
-      // Pull out output to set data
-      const output = select('output', matchedNode) as unknown as { data: IOutput[] };
-      // Set the output array to empty if we don't have a result (e.g. due to a kernel error)
-      output.data = thisResult === undefined ? [] : (thisResult as IOutput[]);
+      // Pull out code node
+      const code = select('code', matchedNode);
+
+      // Remove outputs
+      remove(matchedNode, { cascade: false }, 'output');
+
+      // Generate outputs
+      const outputs = ((thisResult as IOutput[]) ?? []).map((data) => {
+        return { type: 'output', children: [], data: [data] as any };
+      });
+      // Ensure that whether this fails or succeeds, we write to `children` (e.g. due to a kernel error)
+      // TODO: output-refactoring -- contain these nodes in `outputs`
+      matchedNode.children = [code as any, ...outputs];
     } else if (isInlineExpression(matchedNode)) {
       // Set data of expression to the result, or empty if we don't have one
       matchedNode.result = // TODO: FIXME .data
diff --git a/packages/myst-execute/tests/execute.yml b/packages/myst-execute/tests/execute.yml
index f768992aa..42a6c4340 100644
--- a/packages/myst-execute/tests/execute.yml
+++ b/packages/myst-execute/tests/execute.yml
@@ -67,9 +67,6 @@ cases:
               enumerator: 1
               html_id: nb-cell-0-code
             - type: output
-              id: T7FMDqDm8dM2bOT1tKeeM
-              identifier: nb-cell-0-output
-              html_id: nb-cell-0-output
               data:
                 - output_type: stream
                   name: stdout
@@ -171,9 +168,6 @@ cases:
               enumerator: 1
               html_id: nb-cell-0-code
             - type: output
-              id: T7FMDqDm8dM2bOT1tKeeM
-              identifier: nb-cell-0-output
-              html_id: nb-cell-0-output
               data:
                 - output_type: stream
                   name: stdout
@@ -195,9 +189,6 @@ cases:
               enumerator: 1
               html_id: nb-cell-0-code
             - type: output
-              id: T7FMDqDm8dM2bOT1tKeeM
-              identifier: nb-cell-0-output
-              html_id: nb-cell-0-output
               data:
                 - output_type: error
                   # Note this traceback can be different on various machines
@@ -254,9 +245,6 @@ cases:
               enumerator: 1
               html_id: nb-cell-0-code
             - type: output
-              id: T7FMDqDm8dM2bOT1tKeeM
-              identifier: nb-cell-0-output
-              html_id: nb-cell-0-output
               data:
                 - output_type: error
                   # Note this traceback can be different on various machines
@@ -313,7 +301,4 @@ cases:
               enumerator: 1
               html_id: nb-cell-0-code
             - type: output
-              id: T7FMDqDm8dM2bOT1tKeeM
-              identifier: nb-cell-0-output
-              html_id: nb-cell-0-output
               data: