Skip to content

Commit

Permalink
feat: expand output to output[]
Browse files Browse the repository at this point in the history
  • Loading branch information
agoose77 committed Nov 25, 2024
1 parent b589574 commit 7de4192
Show file tree
Hide file tree
Showing 10 changed files with 198 additions and 33 deletions.
2 changes: 2 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions packages/myst-cli/src/process/mdast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ import {
transformImagesToDisk,
transformFilterOutputStreams,
transformLiftCodeBlocksInJupytext,
transformMarkdownOutputs,
transformMystXRefs,
} from '../transforms/index.js';
import type { ImageExtensions } from '../utils/resolveExtension.js';
Expand Down Expand Up @@ -239,6 +240,9 @@ export async function transformMdast(
log: session.log,
});
}
await transformMarkdownOutputs(mdast, {
parser: (content: string) => parseMyst(session, content, file),
});
transformRenderInlineExpressions(mdast, vfile);
await transformOutputsToCache(session, mdast, kind, { minifyMaxCharacters });
transformFilterOutputStreams(mdast, vfile, frontmatter.settings);
Expand Down
21 changes: 10 additions & 11 deletions packages/myst-cli/src/process/notebook.ts
Original file line number Diff line number Diff line change
Expand Up @@ -165,17 +165,16 @@ export async function processNotebookFull(
value: ensureString(cell.source),
};

// Embed outputs in an output block
const output: { type: 'output'; id: string; data: IOutput[] } = {
type: 'output',
id: nanoid(),
data: [],
};

if (cell.outputs && (cell.outputs as IOutput[]).length > 0) {
output.data = cell.outputs as IOutput[];
}
return acc.concat(blockParent(cell, [code, output]));
const outputs = (cell.outputs as IOutput[]).map((output) => {
// TODO: output-refactoring -- embed this in an `outputs node` in future
const result: { type: 'output'; id: string; data: IOutput[] } = {
type: 'output',
id: nanoid(),
data: [output],
};
return result;
});
return acc.concat(blockParent(cell, [code, ...outputs]));
}
return acc;
},
Expand Down
105 changes: 103 additions & 2 deletions packages/myst-cli/src/transforms/crossReferences.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
import type { VFile } from 'vfile';
import { selectAll } from 'unist-util-select';
import { visit, SKIP } from 'unist-util-visit';
import type { FrontmatterParts, GenericNode, GenericParent, References } from 'myst-common';
import { RuleId, fileWarn, plural, selectMdastNodes } from 'myst-common';
import { RuleId, fileWarn, plural, selectMdastNodes, liftChildren } from 'myst-common';
import { computeHash, tic } from 'myst-cli-utils';
import { addChildrenFromTargetNode } from 'myst-transforms';
import type { PageFrontmatter } from 'myst-frontmatter';
import type { CrossReference, Dependency, Link, SourceFileKind } from 'myst-spec-ext';
import type { ISession } from '../session/types.js';
import { loadFromCache, writeToCache } from '../session/cache.js';
import type { SiteAction, SiteExport } from 'myst-config';
import type { IOutput } from '@jupyterlab/nbformat';

export const XREF_MAX_AGE = 1; // in days

Expand All @@ -32,6 +34,104 @@ export type MystData = {
references?: References;
};

/**
* Convert between MyST AST versions either side of the `output` refactoring work
*
* "past" AST is upgraded to "contemporary" AST.
* "future" AST is downgraded to "contemporary AST".
*
* where "contemporary` AST is immediately after #1661 merges"
*
* These two changes allow us to continue to publish AST that will mostly work with old mystmd/myst-theme deployments, whilst being ready for a future breaking change that we can anticipate.
*
* After these upgrades/downgrades, we ensure that we have the following pseudo-schema:
*
* type CodeBlock = {
* type: "block";
* kind: "notebook-code",
* children: [
* Code,
* Output,
* ...,
* Output
* ]
* }
* type Output = {
* type: "output";
* children: GenericNode[];
* visibility: ...;
* data: IOutput[1];
* }
*
*/
function upgradeAndDowngradeMystData(data: MystData): MystData {
const makeUniqueLabel = (label: string | undefined, index: number): string | undefined => {
if (label === undefined) {
return undefined;
}
if (index === 0) {
return label;
} else {
return `${label}_${index}`;
}
};

// TODO: output-refactoring -- rewrite this function
visit(
data.mdast as any,
'output',
(node: GenericNode, index: number | null, parent: GenericParent | null) => {
// Case 1: "old" schema with >1 data per Output
// Upgrade old schema to have >1 data per output
if (parent && node.data && node.data.length > 1) {
const outputs = node.data.map((outputData: IOutput, idx: number) => {
// Take the unique ID from the first node
const auxData = {
identifier: makeUniqueLabel(node.identifier, idx),
html_id: makeUniqueLabel(node.html_id, idx),
id: makeUniqueLabel(node.id, idx),
};
return {
type: 'output',
visibility: node.visibility,
data: [outputData],
children: [], // FIXME: ignoring children here
...auxData,
};
});
parent.children[index!] = outputs;
return SKIP;
}
// Case 2: "future" AST
// 1. delete new `jupyter_output` field of `Output`
// 2. restore `Output.data`
// 3. duplicate `Outputs.visibility` to `Output`, with `Output` taking precedence
// 4. erase `Outputs` type
else if (parent && parent.type === 'outputs' && 'jupyter_output' in node) {
// Erase the `Outputs` node
parent.type = '__lift__';

// Downgrade `jupyter_output` (1) and (2)
node.data = [node.jupyter_output];
node.jupyter_output = undefined;

// Duplicate `visibility` onto `Output` children (3)
node.visibility = node.visibility ?? parent.visibility;

// Take unique ID from parent
if (index === 0) {
node.identifier = parent.identifier;
node.html_id = parent.html_id;
}
}
},
);

// Erase lifted outputs
liftChildren(data.mdast as any, '__lift__');
return data;
}

async function fetchMystData(
session: ISession,
dataUrl: string | undefined,
Expand All @@ -48,7 +148,8 @@ async function fetchMystData(
try {
const resp = await session.fetch(dataUrl);
if (resp.ok) {
const data = (await resp.json()) as MystData;
const data = upgradeAndDowngradeMystData((await resp.json()) as MystData);

writeToCache(session, filename, JSON.stringify(data));
return data;
}
Expand Down
62 changes: 62 additions & 0 deletions packages/myst-cli/src/transforms/outputs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,63 @@ function getWriteDestination(hash: string, contentType: string, writeFolder: str
return join(writeFolder, getFilename(hash, contentType));
}

const MARKDOWN_MIME_TYPE = 'text/markdown';

/**
* Parse a Markdown MIME type to identify the Markdown flavour
*
* @param mimeType - markdown MIME type e.g. text/markdown;variant=myst
*/
function parseVariant(mimeType: string): string | undefined {
const [variant] = Array.from(mimeType.matchAll(/;([^;]+)=([^;]+)/g))
.filter(([name]) => name === 'variant')
.map((pair) => pair[1]);
return variant;
}

/**
* Parse the Markdown content in each output, and embed the AST.
*
* This routine may introduce nodes that affect the global state,
* e.g. references. If other MIME keys take renderer precedence,
* it might confuse the reader to see e.g. enumeration jump from
* Fig 1. to Fig 3.
*
* @param mdast - existing AST containing output nodes
* @param opts - options to control parse result
*/
export async function transformMarkdownOutputs(
mdast: GenericParent,
opts: {
parser: (content: string) => GenericParent;
},
) {
const outputs = selectAll('output', mdast) as GenericNode[];
outputs.forEach((output) => {
const [rawOutput] = output.data as IOutput[];
switch (rawOutput.output_type) {
case 'display_data':
case 'execute_result': {
// TODO: output-refactoring -- drop to single output in future
const mimeBundle = rawOutput.data as Record<string, unknown>;
// Find the most MyST-like Markdown (if any)
const [bestEntry] = Object.entries(mimeBundle)
.filter(([mimeType]) => mimeType.startsWith(MARKDOWN_MIME_TYPE))
.map(([mimeType, data]) => [parseVariant(mimeType), data])
.filter(([variant]) => variant === undefined || variant === 'myst')
.sort((left) => (left[0] === undefined ? +1 : -1));

// Process Markdown
if (bestEntry !== undefined) {
const data = bestEntry[1];
const outputMdast = opts.parser(data as string);
output.children = outputMdast.children;
}
}
}
});
}

/**
* Traverse all output nodes, minify their content, and cache on the session
*/
Expand All @@ -41,6 +98,7 @@ export async function transformOutputsToCache(
outputs
.filter((output) => output.visibility !== 'remove')
.map(async (output) => {
// TODO: output-refactoring -- drop to single output in future
output.data = await minifyCellOutput(output.data as IOutput[], cache.$outputs, {
computeHash,
maxCharacters: opts?.minifyMaxCharacters,
Expand Down Expand Up @@ -77,6 +135,7 @@ export function transformFilterOutputStreams(
const outputs = selectAll('output', block) as GenericNode[];
// There should be only one output in the block
outputs.forEach((output) => {
// TODO: output-refactoring -- drop to single output in future
output.data = output.data.filter((data: IStream | MinifiedMimeOutput) => {
if (
(stderr !== 'show' || blockRemoveStderr) &&
Expand Down Expand Up @@ -193,6 +252,7 @@ export function transformOutputsToFile(
const cache = castSession(session);

outputs.forEach((node) => {
// TODO: output-refactoring -- drop to single output in future
walkOutputs(node.data, (obj) => {
const { hash } = obj;
if (!hash || !cache.$outputs[hash]) return undefined;
Expand Down Expand Up @@ -236,13 +296,15 @@ export function reduceOutputs(
const outputs = selectAll('output', mdast) as GenericNode[];
const cache = castSession(session);
outputs.forEach((node) => {
// TODO: output-refactoring -- drop to single output in future
if (!node.data?.length && !node.children?.length) {
node.type = '__delete__';
return;
}
node.type = '__lift__';
if (node.children?.length) return;
const selectedOutputs: { content_type: string; hash: string }[] = [];
// TODO: output-refactoring -- drop to single output in future
node.data.forEach((output: MinifiedOutput) => {
let selectedOutput: { content_type: string; hash: string } | undefined;
walkOutputs([output], (obj: any) => {
Expand Down
2 changes: 1 addition & 1 deletion packages/myst-directives/src/code.ts
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ export const codeCellDirective: DirectiveSpec = {
};
const output = {
type: 'output',
id: nanoid(),
children: [],
data: [],
};
const block: GenericNode = {
Expand Down
1 change: 1 addition & 0 deletions packages/myst-execute/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"myst-cli-utils": "^2.0.11",
"myst-common": "^1.7.2",
"node-fetch": "^3.3.0",
"unist-util-remove": "^3.1.1",
"unist-util-select": "^4.0.3",
"vfile": "^5.3.7",
"which": "^4.0.0"
Expand Down
18 changes: 14 additions & 4 deletions packages/myst-execute/src/execute.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { select, selectAll } from 'unist-util-select';
import { remove } from 'unist-util-remove';
import type { Logger } from 'myst-cli-utils';
import type { PageFrontmatter, KernelSpec } from 'myst-frontmatter';
import type { Kernel, KernelMessage, Session, SessionManager } from '@jupyterlab/services';
Expand Down Expand Up @@ -282,10 +283,19 @@ function applyComputedOutputsToNodes(
const thisResult = computedResult.shift();

if (isCellBlock(matchedNode)) {
// Pull out output to set data
const output = select('output', matchedNode) as unknown as { data: IOutput[] };
// Set the output array to empty if we don't have a result (e.g. due to a kernel error)
output.data = thisResult === undefined ? [] : (thisResult as IOutput[]);
// Pull out code node
const code = select('code', matchedNode);

// Remove outputs
remove(matchedNode, { cascade: false }, 'output');

// Generate outputs
const outputs = ((thisResult as IOutput[]) ?? []).map((data) => {
return { type: 'output', children: [], data: [data] as any };
});
// Ensure that whether this fails or succeeds, we write to `children` (e.g. due to a kernel error)
// TODO: output-refactoring -- contain these nodes in `outputs`
matchedNode.children = [code as any, ...outputs];
} else if (isInlineExpression(matchedNode)) {
// Set data of expression to the result, or empty if we don't have one
matchedNode.result = // TODO: FIXME .data
Expand Down
15 changes: 0 additions & 15 deletions packages/myst-execute/tests/execute.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,6 @@ cases:
enumerator: 1
html_id: nb-cell-0-code
- type: output
id: T7FMDqDm8dM2bOT1tKeeM
identifier: nb-cell-0-output
html_id: nb-cell-0-output
data:
- output_type: stream
name: stdout
Expand Down Expand Up @@ -171,9 +168,6 @@ cases:
enumerator: 1
html_id: nb-cell-0-code
- type: output
id: T7FMDqDm8dM2bOT1tKeeM
identifier: nb-cell-0-output
html_id: nb-cell-0-output
data:
- output_type: stream
name: stdout
Expand All @@ -195,9 +189,6 @@ cases:
enumerator: 1
html_id: nb-cell-0-code
- type: output
id: T7FMDqDm8dM2bOT1tKeeM
identifier: nb-cell-0-output
html_id: nb-cell-0-output
data:
- output_type: error
# Note this traceback can be different on various machines
Expand Down Expand Up @@ -254,9 +245,6 @@ cases:
enumerator: 1
html_id: nb-cell-0-code
- type: output
id: T7FMDqDm8dM2bOT1tKeeM
identifier: nb-cell-0-output
html_id: nb-cell-0-output
data:
- output_type: error
# Note this traceback can be different on various machines
Expand Down Expand Up @@ -313,7 +301,4 @@ cases:
enumerator: 1
html_id: nb-cell-0-code
- type: output
id: T7FMDqDm8dM2bOT1tKeeM
identifier: nb-cell-0-output
html_id: nb-cell-0-output
data:
Loading

0 comments on commit 7de4192

Please sign in to comment.