diff --git a/bun.lockb b/bun.lockb index 7a671a5..bcd6034 100755 Binary files a/bun.lockb and b/bun.lockb differ diff --git a/package.json b/package.json index a556042..3b90812 100644 --- a/package.json +++ b/package.json @@ -30,6 +30,7 @@ }, "dependencies": { "@djot/djot": "^0.3.1", + "@types/mdast": "^4.0.4", "argparse": "^2.0.1", "fast-glob": "^3.3.2", "gluegun": "^5.2.0", @@ -49,7 +50,8 @@ "remark-parse": "^11.0.0", "remark-rehype": "^11.1.0", "remark-stringify": "^11.0.0", - "shiki": "^1.14.1" + "shiki": "^1.14.1", + "unified": "^11.0.5" }, "devDependencies": { "@djockey/linkmapper-typedoc": "^0.0.7", diff --git a/src/engine/executeConfig.ts b/src/engine/executeConfig.ts index 1a2539f..eec9645 100644 --- a/src/engine/executeConfig.ts +++ b/src/engine/executeConfig.ts @@ -48,8 +48,13 @@ export async function readDocSet( ): Promise { const logCollector = logCollectorParent.getChild("Parsing documents"); - const parsePromises = config.fileList.map((path_) => - parseFile(config.input_dir, path_, config, logCollector) + if (!config.fileList.length) { + throw new Error("No files"); + } + + const parsePromises = config.fileList.map( + async (path_) => + await parseFile(config.input_dir, path_, config, logCollector) ); const docs = (await Promise.all(parsePromises)).filter((doc) => !!doc); diff --git a/src/input/fileExtensions.ts b/src/input/fileExtensions.ts index c04e601..044132f 100644 --- a/src/input/fileExtensions.ts +++ b/src/input/fileExtensions.ts @@ -8,35 +8,52 @@ import { export function getExtensionForInputFormat(fmt: DjockeyInputFormat): string[] { switch (fmt) { case "gfm": - return ["md", "markdown"]; + return [".md", ".markdown"]; case "djot": - return ["djot", "dj"]; + return [".djot", ".dj"]; case "myst": - return ["md"]; + return [".myst.md", ".md"]; + case "commonmark": + return [".common.md", ".md", ".markdown"]; } } -export function getInputFormatForFileExtension( - ext: string, +export function getInputFormatForFileName( + filename: string, config: DjockeyConfig, frontMatter: Record ): DjockeyInputFormat | null { - const bareExt = ext[0] === "." ? ext.slice(1) : ext; - const defaultMarkdownVariant: MarkdownVariant = (frontMatter.md_variant as MarkdownVariant | undefined) ?? config.default_markdown_variant; - switch (bareExt) { - case "dj": - return "djot"; - case "djot": - return "djot"; - case "md": - return defaultMarkdownVariant; - case "markdown": + for (const fmt of ALL_INPUT_FORMATS) { + for (const ext of getExtensionForInputFormat(fmt)) { + // Double-extensions disambiguate between Markdown formats. + if (ext.split(".").length > 2 && filename.endsWith(ext)) { + return fmt; + } + } + } + + // If we didn't find a totally unambiguous extension, try Markdown. + const mdExts = [".md", ".markdown"]; + for (const ext of mdExts) { + if (filename.endsWith(ext)) { return defaultMarkdownVariant; - default: - return null; + } } + + // Otherwise, try everything else. + for (const fmt of ALL_INPUT_FORMATS) { + for (const ext of getExtensionForInputFormat(fmt)) { + if (filename.endsWith(ext)) { + return fmt; + } + } + } + + console.error("Can't figure out format for", filename); + + return null; } diff --git a/src/input/parseFile.ts b/src/input/parseFile.ts index f9ca4de..3e8c091 100644 --- a/src/input/parseFile.ts +++ b/src/input/parseFile.ts @@ -2,15 +2,18 @@ import fs from "fs"; import path from "path"; import { basename } from "path"; -import yaml from "js-yaml"; import { fromPandoc, parse } from "@djot/djot"; +import { mystParse } from "myst-parser"; +import remarkParse from "remark-parse"; +import remarkGfm from "remark-gfm"; +import { unified } from "unified"; +import yaml from "js-yaml"; + import { DjockeyConfig, DjockeyDoc, PolyglotDoc } from "../types.js"; import { getPandocAST } from "../pandoc.js"; -import { getInputFormatForFileExtension } from "./fileExtensions.js"; +import { getInputFormatForFileName } from "./fileExtensions.js"; import { LogCollector } from "../utils/logUtils.js"; -import { fsext, fsname, fssplit, refjoin } from "../utils/pathUtils.js"; - -import { mystParse } from "myst-parser"; +import { fsbase, fsext, fsname, fssplit, refjoin } from "../utils/pathUtils.js"; function removeExtensionFromPath(path_: string): string { return path_.slice(0, path_.length - path.parse(path_).ext.length); @@ -45,8 +48,17 @@ export async function parseFile( let polyglotDoc: PolyglotDoc | undefined; - switch (getInputFormatForFileExtension(fsext(fsPath), config, frontMatter)) { + logCollector.warning( + `${getInputFormatForFileName( + fsbase(fsPath), + config, + frontMatter + )} ${fsPath}` + ); + + switch (getInputFormatForFileName(fsbase(fsPath), config, frontMatter)) { case "djot": + console.log("Parse", fsbase(fsPath), "as djot"); polyglotDoc = { kind: "djot", value: parse(text, { @@ -56,10 +68,17 @@ export async function parseFile( }; break; case "gfm": + console.log("Parse", fsbase(fsPath), "as gfm"); const ast = getPandocAST(fsPath); polyglotDoc = { kind: "djot", value: fromPandoc(ast as any) }; break; + case "commonmark": + console.log("Parse", fsbase(fsPath), "as gfm"); + const file = await unified().use(remarkParse).use(remarkGfm).process(); + console.log(file); + break; case "myst": + console.log("Parse", fsbase(fsPath), "as myst"); polyglotDoc = { kind: "mdast", value: mystParse(text) }; console.log(yaml.dump(polyglotDoc.value)); break; diff --git a/src/plugins/autoTitlePlugin.ts b/src/plugins/autoTitlePlugin.ts index 995be0f..3452579 100644 --- a/src/plugins/autoTitlePlugin.ts +++ b/src/plugins/autoTitlePlugin.ts @@ -1,16 +1,15 @@ import { Heading } from "@djot/djot"; import { visit, EXIT } from "unist-util-visit"; -import mdast from "mdast"; import { applyFilter } from "../engine/djotFiltersPlus.js"; import { DjockeyDoc, DjockeyPlugin } from "../types.js"; -import { djotASTToText, mystASTToText } from "../utils/djotUtils.js"; +import { djotASTToText } from "../utils/djotUtils.js"; import { LogCollector } from "../utils/logUtils.js"; -import { mystParse } from "myst-parser"; import { djotASTToMystAST_Inline, mystASTToDjotAST_Inline, } from "../utils/astUtils.js"; +import { toString } from "mdast-util-to-string"; export class AutoTitlePlugin implements DjockeyPlugin { name = "Auto Titler"; @@ -40,7 +39,7 @@ export class AutoTitlePlugin implements DjockeyPlugin { break; case "mdast": visit(doc.docs.content.value, "heading", (node) => { - doc.title = mystASTToText(node as mdast.Heading); + doc.title = toString(node); doc.titleASTDjot = mystASTToDjotAST_Inline(node); doc.titleASTMyst = node; return EXIT; diff --git a/src/renderers/htmlRenderer.ts b/src/renderers/htmlRenderer.ts index 590d71d..012f46a 100644 --- a/src/renderers/htmlRenderer.ts +++ b/src/renderers/htmlRenderer.ts @@ -229,6 +229,7 @@ export class HTMLRenderer implements DjockeyRenderer { urlLists: filteredURLListsAsURLs, }); + console.log("Write to", outputFSPath); await writeFile(outputFSPath, outputPage); } } diff --git a/src/types.ts b/src/types.ts index 7fe2e5d..c49d5ad 100644 --- a/src/types.ts +++ b/src/types.ts @@ -9,7 +9,7 @@ export interface LinkMappingConfig { url_root: string; } -export type MarkdownVariant = "gfm" | "myst"; +export type MarkdownVariant = "gfm" | "myst" | "commonmark"; export interface DjockeyConfig { input_dir: string; @@ -61,7 +61,7 @@ export interface DjockeyConfigResolved extends DjockeyConfig { rootPath: string; fileList: string[]; url_root: string; - default_markdown_variant: "gfm" | "myst"; + default_markdown_variant: "gfm" | "myst" | "commonmark"; link_mappings: LinkMappingConfig[]; } @@ -96,8 +96,13 @@ export interface DjockeyDoc { } // These correspond to pandoc formats. Keep these two lines in sync. -export type DjockeyInputFormat = "djot" | "gfm" | "myst"; -export const ALL_INPUT_FORMATS: DjockeyInputFormat[] = ["djot", "gfm", "myst"]; +export type DjockeyInputFormat = "djot" | "gfm" | "myst" | "commonmark"; +export const ALL_INPUT_FORMATS: DjockeyInputFormat[] = [ + "djot", + "gfm", + "myst", + "commonmark", +]; // Keep these two lines in sync. export type DjockeyOutputFormat = "html" | "gfm"; diff --git a/src/utils/astUtils.ts b/src/utils/astUtils.ts index ef2de17..03ef0dc 100644 --- a/src/utils/astUtils.ts +++ b/src/utils/astUtils.ts @@ -1,10 +1,12 @@ import { Block, Heading, Inline } from "@djot/djot"; import { Parent, PhrasingContent } from "mdast"; +import unist from "unist"; import { visit } from "unist-util-visit"; +import { toString } from "mdast-util-to-string"; import { applyFilter } from "../engine/djotFiltersPlus.js"; import { DjockeyDoc, PolyglotDoc, PolyglotDoc_MDAST } from "../types.js"; -import { djotASTToText, mystASTToText } from "./djotUtils.js"; +import { djotASTToText } from "./djotUtils.js"; export function getDoesDocHaveContent(doc: PolyglotDoc): boolean { switch (doc.kind) { @@ -43,14 +45,12 @@ export function getFirstHeadingIsAlreadyDocumentTitle( return didFindNode; } -export function mystASTToDjotAST_Inline(mystRoot: Parent): Inline[] { - return [{ tag: "str", text: mystASTToText(mystRoot) }]; +export function mystASTToDjotAST_Inline(root: unist.Parent): Inline[] { + return [{ tag: "str", text: toString(root) }]; } -export function mystASTToDjotAST_Block(mystRoot: Parent): Block[] { - return [ - { tag: "para", children: [{ tag: "str", text: mystASTToText(mystRoot) }] }, - ]; +export function mystASTToDjotAST_Block(root: unist.Parent): Block[] { + return [{ tag: "para", children: [{ tag: "str", text: toString(root) }] }]; } export function djotASTToMystAST_Inline(djotRoot: Inline[]): PhrasingContent[] { diff --git a/src/utils/djotUtils.ts b/src/utils/djotUtils.ts index eacf849..3918168 100644 --- a/src/utils/djotUtils.ts +++ b/src/utils/djotUtils.ts @@ -7,10 +7,10 @@ import { isBlock, } from "@djot/djot"; import mdast from "mdast"; +import unist from "unist"; import { visit } from "unist-util-visit"; import { processAllNodes } from "../engine/djotFiltersPlus.js"; -import { MystDoc } from "../types.js"; export function getHasClass(node: HasAttributes, cls: string): boolean { if (!node.attributes || !node.attributes["class"]) return false; @@ -58,14 +58,6 @@ export function djotASTToText(children: Block[]) { return result.join(""); } -export function mystASTToText(root: mdast.Parent) { - const result = new Array(); - visit(root, "text", (node) => { - result.push((node as mdast.Text).value); - }); - return result.join(""); -} - export function djotASTToTextWithLineBreaks(children: Block[]) { const result = new Array(); diff --git a/src/utils/pathUtils.ts b/src/utils/pathUtils.ts index 4f02bb9..357dc61 100644 --- a/src/utils/pathUtils.ts +++ b/src/utils/pathUtils.ts @@ -127,6 +127,10 @@ export function fsname(s: string): string { return path.parse(s).name; } +export function fsbase(s: string): string { + return path.parse(s).base; +} + export function fsext(s: string): string { return path.parse(s).ext; } diff --git a/yarn.lock b/yarn.lock index d99d156..3f6233e 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1396,7 +1396,7 @@ __metadata: languageName: node linkType: hard -"@types/mdast@npm:^4.0.0": +"@types/mdast@npm:^4.0.0, @types/mdast@npm:^4.0.4": version: 4.0.4 resolution: "@types/mdast@npm:4.0.4" dependencies: @@ -2825,6 +2825,7 @@ __metadata: "@types/js-yaml": ^4.0.9 "@types/log-update": ^3.1.0 "@types/lunr": ^2.3.7 + "@types/mdast": ^4.0.4 "@types/micromatch": ^4.0.9 "@types/node": ^22.4.0 "@types/nunjucks": ^3.2.6 @@ -2856,6 +2857,7 @@ __metadata: ts-jest: ^29.2.5 typedoc: ^0.26.6 typescript: ^5.5.4 + unified: ^11.0.5 bin: djockey: ./dist/cli.js languageName: unknown @@ -7555,7 +7557,7 @@ __metadata: languageName: node linkType: hard -"unified@npm:^11.0.0": +"unified@npm:^11.0.0, unified@npm:^11.0.5": version: 11.0.5 resolution: "unified@npm:11.0.5" dependencies: