From aa52a2ffe04fcfc34d71d7e81d614fda0c12d690 Mon Sep 17 00:00:00 2001 From: Nathan Lovato Date: Fri, 20 Dec 2024 19:54:05 +0100 Subject: [PATCH 1/7] fix: gdscript: for now, skip comments when parsing symbols --- mdx-utils/src/gdscript/parser_gdscript.nim | 48 +++++++++++++++++++--- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/mdx-utils/src/gdscript/parser_gdscript.nim b/mdx-utils/src/gdscript/parser_gdscript.nim index cb9695f159..e584a09891 100644 --- a/mdx-utils/src/gdscript/parser_gdscript.nim +++ b/mdx-utils/src/gdscript/parser_gdscript.nim @@ -221,6 +221,10 @@ proc scanToken(s: var Scanner): Token = let startPos = s.current let c = s.getCurrentChar() case c + # Comment, skip to end of line and continue + of '#': + discard s.scanToEndOfLine() + continue # Function definition of 'f': if s.matchString("func"): @@ -353,6 +357,9 @@ proc parseClass(s: var Scanner, classToken: var Token) = let currentIndent = s.countIndentation() if currentIndent <= classIndent: if isNewDefinition(s): + echo "New definition found: ", s.getCurrentChar() + echo "Indent level is ", currentIndent + echo "Line of text is", s.source[s.current ..< s.current + 20] break let childToken = s.scanToken() @@ -407,12 +414,6 @@ proc getTokenFromCache(symbolName: string, filePath: string): Token = return file.symbols[symbolName] -proc getGDScriptCodeFromCache(filePath: string): var string = - # Gets the code of a GDScript file from the cache given its path - if not gdscriptFiles.hasKey(filePath): - parseGDScriptFile(filePath) - return gdscriptFiles[filePath].source - proc getSymbolText(symbolName: string, path: string): string = # Gets the text of a symbol given its name and the path to the file let token = getTokenFromCache(symbolName, path) @@ -647,6 +648,41 @@ class StateMachine extends Node: classToken.children[2].tokenType == TokenType.Variable classToken.children[3].tokenType == TokenType.Function + test "Parse larger inner class with anchors": + let code = """ +#ANCHOR:class_StateDie +class StateDie extends State: + + const SmokeExplosionScene = preload("res://assets/vfx/smoke_vfx/smoke_explosion.tscn") + + func _init(init_mob: Mob3D) -> void: + super("Die", init_mob) + + func enter() -> void: + mob.skin.play("die") + + var smoke_explosion := SmokeExplosionScene.instantiate() + mob.add_sibling(smoke_explosion) + smoke_explosion.global_position = mob.global_position + + mob.skin.animation_finished.connect(func (_animation_name: String) -> void: + mob.queue_free() + ) +#END:class_StateDie +""" + let tokens = parseGDScript(code) + check: + tokens.len == 1 + if tokens.len == 1: + let classToken = tokens[0] + check: + classToken.tokenType == TokenType.Class + classToken.getName(code) == "StateMachine" + classToken.children.len == 3 + else: + echo "Found tokens: ", tokens.len + printTokens(tokens, code) + when isMainModule: runUnitTests() #runPerformanceTest() From cb97ddc95ded3c62bb0fa11155b672f610d9f2f1 Mon Sep 17 00:00:00 2001 From: Nathan Lovato Date: Fri, 20 Dec 2024 20:30:11 +0100 Subject: [PATCH 2/7] backtrack when scanning body to end at \n after last code character --- mdx-utils/src/gdscript/parser_gdscript.nim | 29 ++++++++++++++-------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/mdx-utils/src/gdscript/parser_gdscript.nim b/mdx-utils/src/gdscript/parser_gdscript.nim index e584a09891..f2cd838ee1 100644 --- a/mdx-utils/src/gdscript/parser_gdscript.nim +++ b/mdx-utils/src/gdscript/parser_gdscript.nim @@ -109,8 +109,9 @@ proc matchString(s: var Scanner, expected: string): bool {.inline.} = return true return false -proc countIndentation(s: var Scanner): int {.inline.} = +proc countIndentationAndAdvance(s: var Scanner): int {.inline.} = ## Counts the number of spaces and tabs starting from the current position + ## Advances the scanner as it counts the indentation ## Call this function at the start of a line to count the indentation result = 0 while true: @@ -198,24 +199,33 @@ proc isNewDefinition(s: var Scanner): bool {.inline.} = s.skipWhitespace() result = s.peekString("func") or s.peekString("var") or s.peekString("const") or - s.peekString("class") or s.peekString("signal") or s.peekString("enum") + s.peekString("class") or s.peekString("signal") or s.peekString("enum") or + # TODO: Consider how to handle regular comments vs anchors + s.peekString("#ANCHOR") or s.peekString("#END") or + s.peekString("# ANCHOR") or s.peekString("# END") s.current = savedPos return result proc scanBody(s: var Scanner, startIndent: int): tuple[bodyStart, bodyEnd: int] = let start = s.current while not s.isAtEnd(): - let currentIndent = s.countIndentation() + let currentIndent = s.countIndentationAndAdvance() if currentIndent <= startIndent and not s.isAtEnd(): if isNewDefinition(s): break discard scanToEndOfLine(s) + # s.current points to the first letter of the next token, after the + # indentation. We need to backtrack to find the actual end of the body. + var index = s.current - 1 + while index > 0 and s.source[index] in [' ', '\r', '\t', '\n']: + index -= 1 + s.current = index + 1 result = (start, s.current) proc scanToken(s: var Scanner): Token = while not s.isAtEnd(): - s.indentLevel = s.countIndentation() + s.indentLevel = s.countIndentationAndAdvance() s.skipWhitespace() let startPos = s.current @@ -354,12 +364,10 @@ proc parseClass(s: var Scanner, classToken: var Token) = let classIndent = s.indentLevel s.current = classToken.range.bodyStart while not s.isAtEnd(): - let currentIndent = s.countIndentation() + #Problem: s is on the first char of the token instead of the beginning of the line + let currentIndent = s.countIndentationAndAdvance() if currentIndent <= classIndent: if isNewDefinition(s): - echo "New definition found: ", s.getCurrentChar() - echo "Indent level is ", currentIndent - echo "Line of text is", s.source[s.current ..< s.current + 20] break let childToken = s.scanToken() @@ -501,7 +509,6 @@ proc getCode*(symbolQuery: string, filePath: string): string = result = getSymbolBody(query.name, filePath) else: result = getSymbolText(query.name, filePath) - result = result.strip(trailing = true) proc runPerformanceTest() = let codeTest = @@ -677,8 +684,10 @@ class StateDie extends State: let classToken = tokens[0] check: classToken.tokenType == TokenType.Class - classToken.getName(code) == "StateMachine" + classToken.getName(code) == "StateDie" classToken.children.len == 3 + # Trailing anchor comments should not be included in the token + not classToken.getBody(code).contains("#END") else: echo "Found tokens: ", tokens.len printTokens(tokens, code) From 28427f412cfdbfcb13e760e7137c086a091819e2 Mon Sep 17 00:00:00 2001 From: Nathan Lovato <12694995+NathanLovato@users.noreply.github.com> Date: Sat, 21 Dec 2024 00:26:27 +0100 Subject: [PATCH 3/7] refactor: move types, document them, and draft proc to parse anchors --- mdx-utils/src/gdscript/parser_gdscript.nim | 107 +++++++++++++++++---- 1 file changed, 88 insertions(+), 19 deletions(-) diff --git a/mdx-utils/src/gdscript/parser_gdscript.nim b/mdx-utils/src/gdscript/parser_gdscript.nim index f2cd838ee1..4298abb178 100644 --- a/mdx-utils/src/gdscript/parser_gdscript.nim +++ b/mdx-utils/src/gdscript/parser_gdscript.nim @@ -26,13 +26,46 @@ type children: seq[Token] Scanner = object - # TODO: Cache the source elsewhere for reading the content of tokens after parsing. source: string current: int indentLevel: int bracketDepth: int peekIndex: int + CodeAnchor = object + ## A code anchor is how we call comments used to mark a region in the code, with the form + ## #ANCHOR:anchor_name + ## ... + ## #END:anchor_name + ## + ## This object is used to extract the code between the anchor and the end tag. + nameStart, nameEnd: int + codeStart, codeEnd: int + # Used to remove the anchor tags from the final code + # codeStart marks the end of the anchor tag, codeEnd marks the start of the end tag + anchorTagStart, endTagEnd: int + + GDScriptFile = object + ## Represents a parsed GDScript file with its symbols and source code + filePath: string + source: string + ## Map of symbol names to their tokens + symbols: Table[string, Token] + ## Map of anchor names to their code anchors + anchors: Table[string, CodeAnchor] + + SymbolQuery = object + ## Represents a query to get a symbol from a GDScript file, like + ## ClassName.definition or func_name.body or var_name. + name: string + isDefinition: bool + isBody: bool + isClass: bool + childName: string + +# Caches parsed GDScript files +var gdscriptFiles = initTable[string, GDScriptFile]() + proc getCode(token: Token, source: string): string {.inline.} = return source[token.range.start ..< token.range.end] @@ -156,6 +189,9 @@ proc scanIdentifier(s: var Scanner): tuple[start: int, `end`: int] {.inline.} = result = (start, s.current) proc scanToEndOfLine(s: var Scanner): tuple[start, `end`: int] {.inline.} = + ## Scans to the end of the current line, returning the start (the current + ## position when the function was called) and end positions (the \n character + ## at the end of the line). let start = s.current let length = s.source.len var offset = 0 @@ -201,8 +237,8 @@ proc isNewDefinition(s: var Scanner): bool {.inline.} = s.peekString("func") or s.peekString("var") or s.peekString("const") or s.peekString("class") or s.peekString("signal") or s.peekString("enum") or # TODO: Consider how to handle regular comments vs anchors - s.peekString("#ANCHOR") or s.peekString("#END") or - s.peekString("# ANCHOR") or s.peekString("# END") + s.peekString("#ANCHOR") or s.peekString("#END") or s.peekString("# ANCHOR") or + s.peekString("# END") s.current = savedPos return result @@ -223,6 +259,52 @@ proc scanBody(s: var Scanner, startIndent: int): tuple[bodyStart, bodyEnd: int] s.current = index + 1 result = (start, s.current) +proc scanAnchorToken(s: var Scanner, startPos: int): CodeAnchor = + ## Scans from a #ANCHOR tag to the matching #END tag + ## This is used in a preprocessing pass. + + result.anchorTagStart = startPos + + # Skip the #ANCHOR: or # ANCHOR: part + while s.getCurrentChar() != ':': + s.current += 1 + s.skipWhitespace() + # Skip the colon to parse the anchor name + s.current += 1 + + let (nameStart, nameEnd) = s.scanIdentifier() + discard s.scanToEndOfLine() + + result.nameStart = nameStart + result.nameEnd = nameEnd + result.codeStart = s.current + + # Look for the matching END tag + let anchorName = s.source[nameStart ..< nameEnd] + var foundEndTag = false + + let endTag = "#END:" & anchorName + let endTagWithSpace = "# END:" & anchorName + while not s.isAtEnd(): + s.skipWhitespace() + if s.peekString(endTag) or s.peekString(endTagWithSpace): + foundEndTag = true + result.codeEnd = s.current + discard s.scanToEndOfLine() + result.endTagEnd = s.current + break + + if not s.isAtEnd(): + discard s.advance() + + if not foundEndTag: + # The anchor is not closed, reset the scanner to after the anchor's opening + # tag, raise an error, and allow continuing with parsing. + s.current = result.codeStart + raise newException( + ValueError, "Anchor region " & anchorName & " is missing an #END tag." + ) + proc scanToken(s: var Scanner): Token = while not s.isAtEnd(): s.indentLevel = s.countIndentationAndAdvance() @@ -389,14 +471,6 @@ proc parseGDScript(source: string): seq[Token] = token.range.end = scanner.current result.add(token) -type GDScriptFile = object - filePath: string - source: string - symbols: Table[string, Token] - -# Caches parsed GDScript files -var gdscriptFiles = initTable[string, GDScriptFile]() - proc parseGDScriptFile(path: string) = # Parses a GDScript file and caches it let source = readFile(path) @@ -446,13 +520,6 @@ proc getSymbolBody(symbolName: string, path: string): string = let file = gdscriptFiles[path] return token.getBody(file.source) -type SymbolQuery = object - name: string - isDefinition: bool - isBody: bool - isClass: bool - childName: string - proc parseSymbolQuery(query: string): SymbolQuery = ## Turns a symbol query string like ClassName.body or ClassName.function.definition ## into a SymbolQuery object for easier processing. @@ -656,7 +723,8 @@ class StateMachine extends Node: classToken.children[3].tokenType == TokenType.Function test "Parse larger inner class with anchors": - let code = """ + let code = + """ #ANCHOR:class_StateDie class StateDie extends State: @@ -678,6 +746,7 @@ class StateDie extends State: #END:class_StateDie """ let tokens = parseGDScript(code) + echo tokens[0].getCode(code) check: tokens.len == 1 if tokens.len == 1: From 70666fcb3541edeb4ec2a3bb5c63416534f21617 Mon Sep 17 00:00:00 2001 From: Nathan Lovato <12694995+NathanLovato@users.noreply.github.com> Date: Sat, 21 Dec 2024 00:31:27 +0100 Subject: [PATCH 4/7] docs: update module docs --- mdx-utils/src/gdscript/parser_gdscript.nim | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/mdx-utils/src/gdscript/parser_gdscript.nim b/mdx-utils/src/gdscript/parser_gdscript.nim index 4298abb178..6649a28885 100644 --- a/mdx-utils/src/gdscript/parser_gdscript.nim +++ b/mdx-utils/src/gdscript/parser_gdscript.nim @@ -1,4 +1,22 @@ -## Minimal GDScript parser specialized for code include shortcodes. Tokenizes symbol definitions and their body and collects all their content. +## Minimal GDScript parser specialized for code include shortcodes. Tokenizes +## symbol definitions and their body and collects all their content. +## +## Preprocesses GDScript code to extract code between anchor comments, like +## #ANCHOR:anchor_name +## ... Code here +## #END:anchor_name +## +## This works in 2 passes: +## +## 1. Preprocesses the code to extract the code between anchor comments and remove anchor comments. +## 2. Parses the preprocessed code to tokenize symbols and their content. +## +## Users can then query and retrieve the code between anchors or the definition +## and body of a symbol. +## +## This was originally written as a tool to only parse GDScript symbols, with +## the anchor preprocessing added later, so the approach may not be the most +## efficient. import std/[tables, unittest, strutils, times] when compileOption("profiler"): import std/nimprof From d22499609f1d739ab798ffddc35a5ba07fbf66cc Mon Sep 17 00:00:00 2001 From: Nathan Lovato <12694995+NathanLovato@users.noreply.github.com> Date: Sat, 21 Dec 2024 09:37:00 +0100 Subject: [PATCH 5/7] add function to preprocess code files --- mdx-utils/src/gdscript/parser_gdscript.nim | 29 ++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/mdx-utils/src/gdscript/parser_gdscript.nim b/mdx-utils/src/gdscript/parser_gdscript.nim index 6649a28885..eca6abbf60 100644 --- a/mdx-utils/src/gdscript/parser_gdscript.nim +++ b/mdx-utils/src/gdscript/parser_gdscript.nim @@ -277,11 +277,11 @@ proc scanBody(s: var Scanner, startIndent: int): tuple[bodyStart, bodyEnd: int] s.current = index + 1 result = (start, s.current) -proc scanAnchorToken(s: var Scanner, startPos: int): CodeAnchor = +proc scanAnchor(s: var Scanner): CodeAnchor = ## Scans from a #ANCHOR tag to the matching #END tag ## This is used in a preprocessing pass. - result.anchorTagStart = startPos + result.anchorTagStart = s.current # Skip the #ANCHOR: or # ANCHOR: part while s.getCurrentChar() != ':': @@ -323,6 +323,31 @@ proc scanAnchorToken(s: var Scanner, startPos: int): CodeAnchor = ValueError, "Anchor region " & anchorName & " is missing an #END tag." ) +proc preprocessAnchors( + source: string +): tuple[anchors: seq[CodeAnchor], processed: string] = + ## Preprocesses the source code to extract the code between anchor comments + ## and remove the anchor comments. + var anchors: seq[CodeAnchor] = @[] + var newSource = newStringOfCap(source.len) + var s = + Scanner(source: source, current: 0, indentLevel: 0, bracketDepth: 0, peekIndex: 0) + + var lastEnd = 0 + while not s.isAtEnd(): + let c = s.getCurrentChar() + if c == '#': + if s.peekString("#ANCHOR") or s.peekString("# ANCHOR"): + let anchor = scanAnchor(s) + anchors.add(anchor) + newSource.add(source[lastEnd ..< anchor.anchorTagStart]) + newSource.add(source[anchor.codeStart ..< anchor.codeEnd]) + lastEnd = anchor.endTagEnd + s.current += 1 + + newSource.add(source[lastEnd ..< source.len]) + return (anchors, newSource) + proc scanToken(s: var Scanner): Token = while not s.isAtEnd(): s.indentLevel = s.countIndentationAndAdvance() From 614b79ec1ef3bfd099d49f5280da26a4125fc3a7 Mon Sep 17 00:00:00 2001 From: Nathan Lovato Date: Sat, 21 Dec 2024 12:55:04 +0100 Subject: [PATCH 6/7] complete anchor preprocessor --- mdx-utils/src/gdscript/parser_gdscript.nim | 40 ++++++++++++++++------ 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/mdx-utils/src/gdscript/parser_gdscript.nim b/mdx-utils/src/gdscript/parser_gdscript.nim index eca6abbf60..9ad9caa29f 100644 --- a/mdx-utils/src/gdscript/parser_gdscript.nim +++ b/mdx-utils/src/gdscript/parser_gdscript.nim @@ -325,9 +325,12 @@ proc scanAnchor(s: var Scanner): CodeAnchor = proc preprocessAnchors( source: string -): tuple[anchors: seq[CodeAnchor], processed: string] = +): tuple[anchors: Table[string,CodeAnchor], processed: string] = ## Preprocesses the source code to extract the code between anchor comments ## and remove the anchor comments. + ## + ## Returns a table that maps anchor names to the CodeAnchor objects for easy + ## lookup and the processed source code, with the anchor comments removed. var anchors: seq[CodeAnchor] = @[] var newSource = newStringOfCap(source.len) var s = @@ -346,7 +349,10 @@ proc preprocessAnchors( s.current += 1 newSource.add(source[lastEnd ..< source.len]) - return (anchors, newSource) + var anchorsTable = initTable[string, CodeAnchor](anchors.len) + for anchor in anchors: + anchorsTable[s.source[anchor.nameStart ..< anchor.nameEnd]] = anchor + return (anchorsTable, newSource) proc scanToken(s: var Scanner): Token = while not s.isAtEnd(): @@ -515,15 +521,27 @@ proc parseGDScript(source: string): seq[Token] = result.add(token) proc parseGDScriptFile(path: string) = - # Parses a GDScript file and caches it + ## Parses a GDScript file and caches it in the gdscriptFiles table. + ## The parsing happens in two passes: + ## + ## 1. We preprocess the source code to extract the code between anchor comments and remove these comment lines. + ## 2. We parse the preprocessed source code to tokenize symbols and their content. + ## + ## Preprocessing makes the symbol parsing easier afterwards, although it means we scan the file twice. let source = readFile(path) - let tokens = parseGDScript(source) + let (anchors, processedSource) = preprocessAnchors(source) + let tokens = parseGDScript(processedSource) var symbols = initTable[string, Token]() for token in tokens: - let name = token.getName(source) + let name = token.getName(processedSource) symbols[name] = token - gdscriptFiles[path] = GDScriptFile(filePath: path, source: source, symbols: symbols) + gdscriptFiles[path] = GDScriptFile( + filePath: path, + source: source, + symbols: symbols, + anchors: anchors + ) proc getTokenFromCache(symbolName: string, filePath: string): Token = # Gets a token from the cache given a symbol name and the path to the GDScript file @@ -788,21 +806,21 @@ class StateDie extends State: ) #END:class_StateDie """ - let tokens = parseGDScript(code) - echo tokens[0].getCode(code) + let (anchors, processedSource) = preprocessAnchors(code) + let tokens = parseGDScript(processedSource) check: tokens.len == 1 if tokens.len == 1: let classToken = tokens[0] check: classToken.tokenType == TokenType.Class - classToken.getName(code) == "StateDie" + classToken.getName(processedSource) == "StateDie" classToken.children.len == 3 # Trailing anchor comments should not be included in the token - not classToken.getBody(code).contains("#END") + not classToken.getBody(processedSource).contains("#END") else: echo "Found tokens: ", tokens.len - printTokens(tokens, code) + printTokens(tokens, processedSource) when isMainModule: runUnitTests() From 6402a731819d8edcb9c9ca55052bf4278ddcab94 Mon Sep 17 00:00:00 2001 From: Nathan Lovato Date: Sat, 21 Dec 2024 15:27:40 +0100 Subject: [PATCH 7/7] refactor preprocessor, use anchor processing from gdscript parser --- mdx-utils/src/gdscript/parser_gdscript.nim | 18 +- mdx-utils/src/md/preprocessor.nim | 186 +++++++++++---------- 2 files changed, 113 insertions(+), 91 deletions(-) diff --git a/mdx-utils/src/gdscript/parser_gdscript.nim b/mdx-utils/src/gdscript/parser_gdscript.nim index 9ad9caa29f..c4e3bd2c9b 100644 --- a/mdx-utils/src/gdscript/parser_gdscript.nim +++ b/mdx-utils/src/gdscript/parser_gdscript.nim @@ -607,9 +607,10 @@ proc parseSymbolQuery(query: string): SymbolQuery = else: raise newException(ValueError, "Invalid symbol query: '" & query & "'") -proc getCode*(symbolQuery: string, filePath: string): string = +proc getCodeForSymbol*(symbolQuery: string, filePath: string): string = ## Gets the code of a symbol given a query and the path to the file ## The query can be: + ## ## - A symbol name like a function or class name ## - The path to a symbol, like ClassName.functionName ## - The request of a definition, like functionName.definition @@ -638,6 +639,21 @@ proc getCode*(symbolQuery: string, filePath: string): string = else: result = getSymbolText(query.name, filePath) +proc getCodeForAnchor*(anchorName: string, filePath: string): string = + ## Gets the code between anchor comments given the anchor name and the path to the file + if not gdscriptFiles.hasKey(filePath): + echo filePath & " not in cache. Parsing file..." + parseGDScriptFile(filePath) + + let file = gdscriptFiles[filePath] + if not file.anchors.hasKey(anchorName): + raise newException( + ValueError, "Anchor '" & anchorName & "' not found in file: '" & filePath & "'" + ) + + let anchor = file.anchors[anchorName] + return file.source[anchor.codeStart ..< anchor.codeEnd] + proc runPerformanceTest() = let codeTest = """ diff --git a/mdx-utils/src/md/preprocessor.nim b/mdx-utils/src/md/preprocessor.nim index 093b6b9037..4acc74fa9f 100644 --- a/mdx-utils/src/md/preprocessor.nim +++ b/mdx-utils/src/md/preprocessor.nim @@ -94,23 +94,91 @@ proc preprocessGodotIcon(match: RegexMatch, context: HandlerContext): string = result = match.match proc preprocessIncludeComponent(match: RegexMatch, context: HandlerContext): string = - ## Replaces the Include shortcode with the contents of the section of a file or full file it points to. + ## Processes the Include component, which includes code from a file. Uses the + ## GDScript parser module to extract code from the file. + ## + ## The Include component can take the following props: + ## + ## - file: the name or project-relative path to the file to include + ## - symbol: the symbol query to look for in the file, like a class name, a + ## function name, etc. It also supports forms like ClassName.definition or + ## ClassName.method_name.body + ## - anchor: the anchor to look for in the file. You must use one of symbol or + ## anchor, not both. + ## - prefix: a string to add at the beginning of each line of the included + ## code, typically + or - for diff code listings + ## - dedent: the number of tabs to remove from the beginning of each line of the + ## included code + ## - replace: a JSX expression with an array of objects, each containing a source + ## and replacement key. The source is the string to look for in the code, and the + ## replacement is the string to replace it with. + + proc processSearchAndReplace(code: string, replaceJsxObject: string): string = + ## Processes an include with a replace prop. + ## It's a JSX expression with an array of objects, each containing a source and replacement key. + type SearchAndReplace = object + source: string + replacement: string + + # Parse the replaceJsxObject prop. It's a JSX expression with either a single object or an array of objects. + # TODO: add error handling + # TODO: this currently cannot work because the MDX component parsing cannot capture JSX expressions as props + # TODO: later: replace with MDX component parser + var searchesAndReplaces: seq[SearchAndReplace] = @[] + # Remove the array mark if relevant, then parse objects - this should work + # for both array and single object formats + let replacesStr = replaceJsxObject.strip(chars = {'[', ']'}) + let matches = replacesStr.findAll(regexObjectPattern) + + for match in matches: + var keyValuePairs = match.strip(chars = {'{', '}'}).split(",") + var source, replacement: string + + for part in keyValuePairs: + let kv = part.strip().split(":") + if kv.len == 2: + let key = kv[0].strip().strip(chars = {'"'}) + let value = kv[1].strip().strip(chars = {'"'}) + if key == "source": + source = value + elif key == "replacement": + replacement = value + + searchesAndReplaces.add( + SearchAndReplace(source: source, replacement: replacement) + ) + + let replaces = searchesAndReplaces + + # Apply all replacements + for searchAndReplace in replaces: + result = + result.replace(searchAndReplace.source, searchAndReplace.replacement) + + proc processCodeLines(code: string, prefix: string, dedent: int): string = + ## Adds a prefix to each line of the code block and dedents it. + var prefixedLines: seq[string] = @[] + + for line in code.splitLines(): + var processedLine = line + if dedent > 0: + for i in 1 .. dedent: + if processedLine.startsWith("\t"): + processedLine = processedLine[1 ..^ 1] + prefixedLines.add(prefix & processedLine) + result = prefixedLines.join("\n") + let component = parseMDXComponent(match.match) let args = component.props let file = args.getOrDefault("file", "") - let includeFilePath = utils.cache.findCodeFile(file) - # TODO: Replace with gdscript parser, get symbols or anchors from the parser: - # TODO: add support for symbol prop # TODO: error handling: # - if there's a replace prop, ensure it's correctly formatted # - warn about using anchor + symbol (one should take precedence) - # - check that prefix is valid (- or +) try: - result = readFile(includeFilePath) if "symbol" in args: - let symbol = args["symbol"] + let symbol = args.getOrDefault("symbol", "") if symbol == "": let errorMessage = fmt"Symbol prop is empty in include component for file {includeFilePath}. Returning an empty string." @@ -118,98 +186,36 @@ proc preprocessIncludeComponent(match: RegexMatch, context: HandlerContext): str preprocessorErrorMessages.add(errorMessage) return "" - return getCode(symbol, includeFilePath) + result = getCodeForSymbol(symbol, includeFilePath) elif "anchor" in args: - let - anchor = args["anchor"] - regexAnchor = re( - fmt( - r"(?s)\h*(?:#|\/\/)\h*ANCHOR:\h*\b{anchor}\b\h*\v(?P.*?)\s*(?:#|\/\/)\h*END:\h*\b{anchor}\b" - ) - ) - - let anchorMatch = result.find(regexAnchor) - if anchorMatch.isSome(): - let - anchorCaptures = anchorMatch.get.captures.toTable() - output = anchorCaptures["contents"] - lines = output.splitLines() - var prefixedLines: seq[string] = @[] - - # Add prefix and dedent the code block if applicable - let - prefix = args.getOrDefault("prefix", "") - dedent = - try: - parseInt(args.getOrDefault("dedent", "0")) - except: - 0 - - for line in lines: - var processedLine = line - if dedent > 0: - for i in 1 .. dedent: - if processedLine.startsWith("\t"): - processedLine = processedLine[1 ..^ 1] - prefixedLines.add(prefix & processedLine) - result = prefixedLines.join("\n") - - if "replace" in args: - type SearchAndReplace = object - source: string - replacement: string - - # Parse the replace prop. It's a JSX expression with either a single object or an array of objects. - # TODO: add error handling - # TODO: this currently cannot work because the MDX component parsing cannot capture JSX expressions as props - let replaces = - try: - # Remove the array mark if relevant, then parse objects - this should work - # for both array and single object formats - let replacesStr = args["replace"].strip(chars = {'[', ']'}) - let matches = replacesStr.findAll(regexObjectPattern) - var searchesAndReplaces: seq[SearchAndReplace] = @[] - - for match in matches: - var keyValuePairs = match.strip(chars = {'{', '}'}).split(",") - var source, replacement: string - - for part in keyValuePairs: - let kv = part.strip().split(":") - if kv.len == 2: - let key = kv[0].strip().strip(chars = {'"'}) - let value = kv[1].strip().strip(chars = {'"'}) - if key == "source": - source = value - elif key == "replacement": - replacement = value - - searchesAndReplaces.add( - SearchAndReplace(source: source, replacement: replacement) - ) - - searchesAndReplaces - except: - @[] - - # Apply all replacements - for searchAndReplace in replaces: - result = - result.replace(searchAndReplace.source, searchAndReplace.replacement) - else: + let anchor = args.getOrDefault("anchor", "") + if anchor == "": let errorMessage = - fmt"Can't find matching contents for anchor {anchor} in file {includeFilePath}. Nothing will be included." + fmt"Anchor prop is empty in include component for file {includeFilePath}. Returning an empty string." stderr.styledWriteLine(fgRed, errorMessage) preprocessorErrorMessages.add(errorMessage) return "" + result = getCodeForAnchor(anchor, includeFilePath) + + # Add prefix and dedent the code block if applicable + let + prefix = args.getOrDefault("prefix", "") + dedent = + try: + parseInt(args.getOrDefault("dedent", "0")) + except: + 0 + + if prefix != "" or dedent > 0: + result = processCodeLines(result, prefix, dedent) + if "replace" in args: + result = processSearchAndReplace(result, args["replace"]) - # Clean up anchor markers and extra newlines - result = result.replace(regexAnchorLine, "").strip(chars = {'\n'}) except IOError: - let errorMessage = fmt"Failed to read include file: {includeFilePath}" + let errorMessage = fmt"Failed to read include file: {includeFilePath}. No code will be included." stderr.styledWriteLine(fgRed, errorMessage) preprocessorErrorMessages.add(errorMessage) - result = match.match + result = "" proc preprocessMarkdownImage(match: RegexMatch, context: HandlerContext): string = ## Replaces the relative input image path with an absolute path in the website's public directory.