diff --git a/mdx-utils/src/gdscript/parser_gdscript.nim b/mdx-utils/src/gdscript/parser_gdscript.nim
index cb9695f159..8a95161f10 100644
--- a/mdx-utils/src/gdscript/parser_gdscript.nim
+++ b/mdx-utils/src/gdscript/parser_gdscript.nim
@@ -1,7 +1,41 @@
-## Minimal GDScript parser specialized for code include shortcodes. Tokenizes symbol definitions and their body and collects all their content.
-import std/[tables, unittest, strutils, times]
+## Minimal GDScript parser specialized for code include shortcodes. Tokenizes
+## symbol definitions and their body and collects all their content.
+##
+## Preprocesses GDScript code to extract code between anchor comments, like
+## #ANCHOR:anchor_name
+## ... Code here
+## #END:anchor_name
+##
+## This works in 2 passes:
+##
+## 1. Preprocesses the code to extract the code between anchor comments and remove anchor comments.
+## 2. Parses the preprocessed code to tokenize symbols and their content.
+##
+## Users can then query and retrieve the code between anchors or the definition
+## and body of a symbol.
+##
+## This was originally written as a tool to only parse GDScript symbols, with
+## the anchor preprocessing added later, so the approach may not be the most
+## efficient.
+import std/[tables, unittest, strutils, times, terminal]
when compileOption("profiler"):
import std/nimprof
+when isMainModule:
+ import std/os
+
+const isDebugBuild* = defined(debug)
+
+template debugEcho*(message: string) =
+ ## Prints out a debug message to the console, only in debug builds, with the
+ ## --stacktrace:on flag (this feature is needed to print which function the
+ ## print is part of).
+ ##
+ ## In release builds, it generates no code to avoid performance overhead.
+ when isDebugBuild and compileOption("stacktrace"):
+ let frame = getFrame()
+ let stackDepth = min(frame.calldepth, 5)
+ let indent = " ".repeat(stackDepth)
+ echo indent, "[", frame.procname, "]: ", message
type
TokenType = enum
@@ -26,13 +60,55 @@ type
children: seq[Token]
Scanner = object
- # TODO: Cache the source elsewhere for reading the content of tokens after parsing.
source: string
current: int
indentLevel: int
bracketDepth: int
peekIndex: int
+ AnchorTag = object
+ ## Represents a code anchor tag, either a start or end tag,
+ ## like #ANCHOR:anchor_name or #END:anchor_name
+ isStart: bool
+ name: string
+ startPosition: int
+ endPosition: int
+
+ CodeAnchor = object
+ ## A code anchor is how we call comments used to mark a region in the code, with the form
+ ## #ANCHOR:anchor_name
+ ## ...
+ ## #END:anchor_name
+ ##
+ ## This object is used to extract the code between the anchor and the end tag.
+ nameStart, nameEnd: int
+ codeStart, codeEnd: int
+ # Used to remove the anchor tags from the final code
+ # codeStart marks the end of the anchor tag, codeEnd marks the start of the end tag
+ anchorTagStart, endTagEnd: int
+
+ GDScriptFile = object
+ ## Represents a parsed GDScript file with its symbols and source code
+ filePath: string
+ source: string
+ ## Map of symbol names to their tokens
+ symbols: Table[string, Token]
+ ## Map of anchor names to their code anchors
+ anchors: Table[string, CodeAnchor]
+ processedSource: string
+
+ SymbolQuery = object
+ ## Represents a query to get a symbol from a GDScript file, like
+ ## ClassName.definition or func_name.body or var_name.
+ name: string
+ isDefinition: bool
+ isBody: bool
+ isClass: bool
+ childName: string
+
+# Caches parsed GDScript files
+var gdscriptFiles = initTable[string, GDScriptFile]()
+
proc getCode(token: Token, source: string): string {.inline.} =
return source[token.range.start ..< token.range.end]
@@ -64,6 +140,18 @@ proc printTokens(tokens: seq[Token], source: string) =
printToken(token, source)
echo ""
+proc charMakeWhitespaceVisible*(c: char): string =
+ ## Replaces whitespace characters with visible equivalents.
+ case c
+ of '\t':
+ result = "⇥"
+ of '\n':
+ result = "↲"
+ of ' ':
+ result = "·"
+ else:
+ result = $c
+
proc getCurrentChar(s: Scanner): char {.inline.} =
## Returns the current character without advancing the scanner's current index
return s.source[s.current]
@@ -73,6 +161,9 @@ proc advance(s: var Scanner): char {.inline.} =
result = s.source[s.current]
s.current += 1
+proc isAtEnd(s: Scanner): bool {.inline.} =
+ s.current >= s.source.len
+
proc peekAt(s: var Scanner, offset: int): char {.inline.} =
## Peeks at a specific offset and returns the character without advancing the scanner
s.peekIndex = s.current + offset
@@ -94,14 +185,6 @@ proc advanceToPeek(s: var Scanner) {.inline.} =
## Advances the scanner to the stored getCurrentChar index
s.current = s.peekIndex
-proc match(s: var Scanner, expected: char): bool {.inline.} =
- ## Returns true and advances the scanner if and only if the current character matches the expected character
- ## Otherwise, returns false
- if s.getCurrentChar() != expected:
- return false
- discard s.advance()
- return true
-
proc matchString(s: var Scanner, expected: string): bool {.inline.} =
## Returns true and advances the scanner if and only if the next characters match the expected string
if s.peekString(expected):
@@ -109,39 +192,40 @@ proc matchString(s: var Scanner, expected: string): bool {.inline.} =
return true
return false
-proc countIndentation(s: var Scanner): int {.inline.} =
+proc countIndentationAndAdvance(s: var Scanner): int {.inline.} =
## Counts the number of spaces and tabs starting from the current position
+ ## Advances the scanner as it counts the indentation
## Call this function at the start of a line to count the indentation
result = 0
- while true:
+ while not s.isAtEnd():
+ debugEcho "Current index: " & $s.current
+ debugEcho "Current char is: " & s.getCurrentChar().charMakeWhitespaceVisible()
case s.getCurrentChar()
of '\t':
result += 1
s.current += 1
of ' ':
var spaces = 0
- while s.getCurrentChar() == ' ':
+ while not s.isAtEnd() and s.getCurrentChar() == ' ':
spaces += 1
s.current += 1
result += spaces div 4
break
else:
break
+ debugEcho "Indentation: " & $result
return result
proc skipWhitespace(s: var Scanner) {.inline.} =
## Peeks at the next characters and advances the scanner until a non-whitespace character is found
- while true:
+ while not s.isAtEnd():
let c = s.getCurrentChar()
case c
of ' ', '\r', '\t':
- discard s.advance()
+ s.current += 1
else:
break
-proc isAtEnd(s: Scanner): bool {.inline.} =
- s.current >= s.source.len
-
proc isAlphanumericOrUnderscore(c: char): bool {.inline.} =
## Returns true if the character is a letter, digit, or underscore
let isLetter = (c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') or c == '_'
@@ -150,11 +234,20 @@ proc isAlphanumericOrUnderscore(c: char): bool {.inline.} =
proc scanIdentifier(s: var Scanner): tuple[start: int, `end`: int] {.inline.} =
let start = s.current
- while isAlphanumericOrUnderscore(s.getCurrentChar()):
+ while not s.isAtEnd() and isAlphanumericOrUnderscore(s.getCurrentChar()):
discard s.advance()
result = (start, s.current)
-proc scanToEndOfLine(s: var Scanner): tuple[start, `end`: int] {.inline.} =
+proc scanToStartOfNextLine(s: var Scanner): tuple[start, `end`: int] {.inline.} =
+ ## Scans and advances to the first character of the next line.
+ ##
+ ## Returns a tuple of:
+ ##
+ ## - The current position at the start of the function call
+ ## - The position of the first character of the next line
+ if s.isAtEnd():
+ return (s.current, s.current)
+
let start = s.current
let length = s.source.len
var offset = 0
@@ -198,29 +291,182 @@ proc isNewDefinition(s: var Scanner): bool {.inline.} =
s.skipWhitespace()
result =
s.peekString("func") or s.peekString("var") or s.peekString("const") or
- s.peekString("class") or s.peekString("signal") or s.peekString("enum")
+ s.peekString("class") or s.peekString("signal") or s.peekString("enum") or
+ # TODO: Consider how to handle regular comments vs anchors
+ s.peekString("#ANCHOR") or s.peekString("#END") or s.peekString("# ANCHOR") or
+ s.peekString("# END")
s.current = savedPos
return result
proc scanBody(s: var Scanner, startIndent: int): tuple[bodyStart, bodyEnd: int] =
let start = s.current
while not s.isAtEnd():
- let currentIndent = s.countIndentation()
+ let currentIndent = s.countIndentationAndAdvance()
if currentIndent <= startIndent and not s.isAtEnd():
if isNewDefinition(s):
break
- discard scanToEndOfLine(s)
+ discard scanToStartOfNextLine(s)
+ # s.current points to the first letter of the next token, after the
+ # indentation. We need to backtrack to find the actual end of the body.
+ var index = s.current - 1
+ while index > 0 and s.source[index] in [' ', '\r', '\t', '\n']:
+ index -= 1
+ s.current = index + 1
result = (start, s.current)
+proc scanAnchorTags(s: var Scanner): seq[AnchorTag] =
+ ## Scans the entire file and collects all anchor tags (both start and end)
+ ## Returns them in the order they appear in the source code
+
+ while not s.isAtEnd():
+ if s.getCurrentChar() == '#':
+ let startPosition = s.current
+
+ # Look for an anchor, if not found skip to the next line. An anchor has
+ # to take a line on its own.
+ s.current += 1
+ s.skipWhitespace()
+
+ let isAnchor = s.peekString("ANCHOR")
+ let isEnd = s.peekString("END")
+
+ if not (isAnchor or isEnd):
+ discard s.scanToStartOfNextLine()
+ continue
+ else:
+ var tag = AnchorTag(isStart: isAnchor)
+ tag.startPosition = startPosition
+ s.advanceToPeek()
+ debugEcho "Found tag: " & s.source[startPosition ..< s.current]
+
+ # Jump to after the colon (:) to find the tag's name
+ while s.getCurrentChar() != ':':
+ s.current += 1
+ s.skipWhitespace()
+ s.current += 1
+ s.skipWhitespace()
+
+ let (nameStart, nameEnd) = s.scanIdentifier()
+ tag.name = s.source[nameStart ..< nameEnd]
+
+ let (_, lineEnd) = s.scanToStartOfNextLine()
+ tag.endPosition = lineEnd
+
+ result.add(tag)
+
+ # If the current char isn't a line return, backtrack s.current to the line return
+ while not s.isAtEnd() and s.getCurrentChar() != '\n':
+ s.current -= 1
+
+ s.current += 1
+ debugEcho "Found " & $result.len & " anchor tags"
+
+proc preprocessAnchors(
+ source: string
+): tuple[anchors: Table[string, CodeAnchor], processed: string] =
+ ## This function scans the source code for anchor tags and looks for matching opening and closing tags.
+ ## Anchor tags are comments used to mark a region in the code, with the form:
+ ##
+ ## #ANCHOR:anchor_name
+ ## ...
+ ## #END:anchor_name
+ ##
+ ## The function returns:
+ ##
+ ## 1. a table of anchor region names mapped to CodeAnchor
+ ## objects, each representing a region of code between an anchor and its
+ ## matching end tag.
+ ## 2. A string with the source code with the anchor comment lines removed, to
+ ## parse symbols more easily in a separate pass.
+
+ var s =
+ Scanner(source: source, current: 0, indentLevel: 0, bracketDepth: 0, peekIndex: 0)
+
+ # Anchor regions can be nested or intertwined, so we first scan all tags, then match opening and closing tags by name to build CodeAnchor objects
+ let tags = scanAnchorTags(s)
+
+ # Turn tags into tables to find matching pairs and check for duplicate names
+ var startTags = initTable[string, AnchorTag](tags.len div 2)
+ var endTags = initTable[string, AnchorTag](tags.len div 2)
+
+ # TODO: add processed filename/path in errors
+ for tag in tags:
+ if tag.isStart:
+ if tag.name in startTags:
+ stderr.writeLine "\e[31mDuplicate ANCHOR tag found for: " & tag.name & "\e[0m"
+ return
+ startTags[tag.name] = tag
+ else:
+ if tag.name in endTags:
+ stderr.writeLine "\e[31mDuplicate END tag found for: " & tag.name & "\e[0m"
+ return
+ endTags[tag.name] = tag
+
+ # Validate tag pairs and create CodeAnchor objects
+ var anchors = initTable[string, CodeAnchor](tags.len div 2)
+
+ for name, startTag in startTags:
+ if name notin endTags:
+ stderr.writeLine "\e[31mMissing #END tag for anchor: " & name & "\e[0m"
+ return
+ for name, endTag in endTags:
+ if name notin startTags:
+ stderr.writeLine "\e[31mFound #END tag without matching #ANCHOR for: " & name &
+ "\e[0m"
+ return
+
+ for name, startTag in startTags:
+ let endTag = endTags[name]
+ var anchor = CodeAnchor()
+
+ anchor.nameStart = startTag.startPosition
+ anchor.nameEnd = startTag.startPosition + name.len
+ anchor.anchorTagStart = startTag.startPosition
+ anchor.codeStart = startTag.endPosition
+ anchor.codeEnd = block:
+ var codeEndPos = endTag.startPosition
+ while source[codeEndPos] != '\n':
+ codeEndPos -= 1
+ codeEndPos
+ anchor.endTagEnd = endTag.endPosition
+
+ anchors[name] = anchor
+
+ # Preprocess source code by removing anchor tag lines
+ var processedSource = newStringOfCap(source.len)
+ var lastEnd = 0
+ for tag in tags:
+ # Tags can be indented, so we backtrack to the start of the line to strip
+ # the entire line of code containing the tag
+ var tagLineStart = tag.startPosition
+ while tagLineStart > 0 and source[tagLineStart - 1] != '\n':
+ tagLineStart -= 1
+ processedSource.add(source[lastEnd ..< tagLineStart])
+ lastEnd = tag.endPosition
+ processedSource.add(source[lastEnd ..< source.len])
+
+ result = (anchors, processedSource.strip(leading = false, trailing = true))
+
proc scanToken(s: var Scanner): Token =
while not s.isAtEnd():
- s.indentLevel = s.countIndentation()
+ debugEcho "At top of loop. Current index: " & $s.current
+ s.indentLevel = s.countIndentationAndAdvance()
+ debugEcho "Indent level: " & $s.indentLevel
s.skipWhitespace()
+ debugEcho "After whitespace. Current index: " & $s.current
+
+ if s.isAtEnd():
+ break
let startPos = s.current
let c = s.getCurrentChar()
+ debugEcho "Current char: " & $c.charMakeWhitespaceVisible()
case c
+ # Comment, skip to end of line and continue
+ of '#':
+ discard s.scanToStartOfNextLine()
+ continue
# Function definition
of 'f':
if s.matchString("func"):
@@ -235,7 +481,7 @@ proc scanToken(s: var Scanner): Token =
while s.getCurrentChar() != ':':
discard s.advance()
- discard s.scanToEndOfLine()
+ discard s.scanToStartOfNextLine()
token.range.definitionEnd = s.current
token.range.bodyStart = s.current
@@ -334,14 +580,16 @@ proc scanToken(s: var Scanner): Token =
else:
discard s.advance()
else:
- discard s.scanToEndOfLine()
+ discard s.scanToStartOfNextLine()
token.range.end = s.current
+ debugEcho "Parsed signal token: " & $token
return token
else:
discard
s.current += 1
+ debugEcho "Skipping character, current index: " & $s.current
return Token(tokenType: TokenType.Invalid)
@@ -350,7 +598,10 @@ proc parseClass(s: var Scanner, classToken: var Token) =
let classIndent = s.indentLevel
s.current = classToken.range.bodyStart
while not s.isAtEnd():
- let currentIndent = s.countIndentation()
+ debugEcho "Parsing class body. Current index: " & $s.current
+ #Problem: s is on the first char of the token instead of the beginning of the line
+ let currentIndent = s.countIndentationAndAdvance()
+ debugEcho "Current indent: " & $currentIndent
if currentIndent <= classIndent:
if isNewDefinition(s):
break
@@ -374,29 +625,34 @@ proc parseGDScript(source: string): seq[Token] =
token.range.end = scanner.current
result.add(token)
-type GDScriptFile = object
- filePath: string
- source: string
- symbols: Table[string, Token]
-
-# Caches parsed GDScript files
-var gdscriptFiles = initTable[string, GDScriptFile]()
-
proc parseGDScriptFile(path: string) =
- # Parses a GDScript file and caches it
+ ## Parses a GDScript file and caches it in the gdscriptFiles table.
+ ## The parsing happens in two passes:
+ ##
+ ## 1. We preprocess the source code to extract the code between anchor comments and remove these comment lines.
+ ## 2. We parse the preprocessed source code to tokenize symbols and their content.
+ ##
+ ## Preprocessing makes the symbol parsing easier afterwards, although it means we scan the file twice.
let source = readFile(path)
- let tokens = parseGDScript(source)
+ let (anchors, processedSource) = preprocessAnchors(source)
+ let tokens = parseGDScript(processedSource)
var symbols = initTable[string, Token]()
for token in tokens:
- let name = token.getName(source)
+ let name = token.getName(processedSource)
symbols[name] = token
- gdscriptFiles[path] = GDScriptFile(filePath: path, source: source, symbols: symbols)
+ gdscriptFiles[path] = GDScriptFile(
+ filePath: path,
+ source: source,
+ symbols: symbols,
+ anchors: anchors,
+ processedSource: processedSource,
+ )
proc getTokenFromCache(symbolName: string, filePath: string): Token =
# Gets a token from the cache given a symbol name and the path to the GDScript file
if not gdscriptFiles.hasKey(filePath):
- echo "Token not found, " & filePath & " not in cache. Parsing file..."
+ debugEcho "Token not found, " & filePath & " not in cache. Parsing file..."
parseGDScriptFile(filePath)
let file = gdscriptFiles[filePath]
@@ -407,12 +663,6 @@ proc getTokenFromCache(symbolName: string, filePath: string): Token =
return file.symbols[symbolName]
-proc getGDScriptCodeFromCache(filePath: string): var string =
- # Gets the code of a GDScript file from the cache given its path
- if not gdscriptFiles.hasKey(filePath):
- parseGDScriptFile(filePath)
- return gdscriptFiles[filePath].source
-
proc getSymbolText(symbolName: string, path: string): string =
# Gets the text of a symbol given its name and the path to the file
let token = getTokenFromCache(symbolName, path)
@@ -437,13 +687,6 @@ proc getSymbolBody(symbolName: string, path: string): string =
let file = gdscriptFiles[path]
return token.getBody(file.source)
-type SymbolQuery = object
- name: string
- isDefinition: bool
- isBody: bool
- isClass: bool
- childName: string
-
proc parseSymbolQuery(query: string): SymbolQuery =
## Turns a symbol query string like ClassName.body or ClassName.function.definition
## into a SymbolQuery object for easier processing.
@@ -470,9 +713,10 @@ proc parseSymbolQuery(query: string): SymbolQuery =
else:
raise newException(ValueError, "Invalid symbol query: '" & query & "'")
-proc getCode*(symbolQuery: string, filePath: string): string =
+proc getCodeForSymbol*(symbolQuery: string, filePath: string): string =
## Gets the code of a symbol given a query and the path to the file
## The query can be:
+ ##
## - A symbol name like a function or class name
## - The path to a symbol, like ClassName.functionName
## - The request of a definition, like functionName.definition
@@ -500,7 +744,30 @@ proc getCode*(symbolQuery: string, filePath: string): string =
result = getSymbolBody(query.name, filePath)
else:
result = getSymbolText(query.name, filePath)
- result = result.strip(trailing = true)
+
+proc getCodeForAnchor*(anchorName: string, filePath: string): string =
+ ## Gets the code between anchor comments given the anchor name and the path to the file
+ if not gdscriptFiles.hasKey(filePath):
+ debugEcho filePath & " not in cache. Parsing file..."
+ parseGDScriptFile(filePath)
+
+ let file = gdscriptFiles[filePath]
+ if not file.anchors.hasKey(anchorName):
+ styledEcho(fgRed, "Anchor '", anchorName, "' not found in file: '", filePath, "'")
+ return ""
+
+ let anchor = file.anchors[anchorName]
+ return file.source[anchor.codeStart ..< anchor.codeEnd]
+
+proc getCodeWithoutAnchors*(filePath: string): string =
+ ## Gets the preprocessed code of a GDScript file. It's the full script without
+ ## the anchor tag lines like #ANCHOR:anchor_name or #END:anchor_name
+ if not gdscriptFiles.hasKey(filePath):
+ debugEcho filePath & " not in cache. Parsing file..."
+ parseGDScriptFile(filePath)
+
+ let file = gdscriptFiles[filePath]
+ result = file.processedSource
proc runPerformanceTest() =
let codeTest =
@@ -647,6 +914,105 @@ class StateMachine extends Node:
classToken.children[2].tokenType == TokenType.Variable
classToken.children[3].tokenType == TokenType.Function
+ test "Parse larger inner class with anchors":
+ let code =
+ """
+#ANCHOR:class_StateDie
+class StateDie extends State:
+
+ const SmokeExplosionScene = preload("res://assets/vfx/smoke_vfx/smoke_explosion.tscn")
+
+ #ANCHOR:test
+ func _init(init_mob: Mob3D) -> void:
+ super("Die", init_mob)
+
+ func enter() -> void:
+ mob.skin.play("die")
+ #END:test
+
+ var smoke_explosion := SmokeExplosionScene.instantiate()
+ mob.add_sibling(smoke_explosion)
+ smoke_explosion.global_position = mob.global_position
+
+ mob.skin.animation_finished.connect(func (_animation_name: String) -> void:
+ mob.queue_free()
+ )
+#END:class_StateDie
+"""
+ let (anchors, processedSource) = preprocessAnchors(code)
+ echo processedSource
+ quit()
+ let tokens = parseGDScript(processedSource)
+ check:
+ tokens.len == 1
+ if tokens.len == 1:
+ let classToken = tokens[0]
+ check:
+ classToken.tokenType == TokenType.Class
+ classToken.getName(processedSource) == "StateDie"
+ classToken.children.len == 3
+ # Trailing anchor comments should not be included in the token
+ not classToken.getBody(processedSource).contains("#END")
+ else:
+ echo "Found tokens: ", tokens.len
+ printTokens(tokens, processedSource)
+
+ test "Anchor after docstring":
+ let code =
+ """
+## The words that appear on screen at each step.
+#ANCHOR:counting_steps
+@export var counting_steps: Array[String]= ["3", "2", "1", "GO!"]
+#END:counting_steps
+"""
+ let (anchors, processedSource) = preprocessAnchors(code)
+ let tokens = parseGDScript(processedSource)
+ check:
+ tokens.len == 1
+ if tokens.len == 1:
+ let token = tokens[0]
+ check:
+ token.tokenType == TokenType.Variable
+ token.getName(processedSource) == "counting_steps"
+
+ test "Another anchor":
+ let code =
+ """
+## The container for buttons
+#ANCHOR:010_the_container_box
+@onready var action_buttons_v_box_container: VBoxContainer = %ActionButtonsVBoxContainer
+#END:010_the_container_box
+"""
+ let (anchors, processedSource) = preprocessAnchors(code)
+ let tokens = parseGDScript(processedSource)
+ check:
+ tokens.len == 1
+ if tokens.len == 1:
+ let token = tokens[0]
+ check:
+ token.tokenType == TokenType.Variable
+ token.getName(processedSource) == "action_buttons_v_box_container"
+
+ when isMainModule:
+ test "Parse anchor code":
+ let code =
+ """
+ #ANCHOR:row_node_references
+ @onready var row_bodies: HBoxContainer = %RowBodies
+ @onready var row_expressions: HBoxContainer = %RowExpressions
+ #END:row_node_references
+ """
+ let tempFile = getTempDir() / "test_gdscript.gd"
+ writeFile(tempFile, code)
+ let rowNodeReferences = getCodeForAnchor("row_node_references", tempFile)
+ removeFile(tempFile)
+
+ check:
+ rowNodeReferences.contains("var row_bodies: HBoxContainer = %RowBodies")
+ rowNodeReferences.contains(
+ "var row_expressions: HBoxContainer = %RowExpressions"
+ )
+
when isMainModule:
runUnitTests()
#runPerformanceTest()
diff --git a/mdx-utils/src/md/preprocessor.nim b/mdx-utils/src/md/preprocessor.nim
index 093b6b9037..e2d5aa2348 100644
--- a/mdx-utils/src/md/preprocessor.nim
+++ b/mdx-utils/src/md/preprocessor.nim
@@ -90,27 +90,95 @@ proc preprocessGodotIcon(match: RegexMatch, context: HandlerContext): string =
if className in CACHE_GODOT_ICONS:
result = " " & match.match
else:
- echo(fmt"Couldn't find icon for `{className}`. Skipping...")
+ # TODO: replace with warning log, and deduplicate warnings because the same class can be used multiple times
+ # echo(fmt"Couldn't find icon for `{className}`. Skipping...")
result = match.match
proc preprocessIncludeComponent(match: RegexMatch, context: HandlerContext): string =
- ## Replaces the Include shortcode with the contents of the section of a file or full file it points to.
+ ## Processes the Include component, which includes code from a file. Uses the
+ ## GDScript parser module to extract code from the file.
+ ##
+ ## The Include component can take the following props:
+ ##
+ ## - file: the name or project-relative path to the file to include
+ ## - symbol: the symbol query to look for in the file, like a class name, a
+ ## function name, etc. It also supports forms like ClassName.definition or
+ ## ClassName.method_name.body
+ ## - anchor: the anchor to look for in the file. You must use one of symbol or
+ ## anchor, not both.
+ ## - prefix: a string to add at the beginning of each line of the included
+ ## code, typically + or - for diff code listings
+ ## - dedent: the number of tabs to remove from the beginning of each line of the
+ ## included code
+ ## - replace: a JSX expression with an array of objects, each containing a source
+ ## and replacement key. The source is the string to look for in the code, and the
+ ## replacement is the string to replace it with.
+
+ proc processSearchAndReplace(code: string, replaceJsxObject: string): string =
+ ## Processes an include with a replace prop.
+ ## It's a JSX expression with an array of objects, each containing a source and replacement key.
+ type SearchAndReplace = object
+ source: string
+ replacement: string
+
+ # Parse the replaceJsxObject prop. It's a JSX expression with either a single object or an array of objects.
+ # TODO: add error handling
+ # TODO: this currently cannot work because the MDX component parsing cannot capture JSX expressions as props
+ # TODO: later: replace with MDX component parser
+ var searchesAndReplaces: seq[SearchAndReplace] = @[]
+ # Remove the array mark if relevant, then parse objects - this should work
+ # for both array and single object formats
+ let replacesStr = replaceJsxObject.strip(chars = {'[', ']'})
+ let matches = replacesStr.findAll(regexObjectPattern)
+
+ for match in matches:
+ var keyValuePairs = match.strip(chars = {'{', '}'}).split(",")
+ var source, replacement: string
+
+ for part in keyValuePairs:
+ let kv = part.strip().split(":")
+ if kv.len == 2:
+ let key = kv[0].strip().strip(chars = {'"'})
+ let value = kv[1].strip().strip(chars = {'"'})
+ if key == "source":
+ source = value
+ elif key == "replacement":
+ replacement = value
+
+ searchesAndReplaces.add(
+ SearchAndReplace(source: source, replacement: replacement)
+ )
+
+ let replaces = searchesAndReplaces
+
+ # Apply all replacements
+ for searchAndReplace in replaces:
+ result = result.replace(searchAndReplace.source, searchAndReplace.replacement)
+
+ proc processCodeLines(code: string, prefix: string, dedent: int): string =
+ ## Adds a prefix to each line of the code block and dedents it.
+ var prefixedLines: seq[string] = @[]
+
+ for line in code.splitLines():
+ var processedLine = line
+ if dedent > 0:
+ for i in 1 .. dedent:
+ if processedLine.startsWith("\t"):
+ processedLine = processedLine[1 ..^ 1]
+ prefixedLines.add(prefix & processedLine)
+ result = prefixedLines.join("\n")
+
let component = parseMDXComponent(match.match)
let args = component.props
let file = args.getOrDefault("file", "")
-
let includeFilePath = utils.cache.findCodeFile(file)
- # TODO: Replace with gdscript parser, get symbols or anchors from the parser:
- # TODO: add support for symbol prop
# TODO: error handling:
# - if there's a replace prop, ensure it's correctly formatted
# - warn about using anchor + symbol (one should take precedence)
- # - check that prefix is valid (- or +)
try:
- result = readFile(includeFilePath)
if "symbol" in args:
- let symbol = args["symbol"]
+ let symbol = args.getOrDefault("symbol", "")
if symbol == "":
let errorMessage =
fmt"Symbol prop is empty in include component for file {includeFilePath}. Returning an empty string."
@@ -118,98 +186,38 @@ proc preprocessIncludeComponent(match: RegexMatch, context: HandlerContext): str
preprocessorErrorMessages.add(errorMessage)
return ""
- return getCode(symbol, includeFilePath)
+ result = getCodeForSymbol(symbol, includeFilePath)
elif "anchor" in args:
- let
- anchor = args["anchor"]
- regexAnchor = re(
- fmt(
- r"(?s)\h*(?:#|\/\/)\h*ANCHOR:\h*\b{anchor}\b\h*\v(?P.*?)\s*(?:#|\/\/)\h*END:\h*\b{anchor}\b"
- )
- )
-
- let anchorMatch = result.find(regexAnchor)
- if anchorMatch.isSome():
- let
- anchorCaptures = anchorMatch.get.captures.toTable()
- output = anchorCaptures["contents"]
- lines = output.splitLines()
- var prefixedLines: seq[string] = @[]
-
- # Add prefix and dedent the code block if applicable
- let
- prefix = args.getOrDefault("prefix", "")
- dedent =
- try:
- parseInt(args.getOrDefault("dedent", "0"))
- except:
- 0
-
- for line in lines:
- var processedLine = line
- if dedent > 0:
- for i in 1 .. dedent:
- if processedLine.startsWith("\t"):
- processedLine = processedLine[1 ..^ 1]
- prefixedLines.add(prefix & processedLine)
- result = prefixedLines.join("\n")
-
- if "replace" in args:
- type SearchAndReplace = object
- source: string
- replacement: string
-
- # Parse the replace prop. It's a JSX expression with either a single object or an array of objects.
- # TODO: add error handling
- # TODO: this currently cannot work because the MDX component parsing cannot capture JSX expressions as props
- let replaces =
- try:
- # Remove the array mark if relevant, then parse objects - this should work
- # for both array and single object formats
- let replacesStr = args["replace"].strip(chars = {'[', ']'})
- let matches = replacesStr.findAll(regexObjectPattern)
- var searchesAndReplaces: seq[SearchAndReplace] = @[]
-
- for match in matches:
- var keyValuePairs = match.strip(chars = {'{', '}'}).split(",")
- var source, replacement: string
-
- for part in keyValuePairs:
- let kv = part.strip().split(":")
- if kv.len == 2:
- let key = kv[0].strip().strip(chars = {'"'})
- let value = kv[1].strip().strip(chars = {'"'})
- if key == "source":
- source = value
- elif key == "replacement":
- replacement = value
-
- searchesAndReplaces.add(
- SearchAndReplace(source: source, replacement: replacement)
- )
-
- searchesAndReplaces
- except:
- @[]
-
- # Apply all replacements
- for searchAndReplace in replaces:
- result =
- result.replace(searchAndReplace.source, searchAndReplace.replacement)
- else:
+ let anchor = args.getOrDefault("anchor", "")
+ if anchor == "":
let errorMessage =
- fmt"Can't find matching contents for anchor {anchor} in file {includeFilePath}. Nothing will be included."
+ fmt"Anchor prop is empty in include component for file {includeFilePath}. Returning an empty string."
stderr.styledWriteLine(fgRed, errorMessage)
preprocessorErrorMessages.add(errorMessage)
return ""
-
- # Clean up anchor markers and extra newlines
- result = result.replace(regexAnchorLine, "").strip(chars = {'\n'})
+ result = getCodeForAnchor(anchor, includeFilePath)
+ else:
+ result = getCodeWithoutAnchors(includeFilePath)
+
+ # Add prefix and dedent the code block if applicable
+ let
+ prefix = args.getOrDefault("prefix", "")
+ dedent =
+ try:
+ parseInt(args.getOrDefault("dedent", "0"))
+ except:
+ 0
+
+ if prefix != "" or dedent > 0:
+ result = processCodeLines(result, prefix, dedent)
+ if "replace" in args:
+ result = processSearchAndReplace(result, args["replace"])
except IOError:
- let errorMessage = fmt"Failed to read include file: {includeFilePath}"
+ let errorMessage =
+ fmt"Failed to read include file: {includeFilePath}. No code will be included."
stderr.styledWriteLine(fgRed, errorMessage)
preprocessorErrorMessages.add(errorMessage)
- result = match.match
+ result = ""
proc preprocessMarkdownImage(match: RegexMatch, context: HandlerContext): string =
## Replaces the relative input image path with an absolute path in the website's public directory.