diff --git a/src/tokenizer/atl-token-patterns.g.ts b/src/tokenizer/atl-token-patterns.g.ts index dd3703d..c37789d 100644 --- a/src/tokenizer/atl-token-patterns.g.ts +++ b/src/tokenizer/atl-token-patterns.g.ts @@ -5,7 +5,7 @@ // THIS FILE HAS BEEN GENERATED BY THE `syntax-to-token-pattern.py` GENERATOR // DO NOT EDIT THIS FILE DIRECTLY! INSTEAD RUN THE PYTHON SCRIPT. // ANY MANUAL EDITS MADE TO THIS FILE WILL BE OVERWRITTEN. YOU HAVE BEEN WARNED. -// Last generated: 12/07/2024 18:50:56 (UTC+0) +// Last generated: 14/07/2024 12:28:58 (UTC+0) import { KeywordTokenType, EntityTokenType, MetaTokenType, CharacterTokenType } from "./renpy-tokens"; import { TokenPattern } from "./token-pattern-types"; @@ -432,19 +432,17 @@ export const atlStatements: TokenPattern = { }; export const atlBlockTester: TokenPattern = { - patterns: [ - { - debugName: "atlBlockTester.patterns![0]", + debugName: "atlBlockTester", - contentToken: MetaTokenType.ATLBlock, /*meta.atl-block.renpy*/ - begin: /(?<=(^[ \t]*)(?:camera|image|show|scene|transform|on|block|parallel|contains|choice)\b.*?)(:)/dgm, - beginCaptures: { - 2: { token: CharacterTokenType.Colon, /*punctuation.section.atl.begin.renpy*/ }, - }, - // @ts-ignore: Back references in end patterns are replaced by begin matches at runtime - end: /^(?=(?!\1)[ \t]*[^\s#]|\1[^\s#])/gm, - patterns: [] - }] + contentToken: MetaTokenType.ATLBlock, /*meta.atl-block.renpy*/ + begin: /(?<=(^[ \t]*)(?:camera|image|show|scene|transform|on|block|parallel|contains|choice)\b.*?)(:)/dgm, + beginCaptures: { + 2: { token: CharacterTokenType.Colon, /*punctuation.section.atl.begin.renpy*/ }, + }, + // @ts-ignore: Back references in end patterns are replaced by begin matches at runtime + end: /^(?=(?!\1)[ \t]*[^\s#]|\1[^\s#])/gm, + patterns: [ + ] }; export const transform: TokenPattern = { diff --git a/src/tokenizer/debug-decorator.ts b/src/tokenizer/debug-decorator.ts index 9ff1409..5d6c71d 100644 --- a/src/tokenizer/debug-decorator.ts +++ b/src/tokenizer/debug-decorator.ts @@ -242,7 +242,7 @@ async function updateDecorations() { range: range, hoverMessage: { language: "text", - value: `Token: ${tokenTypeToStringMap[token.type]}(id: ${token.type}) + value: `${token.isMetaToken() ? "MetaToken" : "Token"}: ${tokenTypeToStringMap[token.type]}(id: ${token.type}) Start: {Line: ${range.start.line + 1}, Char: ${range.start.character + 1}} End: {Line: ${range.end.line + 1}, Char: ${range.end.character + 1}} Content: {${content?.replaceAll("\n", "\\n")}}`, @@ -327,6 +327,7 @@ ${(decoration.hoverMessage as MarkdownString).value}`, case KeywordTokenType.Other: case KeywordTokenType.OtherPython: case KeywordTokenType.OtherAudio: + case KeywordTokenType.Layer: case KeywordTokenType.Take: // Renpy style sub-expression keywords case KeywordTokenType.Del: case KeywordTokenType.Clear: diff --git a/src/tokenizer/python-token-patterns.g.ts b/src/tokenizer/python-token-patterns.g.ts index 4819511..ef5b6ce 100644 --- a/src/tokenizer/python-token-patterns.g.ts +++ b/src/tokenizer/python-token-patterns.g.ts @@ -5,7 +5,7 @@ // THIS FILE HAS BEEN GENERATED BY THE `syntax-to-token-pattern.py` GENERATOR // DO NOT EDIT THIS FILE DIRECTLY! INSTEAD RUN THE PYTHON SCRIPT. // ANY MANUAL EDITS MADE TO THIS FILE WILL BE OVERWRITTEN. YOU HAVE BEEN WARNED. -// Last generated: 12/07/2024 18:44:34 (UTC+0) +// Last generated: 14/07/2024 12:28:58 (UTC+0) import { MetaTokenType, KeywordTokenType, CharacterTokenType, EntityTokenType, OperatorTokenType, LiteralTokenType } from "./renpy-tokens"; import { TokenPattern } from "./token-pattern-types"; @@ -85,7 +85,7 @@ export const docstringStatement: TokenPattern = { debugName: "docstringStatement", // the string either terminates correctly or by the beginning of a new line (this is for single line docstrings that aren't terminated) AND it's not followed by another docstring - begin: /^(?=\s*[rR]?(\'\'\'|\"\"\"|\'|\"))/gm, + begin: /(?<=^[ \t]*)(?=[rR]?(\'\'\'|\"\"\"|\'|\"))/gm, // @ts-ignore: Back references in end patterns are replaced by begin matches at runtime end: /((?<=\1)|^)(?!\s*[rR]?(\'\'\'|\"\"\"|\'|\"))/gm, patterns: [ @@ -351,7 +351,7 @@ export const oddFunctionCall: TokenPattern = { // A bit obscured function call where there may have been an arbitrary number of other operations to get the function.E.g. "arr[idx](args)" token: MetaTokenType.FunctionCall, /*meta.function-call.python*/ - begin: /(?<=\]|\))\s*(?=\()/g, + begin: /(?<=\]|\)|")\s*(?=\()/g, end: /(\))/dg, endCaptures: { 1: { token: CharacterTokenType.CloseParentheses, /*punctuation.definition.arguments.end.python*/ }, @@ -482,7 +482,7 @@ export const numberHex: TokenPattern = { debugName: "numberHex", token: MetaTokenType.ConstantNumeric, /*constant.numeric.hex.python*/ - match: /(? = { Other: { name: "Other", value: KeywordTokenType.Other }, OtherPython: { name: "OtherPython", value: KeywordTokenType.OtherPython }, OtherAudio: { name: "OtherAudio", value: KeywordTokenType.OtherAudio }, + Layer: { name: "Layer", value: KeywordTokenType.Layer }, Take: { name: "Take", value: KeywordTokenType.Take }, Del: { name: "Del", value: KeywordTokenType.Del }, @@ -635,6 +636,7 @@ const tokenTypeDefinitions: EnumToString = { SimpleExpression: { name: "SimpleExpression", value: MetaTokenType.SimpleExpression }, + RenpyBlock: { name: "RenpyBlock", value: MetaTokenType.RenpyBlock }, CodeBlock: { name: "CodeBlock", value: MetaTokenType.CodeBlock }, PythonLine: { name: "PythonLine", value: MetaTokenType.PythonLine }, PythonBlock: { name: "PythonBlock", value: MetaTokenType.PythonBlock }, diff --git a/src/tokenizer/token-patterns.g.ts b/src/tokenizer/token-patterns.g.ts index c40b98b..2246897 100644 --- a/src/tokenizer/token-patterns.g.ts +++ b/src/tokenizer/token-patterns.g.ts @@ -3,10 +3,10 @@ // THIS FILE HAS BEEN GENERATED BY THE `syntax-to-token-pattern.py` GENERATOR // DO NOT EDIT THIS FILE DIRECTLY! INSTEAD RUN THE PYTHON SCRIPT. // ANY MANUAL EDITS MADE TO THIS FILE WILL BE OVERWRITTEN. YOU HAVE BEEN WARNED. -// Last generated: 12/07/2024 18:44:34 (UTC+0) +// Last generated: 14/07/2024 15:10:14 (UTC+0) -import * as AtlPatterns from "./atl-token-patterns.g"; import * as RenpyPatterns from "./renpy-token-patterns.g"; +import * as AtlPatterns from "./atl-token-patterns.g"; import * as StylePatterns from "./style-token-patterns.g"; import * as PythonPatterns from "./python-token-patterns.g"; import * as ScreenPatterns from "./screen-token-patterns.g"; @@ -19,18 +19,18 @@ RenpyPatterns.parenthesizedPython.patterns![2].patterns!.push(PythonPatterns.exp RenpyPatterns.simpleExpression.patterns!.splice(0, 0, PythonPatterns.string); RenpyPatterns.simpleExpression.patterns!.splice(3, 0, PythonPatterns.memberAccess); RenpyPatterns.simpleExpression.patterns!.splice(4, 0, PythonPatterns.functionCall); -RenpyPatterns.keywords.patterns![1].patterns!.push(PythonPatterns.expression); RenpyPatterns.constantPlaceholder.captures![2].patterns!.push(PythonPatterns.expression); RenpyPatterns.pythonStatements.patterns![1].patterns!.push(PythonPatterns.python); -RenpyPatterns.pythonBlockTester.patterns!.push(PythonPatterns.python); +RenpyPatterns.pythonBlockTester.patterns![0].patterns!.push(PythonPatterns.python); +RenpyPatterns.pythonBlockTester.patterns![1].patterns!.push(PythonPatterns.python); RenpyPatterns.define.patterns!.splice(2, 0, PythonPatterns.expression); RenpyPatterns.define.patterns![3].patterns!.splice(0, 0, PythonPatterns.expression); RenpyPatterns.defaultStatement.patterns!.splice(1, 0, PythonPatterns.expression); RenpyPatterns.defaultStatement.patterns![2].patterns!.splice(0, 0, PythonPatterns.expression); RenpyPatterns.oneLinePython.patterns!.splice(1, 0, PythonPatterns.expression); -RenpyPatterns.sayStatements.patterns![0].endCaptures![3].patterns!.push(PythonPatterns.functionArguments); -RenpyPatterns.sayStatements.patterns![1].endCaptures![3].patterns!.push(PythonPatterns.functionArguments); -RenpyPatterns.conditionals.captures![2].patterns!.splice(0, 0, PythonPatterns.expression); +RenpyPatterns.sayStatements.patterns![0].endCaptures![3].patterns!.push(PythonPatterns.oddFunctionCall); +RenpyPatterns.sayStatements.patterns![1].endCaptures![3].patterns!.push(PythonPatterns.oddFunctionCall); +RenpyPatterns.conditionals.patterns!.splice(0, 0, PythonPatterns.expression); RenpyPatterns.labelName.patterns!.splice(0, 0, PythonPatterns.builtinPossibleCallables); RenpyPatterns.labelCall.patterns!.splice(0, 0, PythonPatterns.specialVariables); RenpyPatterns.labelCall.patterns!.push(PythonPatterns.functionArguments); @@ -39,10 +39,10 @@ RenpyPatterns.label.captures![2].patterns!.splice(1, 0, PythonPatterns.parameter RenpyPatterns.returnStatements.patterns!.push(PythonPatterns.expression); RenpyPatterns.callJumpExpression.patterns!.push(PythonPatterns.expression); RenpyPatterns.callPass.patterns!.splice(0, 0, PythonPatterns.functionArguments); -RenpyPatterns.menuOption.beginCaptures![3].patterns!.splice(0, 0, PythonPatterns.functionArguments); +RenpyPatterns.menuOption.beginCaptures![3].patterns!.splice(0, 0, PythonPatterns.oddFunctionCall); RenpyPatterns.menuOption.beginCaptures![3].patterns![1].patterns!.push(PythonPatterns.expression); RenpyPatterns.menuSet.patterns!.push(PythonPatterns.expression); -RenpyPatterns.menu.beginCaptures![3].patterns!.push(PythonPatterns.functionArguments); +RenpyPatterns.menu.beginCaptures![3].patterns![1].patterns!.push(PythonPatterns.functionArguments); RenpyPatterns.audioParams.patterns!.push(PythonPatterns.number); RenpyPatterns.play.patterns![0].patterns!.splice(2, 0, PythonPatterns.expression); RenpyPatterns.queue.patterns![0].patterns!.splice(2, 0, PythonPatterns.expression); @@ -83,7 +83,7 @@ AtlPatterns.atlOn.patterns!.push(RenpyPatterns.comments); AtlPatterns.atlFunction.captures![2].patterns!.splice(0, 0, PythonPatterns.builtinPossibleCallables); AtlPatterns.atlWarperName.patterns!.splice(0, 0, PythonPatterns.builtinPossibleCallables); AtlPatterns.atlFallback.patterns!.push(RenpyPatterns.basePatterns); -AtlPatterns.atlBlockTester.patterns![0].patterns!.push(AtlPatterns.atl); +AtlPatterns.atlBlockTester.patterns!.push(AtlPatterns.atl); // Push all ScreenPatterns external includes ScreenPatterns.screenDefName.patterns!.splice(0, 0, PythonPatterns.builtinPossibleCallables); @@ -114,7 +114,10 @@ ScreenPatterns.screenText.patterns![1].patterns!.splice(0, 0, RenpyPatterns.stri ScreenPatterns.screen.patterns![0].patterns!.splice(1, 0, PythonPatterns.parameters); ScreenPatterns.screen.patterns![0].patterns!.push(RenpyPatterns.fallbackPatterns); ScreenPatterns.screenFallback.patterns!.push(RenpyPatterns.basePatterns); -ScreenPatterns.screenLanguage.patterns!.splice(0, 0, RenpyPatterns.conditionals); +ScreenPatterns.screenLanguage.patterns!.splice(0, 0, RenpyPatterns.define); +ScreenPatterns.screenLanguage.patterns!.splice(1, 0, RenpyPatterns.defaultStatement); +ScreenPatterns.screenLanguage.patterns!.splice(2, 0, RenpyPatterns.conditionals); +ScreenPatterns.screenLanguage.patterns!.splice(3, 0, RenpyPatterns.pythonStatements); // Push all StylePatterns external includes StylePatterns.styleBlockTester.patterns![0].patterns!.push(RenpyPatterns.fallbackPatterns); @@ -140,4 +143,4 @@ PythonPatterns.doubleOneRegexpComments.patterns!.push(RenpyPatterns.codetags); PythonPatterns.doubleThreeRegexpComments.patterns!.push(RenpyPatterns.codetags); -export { AtlPatterns, RenpyPatterns, StylePatterns, PythonPatterns, ScreenPatterns }; \ No newline at end of file +export { RenpyPatterns, AtlPatterns, StylePatterns, PythonPatterns, ScreenPatterns }; \ No newline at end of file diff --git a/src/tokenizer/tokenizer.ts b/src/tokenizer/tokenizer.ts index c9dfea1..5b79b13 100644 --- a/src/tokenizer/tokenizer.ts +++ b/src/tokenizer/tokenizer.ts @@ -434,6 +434,7 @@ class DocumentTokenizer { if (backref !== undefined) { return escapeRegExpCharacters(backref); } + logCatMessage(LogLevel.Warning, LogCategory.Tokenizer, `Could not find content to replace backreference ${g1}!`); return ""; }); @@ -448,6 +449,13 @@ class DocumentTokenizer { let matchEnd = reEnd.exec(result.source); const contentMatches = new Stack(); + if (!matchEnd) { + // If no end match could be found, we'll need to expand the range to the end of the source + const reLastChar = /$(?!\r\n|\r|\n)/dg; + reLastChar.lastIndex = Math.max(0, result.source.length - 1); + matchEnd = reLastChar.exec(result.source); + } + if (matchEnd) { // Check if any child pattern has content that would extend the currently determined end match if (p._patternsRepo) { @@ -639,7 +647,7 @@ class DocumentTokenizer { const contentNode = new TreeNode(); // p.contentToken matches the range 'between'; after the end of beginMatch and before the start of endMatch - if (p.contentToken) { + if (p.contentToken && contentStart !== contentEnd) { contentNode.token = new Token(p.contentToken, this.positionAt(contentStart), this.positionAt(contentEnd)); } @@ -742,10 +750,16 @@ class DocumentTokenizer { while (lastMatchIndex < lastCharIndex) { const bestMatch = this.scanPattern(pattern, source, lastMatchIndex, cache); - if (!bestMatch || bestMatch.matchBegin.index >= lastCharIndex) { + if (!bestMatch) { break; // No valid match was found in the remaining text. Break the loop } + const matchBegin = bestMatch.matchBegin; + const beginMatchEnd = matchBegin.index + matchBegin[0].length; + if (matchBegin.index >= lastCharIndex || beginMatchEnd > lastCharIndex) { + break; + } + const failSafeIndex = lastMatchIndex; // Debug index to break in case of an infinite loop if (bestMatch.pattern._patternType === TokenPatternType.RangePattern) { @@ -756,8 +770,7 @@ class DocumentTokenizer { const matchEnd = bestMatch.matchEnd!; lastMatchIndex = matchEnd.index + matchEnd[0].length; } else { - const matchBegin = bestMatch.matchBegin; - lastMatchIndex = matchBegin.index + matchBegin[0].length; + lastMatchIndex = beginMatchEnd; } if (failSafeIndex === lastMatchIndex) { diff --git a/syntaxes/renpy.atl.tmLanguage.json b/syntaxes/renpy.atl.tmLanguage.json index 7e3bbb4..727ef75 100644 --- a/syntaxes/renpy.atl.tmLanguage.json +++ b/syntaxes/renpy.atl.tmLanguage.json @@ -378,17 +378,13 @@ }, "atl-block-tester": { - "patterns": [ - { - "contentName": "meta.atl-block.renpy", - "begin": "(?<=(^[ \\t]*)(?:camera|image|show|scene|transform|on|block|parallel|contains|choice)\\b.*?)(:)", - "beginCaptures": { - "2": { "name": "punctuation.section.atl.begin.renpy" } - }, - "end": "^(?=(?!\\1)[ \\t]*[^\\s#]|\\1[^\\s#])", - "patterns": [ { "include": "source.renpy.atl" } ] - } - ] + "contentName": "meta.atl-block.renpy", + "begin": "(?<=(^[ \\t]*)(?:camera|image|show|scene|transform|on|block|parallel|contains|choice)\\b.*?)(:)", + "beginCaptures": { + "2": { "name": "punctuation.section.atl.begin.renpy" } + }, + "end": "^(?=(?!\\1)[ \\t]*[^\\s#]|\\1[^\\s#])", + "patterns": [ { "include": "source.renpy.atl" } ] }, "transform": { diff --git a/syntaxes/renpy.python.tmLanguage.json b/syntaxes/renpy.python.tmLanguage.json index 03b450d..1b424d1 100644 --- a/syntaxes/renpy.python.tmLanguage.json +++ b/syntaxes/renpy.python.tmLanguage.json @@ -97,7 +97,7 @@ ] }, "docstring-statement": { - "begin": "^(?=\\s*[rR]?(\\'\\'\\'|\\\"\\\"\\\"|\\'|\\\"))", + "begin": "(?<=^[ \\t]*)(?=[rR]?(\\'\\'\\'|\\\"\\\"\\\"|\\'|\\\"))", "comment": "the string either terminates correctly or by the beginning of a new line (this is for single line docstrings that aren't terminated) AND it's not followed by another docstring", "end": "((?<=\\1)|^)(?!\\s*[rR]?(\\'\\'\\'|\\\"\\\"\\\"|\\'|\\\"))", "patterns": [ { "include": "#docstring" } ] @@ -315,7 +315,7 @@ "odd-function-call": { "comment": "A bit obscured function call where there may have been an arbitrary number of other operations to get the function.E.g. \"arr[idx](args)\"", "name": "meta.function-call.python", - "begin": "(?<=\\]|\\))\\s*(?=\\()", + "begin": "(?<=\\]|\\)|\")\\s*(?=\\()", "end": "(\\))", "endCaptures": { "1": { "name": "punctuation.definition.arguments.end.python" } @@ -419,21 +419,21 @@ }, "number-hex": { "name": "constant.numeric.hex.python", - "match": "(?