diff --git a/.prettierignore b/.prettierignore index fc48334..0c84c50 100644 --- a/.prettierignore +++ b/.prettierignore @@ -3,4 +3,7 @@ **/dist # Ignore all markdown files: -*.md \ No newline at end of file +*.md + +# Ignore all generated typescript files: +*.g.ts \ No newline at end of file diff --git a/examples/tokenizer_tests.rpy b/examples/tokenizer_tests.rpy new file mode 100644 index 0000000..0007513 --- /dev/null +++ b/examples/tokenizer_tests.rpy @@ -0,0 +1,24 @@ +init python hide early in Namespace: + + def sampleFunction(name, delay, position=(0,0)): + """ + This is a sample function. + """ +# test + renpy.pause(delay) + return name + + class Inventory: + """ + This is a fake inventory class. + """ + + def __init__(self): + self.items = [] + + def add(self, item): + """Add an item to the inventory.""" + self.items.append(item); + return + + \ No newline at end of file diff --git a/examples/unit_test.rpy b/examples/unit_test.rpy index c165657..fe4a663 100644 --- a/examples/unit_test.rpy +++ b/examples/unit_test.rpy @@ -837,7 +837,7 @@ queue sound "woof.mp3" volume 0.75 queue sound "woof.mp3" volume 1.0 - define audio.woof = "woof.mp23 + define audio.woof = "woof.mp3" # ... @@ -1853,7 +1853,7 @@ image big hello world = Text("Hello World", style="big") screen hello_world: - text "Hello, World" style "big" + text "Hello, World" style "big" at 0# Comment style my_text is text: size 40 @@ -3555,9 +3555,9 @@ #region Customizing the Keymap # see https://www.renpy.org/doc/html/keymap.html - init: - $ config.keymap['dismiss'].append('t') - $ config.keymap['dismiss'].remove('K_SPACE') + init python: + config.keymap['dismiss'].append('t') + config.keymap['dismiss'].remove('K_SPACE') config.keymap = dict( @@ -3678,59 +3678,59 @@ ) - config.pad_bindings = { - "pad_leftshoulder_press" : [ "rollback", ], - "pad_lefttrigger_pos" : [ "rollback", ], - "pad_back_press" : [ "rollback", ], + config.pad_bindings = { + "pad_leftshoulder_press" : [ "rollback", ], + "pad_lefttrigger_pos" : [ "rollback", ], + "pad_back_press" : [ "rollback", ], - "repeat_pad_leftshoulder_press" : [ "rollback", ], - "repeat_pad_lefttrigger_pos" : [ "rollback", ], - "repeat_pad_back_press" : [ "rollback", ], + "repeat_pad_leftshoulder_press" : [ "rollback", ], + "repeat_pad_lefttrigger_pos" : [ "rollback", ], + "repeat_pad_back_press" : [ "rollback", ], - "pad_guide_press" : [ "game_menu", ], - "pad_start_press" : [ "game_menu", ], + "pad_guide_press" : [ "game_menu", ], + "pad_start_press" : [ "game_menu", ], - "pad_y_press" : [ "hide_windows", ], + "pad_y_press" : [ "hide_windows", ], - "pad_rightshoulder_press" : [ "rollforward", ], - "repeat_pad_rightshoulder_press" : [ "rollforward", ], + "pad_rightshoulder_press" : [ "rollforward", ], + "repeat_pad_rightshoulder_press" : [ "rollforward", ], - "pad_righttrigger_pos" : [ "dismiss", "button_select", "bar_activate", "bar_deactivate" ], - "pad_a_press" : [ "dismiss", "button_select", "bar_activate", "bar_deactivate"], - "pad_b_press" : [ "button_alternate" ], + "pad_righttrigger_pos" : [ "dismiss", "button_select", "bar_activate", "bar_deactivate" ], + "pad_a_press" : [ "dismiss", "button_select", "bar_activate", "bar_deactivate"], + "pad_b_press" : [ "button_alternate" ], - "pad_dpleft_press" : [ "focus_left", "bar_left", "viewport_leftarrow" ], - "pad_leftx_neg" : [ "focus_left", "bar_left", "viewport_leftarrow" ], - "pad_rightx_neg" : [ "focus_left", "bar_left", "viewport_leftarrow" ], + "pad_dpleft_press" : [ "focus_left", "bar_left", "viewport_leftarrow" ], + "pad_leftx_neg" : [ "focus_left", "bar_left", "viewport_leftarrow" ], + "pad_rightx_neg" : [ "focus_left", "bar_left", "viewport_leftarrow" ], - "pad_dpright_press" : [ "focus_right", "bar_right", "viewport_rightarrow" ], - "pad_leftx_pos" : [ "focus_right", "bar_right", "viewport_rightarrow" ], - "pad_rightx_pos" : [ "focus_right", "bar_right", "viewport_rightarrow" ], + "pad_dpright_press" : [ "focus_right", "bar_right", "viewport_rightarrow" ], + "pad_leftx_pos" : [ "focus_right", "bar_right", "viewport_rightarrow" ], + "pad_rightx_pos" : [ "focus_right", "bar_right", "viewport_rightarrow" ], - "pad_dpup_press" : [ "focus_up", "bar_up", "viewport_uparrow" ], - "pad_lefty_neg" : [ "focus_up", "bar_up", "viewport_uparrow" ], - "pad_righty_neg" : [ "focus_up", "bar_up", "viewport_uparrow" ], + "pad_dpup_press" : [ "focus_up", "bar_up", "viewport_uparrow" ], + "pad_lefty_neg" : [ "focus_up", "bar_up", "viewport_uparrow" ], + "pad_righty_neg" : [ "focus_up", "bar_up", "viewport_uparrow" ], - "pad_dpdown_press" : [ "focus_down", "bar_down", "viewport_downarrow" ], - "pad_lefty_pos" : [ "focus_down", "bar_down", "viewport_downarrow" ], - "pad_righty_pos" : [ "focus_down", "bar_down", "viewport_downarrow" ], + "pad_dpdown_press" : [ "focus_down", "bar_down", "viewport_downarrow" ], + "pad_lefty_pos" : [ "focus_down", "bar_down", "viewport_downarrow" ], + "pad_righty_pos" : [ "focus_down", "bar_down", "viewport_downarrow" ], - "repeat_pad_dpleft_press" : [ "focus_left", "bar_left", "viewport_leftarrow" ], - "repeat_pad_leftx_neg" : [ "focus_left", "bar_left", "viewport_leftarrow" ], - "repeat_pad_rightx_neg" : [ "focus_left", "bar_left", "viewport_leftarrow" ], + "repeat_pad_dpleft_press" : [ "focus_left", "bar_left", "viewport_leftarrow" ], + "repeat_pad_leftx_neg" : [ "focus_left", "bar_left", "viewport_leftarrow" ], + "repeat_pad_rightx_neg" : [ "focus_left", "bar_left", "viewport_leftarrow" ], - "repeat_pad_dpright_press" : [ "focus_right", "bar_right", "viewport_rightarrow" ], - "repeat_pad_leftx_pos" : [ "focus_right", "bar_right", "viewport_rightarrow" ], - "repeat_pad_rightx_pos" : [ "focus_right", "bar_right", "viewport_rightarrow" ], + "repeat_pad_dpright_press" : [ "focus_right", "bar_right", "viewport_rightarrow" ], + "repeat_pad_leftx_pos" : [ "focus_right", "bar_right", "viewport_rightarrow" ], + "repeat_pad_rightx_pos" : [ "focus_right", "bar_right", "viewport_rightarrow" ], - "repeat_pad_dpup_press" : [ "focus_up", "bar_up", "viewport_uparrow" ], - "repeat_pad_lefty_neg" : [ "focus_up", "bar_up", "viewport_uparrow" ], - "repeat_pad_righty_neg" : [ "focus_up", "bar_up", "viewport_uparrow" ], + "repeat_pad_dpup_press" : [ "focus_up", "bar_up", "viewport_uparrow" ], + "repeat_pad_lefty_neg" : [ "focus_up", "bar_up", "viewport_uparrow" ], + "repeat_pad_righty_neg" : [ "focus_up", "bar_up", "viewport_uparrow" ], - "repeat_pad_dpdown_press" : [ "focus_down", "bar_down", "viewport_downarrow" ], - "repeat_pad_lefty_pos" : [ "focus_down", "bar_down", "viewport_downarrow" ], - "repeat_pad_righty_pos" : [ "focus_down", "bar_down", "viewport_downarrow" ], - } + "repeat_pad_dpdown_press" : [ "focus_down", "bar_down", "viewport_downarrow" ], + "repeat_pad_lefty_pos" : [ "focus_down", "bar_down", "viewport_downarrow" ], + "repeat_pad_righty_pos" : [ "focus_down", "bar_down", "viewport_downarrow" ], + } #endregion Customizing the Keymap diff --git a/package.json b/package.json index 9e701c9..6741b33 100644 --- a/package.json +++ b/package.json @@ -71,6 +71,10 @@ { "scopeName": "source.renpy.python", "path": "./syntaxes/renpy.python.tmLanguage.json" + }, + { + "scopeName": "source.renpy.atl", + "path": "./syntaxes/renpy.atl.tmLanguage.json" } ], "snippets": [ diff --git a/src/color.ts b/src/color.ts index 86dcd0e..4f21263 100644 --- a/src/color.ts +++ b/src/color.ts @@ -1,9 +1,13 @@ // Color conversion methods for Color provider import { CancellationToken, Color, ColorInformation, ColorPresentation, DocumentColorProvider, Range, TextDocument, TextEdit } from "vscode"; +import { ValueEqualsSet } from "./utilities/hashset"; +import { tokenizeDocument } from "./tokenizer/tokenizer"; +import { LiteralTokenType } from "./tokenizer/renpy-tokens"; +import { TextMateRule, injectCustomTextmateTokens } from "./decorator"; +/*import { tokenizeDocument } from "./tokenizer/tokenizer"; import { injectCustomTextmateTokens, TextMateRule } from "./decorator"; import { LiteralTokenType } from "./tokenizer/renpy-tokens"; -import { tokenizeDocument } from "./tokenizer/tokenizer"; -import { ValueEqualsSet } from "./utilities/hashset"; +import { ValueEqualsSet } from "./utilities/hashset";*/ export class RenpyColorProvider implements DocumentColorProvider { public provideDocumentColors(document: TextDocument, token: CancellationToken): Thenable { @@ -112,15 +116,15 @@ export function getColorPresentations(color: Color, document: TextDocument, rang } export function injectCustomColorStyles(document: TextDocument) { + // Disabled until filter is added to the tree class const documentTokens = tokenizeDocument(document); - // TODO: Should probably make sure this constant is actually part of a tag, but for now this is fine. - const colorTags = documentTokens.filter((x) => x.tokenType === LiteralTokenType.Color); + const colorTags = documentTokens.filter((x) => x.token?.tokenType === LiteralTokenType.Color); const colorRules = new ValueEqualsSet(); // Build the new rules for this file - colorTags.forEach((color) => { - const lowerColor = document.getText(color.getRange()).toLowerCase(); + colorTags.forEach((colorNode) => { + const lowerColor = document.getText(colorNode.token?.getVSCodeRange()).toLowerCase(); const newRule = new TextMateRule(`renpy.meta.color.${lowerColor}`, { foreground: lowerColor }); colorRules.add(newRule); }); diff --git a/src/tokenizer/atl-token-patterns.g.ts b/src/tokenizer/atl-token-patterns.g.ts new file mode 100644 index 0000000..acbad44 --- /dev/null +++ b/src/tokenizer/atl-token-patterns.g.ts @@ -0,0 +1,399 @@ +/* eslint-disable no-useless-escape */ +/* eslint-disable no-useless-backreference */ +/* eslint-disable @typescript-eslint/no-non-null-assertion */ + +// THIS FILE HAS BEEN GENERATED BY THE `syntax-to-token-pattern.py` GENERATOR +// DO NOT EDIT THIS FILE DIRECTLY! INSTEAD RUN THE PYTHON SCRIPT. +// ANY MANUAL EDITS MADE TO THIS FILE WILL BE OVERWRITTEN. YOU HAVE BEEN WARNED. +// Last generated: 01/06/2023 14:57:48 (UTC+0) + +import { KeywordTokenType, EntityTokenType, MetaTokenType, CharacterTokenType } from "./renpy-tokens"; +import { TokenPattern } from "./token-pattern-types"; + +export const atl: TokenPattern = { + // https://www.renpy.org/doc/html/atl.html#atl-syntax-and-semantics + patterns: [ + ] +}; + +export const atlBuildInProperties: TokenPattern = { + // https://www.renpy.org/doc/html/atl.html#list-of-transform-properties + patterns: [ + { + debugName: "atlBuildInProperties.patterns![0]", + + // Special manipulation keywords + match: /\b(? { + const token = node.token; + if (!token) { + return; + } - tokens.forEach((token) => { - const range = token.getRange(); + const range = token.getVSCodeRange(); const content = activeEditor?.document.getText(range); const decoration: DecorationOptions = { @@ -290,15 +303,16 @@ ${(decoration.hoverMessage as MarkdownString).value}`); case KeywordTokenType.Fadeout: case KeywordTokenType.Set: // Renpy sub expression keywords case KeywordTokenType.Expression: - case KeywordTokenType.At: - case KeywordTokenType.As: - case KeywordTokenType.With: - case KeywordTokenType.OnLayer: - case KeywordTokenType.ZOrder: - case KeywordTokenType.Behind: case KeywordTokenType.Animation: case KeywordTokenType.From: + case KeywordTokenType.Time: + case KeywordTokenType.Repeat: case KeywordTokenType.DollarSign: + case KeywordTokenType.Sensitive: + case KeywordTokenType.Text: + case KeywordTokenType.Other: + case KeywordTokenType.OtherPython: + case KeywordTokenType.OtherAudio: case KeywordTokenType.Warp: // ATL keywords case KeywordTokenType.Circles: case KeywordTokenType.Clockwise: @@ -306,13 +320,20 @@ ${(decoration.hoverMessage as MarkdownString).value}`); case KeywordTokenType.Event: case KeywordTokenType.On: case KeywordTokenType.Function: + case KeywordTokenType.Import: // Python keywords + case KeywordTokenType.Class: + case KeywordTokenType.Metaclass: + case KeywordTokenType.Lambda: + case KeywordTokenType.Async: + case KeywordTokenType.Def: + case KeywordTokenType.Global: + case KeywordTokenType.Nonlocal: case LiteralTokenType.Boolean: // Language keywords case OperatorTokenType.And: case OperatorTokenType.Or: case OperatorTokenType.Not: case OperatorTokenType.Is: case OperatorTokenType.IsNot: - case OperatorTokenType.In: case OperatorTokenType.NotIn: keywords.push(decoration); break; @@ -320,7 +341,8 @@ ${(decoration.hoverMessage as MarkdownString).value}`); case KeywordTokenType.If: // Conditional control flow keywords case KeywordTokenType.Elif: case KeywordTokenType.Else: - case KeywordTokenType.For: // Control flow keywords + case KeywordTokenType.In: // Control flow keywords + case KeywordTokenType.For: case KeywordTokenType.While: case KeywordTokenType.Pass: case KeywordTokenType.Return: @@ -330,11 +352,19 @@ ${(decoration.hoverMessage as MarkdownString).value}`); case KeywordTokenType.Parallel: case KeywordTokenType.Block: case KeywordTokenType.Choice: + case KeywordTokenType.At: // Renpy control flow keywords + case KeywordTokenType.As: + case KeywordTokenType.With: + case KeywordTokenType.Onlayer: + case KeywordTokenType.Zorder: + case KeywordTokenType.Behind: controlKeywords.push(decoration); break; - case EntityTokenType.Class: // Types - case EntityTokenType.Namespace: + case EntityTokenType.ClassName: // Types + case EntityTokenType.InheritedClassName: + case EntityTokenType.TypeName: + case EntityTokenType.NamespaceName: types.push(decoration); break; @@ -346,42 +376,58 @@ ${(decoration.hoverMessage as MarkdownString).value}`); // Variables case EntityTokenType.VariableName: + case EntityTokenType.ImageName: + case EntityTokenType.TextName: + case EntityTokenType.AudioName: + case EntityTokenType.CharacterName: case EntityTokenType.PropertyName: variables.push(decoration); break; // Other entities - case EntityTokenType.Tag: + case EntityTokenType.TagName: otherEntities.push(decoration); break; // Comments case MetaTokenType.Comment: case MetaTokenType.CommentCodeTag: + case MetaTokenType.CommentRegionTag: + case MetaTokenType.TypehintComment: + case MetaTokenType.TypehintDirective: + case MetaTokenType.TypehintIgnore: + case MetaTokenType.TypehintType: + case MetaTokenType.TypehintPunctuation: + case MetaTokenType.TypehintVariable: + case MetaTokenType.Docstring: comments.push(decoration); break; + case MetaTokenType.StringBegin: + case MetaTokenType.StringEnd: case MetaTokenType.CodeBlock: case MetaTokenType.PythonLine: case MetaTokenType.PythonBlock: case MetaTokenType.Arguments: case MetaTokenType.EmptyString: + case MetaTokenType.StringTag: case MetaTokenType.TagBlock: + case MetaTokenType.TaggedString: case MetaTokenType.Placeholder: case MetaTokenType.MenuStatement: case MetaTokenType.MenuBlock: case MetaTokenType.MenuOption: case MetaTokenType.MenuOptionBlock: + case MetaTokenType.BehindStatement: + case MetaTokenType.OnlayerStatement: case MetaTokenType.CameraStatement: case MetaTokenType.SceneStatement: case MetaTokenType.ShowStatement: case MetaTokenType.ImageStatement: - case MetaTokenType.NarratorSayStatement: - case MetaTokenType.SayStatement: - case MetaTokenType.CharacterNameString: case MetaTokenType.CallStatement: case MetaTokenType.JumpStatement: case MetaTokenType.PlayAudioStatement: + case MetaTokenType.QueueAudioStatement: case MetaTokenType.StopAudioStatement: case MetaTokenType.LabelStatement: case MetaTokenType.LabelCall: @@ -389,6 +435,23 @@ ${(decoration.hoverMessage as MarkdownString).value}`); case MetaTokenType.AtStatement: case MetaTokenType.AsStatement: case MetaTokenType.WithStatement: + case MetaTokenType.ScreenStatement: + case MetaTokenType.ScreenSensitive: + case MetaTokenType.ScreenFrame: + case MetaTokenType.ScreenWindow: + case MetaTokenType.ScreenText: + case MetaTokenType.ScreenBlock: + case MetaTokenType.NarratorSayStatement: + case MetaTokenType.SayStatement: + case MetaTokenType.CharacterNameString: + case MetaTokenType.SayNarrator: + case MetaTokenType.SayCharacter: + case MetaTokenType.AtParameters: + case MetaTokenType.AsParameters: + case MetaTokenType.BehindParameters: + case MetaTokenType.OnlayerParameters: + case MetaTokenType.WithParameters: + case MetaTokenType.ZorderParameters: case MetaTokenType.ATLBlock: case MetaTokenType.ATLChoiceBlock: case MetaTokenType.ATLContains: @@ -397,6 +460,47 @@ ${(decoration.hoverMessage as MarkdownString).value}`); case MetaTokenType.ATLFunction: case MetaTokenType.ATLWarper: case MetaTokenType.ATLOn: + case MetaTokenType.MemberAccess: + case MetaTokenType.ItemAccess: + case MetaTokenType.IndexedName: + case MetaTokenType.Attribute: + case MetaTokenType.ClassDefinition: + case MetaTokenType.ClassInheritance: + case MetaTokenType.FunctionDefinition: + case MetaTokenType.LambdaFunction: + case MetaTokenType.FunctionLambdaParameters: + case MetaTokenType.FunctionParameters: + case MetaTokenType.FunctionDecorator: + case MetaTokenType.FunctionCall: + case MetaTokenType.FunctionCallGeneric: + case MetaTokenType.Fstring: + case MetaTokenType.ControlFlowKeyword: + case MetaTokenType.LogicalOperatorKeyword: + case MetaTokenType.Operator: + case MetaTokenType.ArithmeticOperator: + case MetaTokenType.BitwiseOperatorKeyword: + case MetaTokenType.ComparisonOperatorKeyword: + case MetaTokenType.ConstantLiteral: + case MetaTokenType.ConstantNumeric: + case MetaTokenType.ConstantCaps: + case MetaTokenType.BuiltinExceptionType: + case MetaTokenType.BuiltinType: + case MetaTokenType.MagicVariable: + case MetaTokenType.EscapeSequence: + case MetaTokenType.FormatPercent: + case MetaTokenType.FormatBrace: + case MetaTokenType.StringStorageType: + case MetaTokenType.FormatStorageType: + case MetaTokenType.ImaginaryNumberStorageType: + case MetaTokenType.NumberStorageType: + case MetaTokenType.ClassStorageType: + case MetaTokenType.CommentBegin: + case MetaTokenType.CommentEnd: + case MetaTokenType.Backreference: + case MetaTokenType.BackreferenceNamed: + case MetaTokenType.CharacterSet: + case MetaTokenType.Named: + case MetaTokenType.ModifierFlagStorageType: otherMeta.push(decoration); break; @@ -439,7 +543,7 @@ ${(decoration.hoverMessage as MarkdownString).value}`); case OperatorTokenType.BitwiseNot: case OperatorTokenType.BitwiseLeftShift: case OperatorTokenType.BitwiseRightShift: - case OperatorTokenType.Assign: // Assignment operators + case OperatorTokenType.Assignment: // Assignment operators case OperatorTokenType.PlusAssign: case OperatorTokenType.MinusAssign: case OperatorTokenType.MultiplyAssign: @@ -461,13 +565,17 @@ ${(decoration.hoverMessage as MarkdownString).value}`); operators.push(decoration); break; - case CharacterTokenType.WhiteSpace: + case CharacterTokenType.Whitespace: case CharacterTokenType.NewLine: case CharacterTokenType.Period: case CharacterTokenType.Colon: case CharacterTokenType.Semicolon: case CharacterTokenType.Comma: case CharacterTokenType.Hashtag: + case CharacterTokenType.Caret: + case CharacterTokenType.DollarSymbol: + case CharacterTokenType.AtSymbol: + case CharacterTokenType.EqualsSymbol: case CharacterTokenType.Quote: case CharacterTokenType.DoubleQuote: case CharacterTokenType.BackQuote: @@ -499,6 +607,11 @@ ${(decoration.hoverMessage as MarkdownString).value}`); case MetaTokenType.Invalid: errors.push(decoration); break; + + case MetaTokenType.Deprecated: + deprecated.push(decoration); + break; + default: throw new Error(`Unhandled token case: ${tokenTypeToStringMap[token.tokenType]}(id: ${token.tokenType})`); } @@ -527,6 +640,7 @@ ${(decoration.hoverMessage as MarkdownString).value}`); activeEditor.setDecorations(escCharacterDecorationType, escCharacters); activeEditor.setDecorations(errorDecorationType, errors); + activeEditor.setDecorations(deprecatedDecorationType, deprecated); } function triggerUpdateDecorations(throttle = false) { @@ -557,6 +671,7 @@ const tokenTypeDefinitions: EnumToString = { Early: { name: "Early", value: KeywordTokenType.Early }, Define: { name: "Define", value: KeywordTokenType.Define }, Default: { name: "Default", value: KeywordTokenType.Default }, + Label: { name: "Label", value: KeywordTokenType.Label }, Menu: { name: "Menu", value: KeywordTokenType.Menu }, Pause: { name: "Pause", value: KeywordTokenType.Pause }, @@ -585,19 +700,25 @@ const tokenTypeDefinitions: EnumToString = { At: { name: "At", value: KeywordTokenType.At }, As: { name: "As", value: KeywordTokenType.As }, With: { name: "With", value: KeywordTokenType.With }, - OnLayer: { name: "OnLayer", value: KeywordTokenType.OnLayer }, - ZOrder: { name: "ZOrder", value: KeywordTokenType.ZOrder }, + Onlayer: { name: "Onlayer", value: KeywordTokenType.Onlayer }, + Zorder: { name: "Zorder", value: KeywordTokenType.Zorder }, Behind: { name: "Behind", value: KeywordTokenType.Behind }, Animation: { name: "Animation", value: KeywordTokenType.Animation }, From: { name: "From", value: KeywordTokenType.From }, Time: { name: "Time", value: KeywordTokenType.Time }, Repeat: { name: "Repeat", value: KeywordTokenType.Repeat }, DollarSign: { name: "DollarSign", value: KeywordTokenType.DollarSign }, + Sensitive: { name: "Sensitive", value: KeywordTokenType.Sensitive }, + Text: { name: "Text", value: KeywordTokenType.Text }, + Other: { name: "Other", value: KeywordTokenType.Other }, + OtherPython: { name: "OtherPython", value: KeywordTokenType.OtherPython }, + OtherAudio: { name: "OtherAudio", value: KeywordTokenType.OtherAudio }, If: { name: "If", value: KeywordTokenType.If }, Elif: { name: "Elif", value: KeywordTokenType.Elif }, Else: { name: "Else", value: KeywordTokenType.Else }, + In: { name: "In", value: KeywordTokenType.In }, For: { name: "For", value: KeywordTokenType.For }, While: { name: "While", value: KeywordTokenType.While }, Pass: { name: "Pass", value: KeywordTokenType.Pass }, @@ -617,12 +738,28 @@ const tokenTypeDefinitions: EnumToString = { On: { name: "On", value: KeywordTokenType.On }, Function: { name: "Function", value: KeywordTokenType.Function }, - Class: { name: "Class", value: EntityTokenType.Class }, - Namespace: { name: "Namespace", value: EntityTokenType.Namespace }, + Import: { name: "Import", value: KeywordTokenType.Import }, + Class: { name: "Class", value: KeywordTokenType.Class }, + Metaclass: { name: "Metaclass", value: KeywordTokenType.Metaclass }, + Lambda: { name: "Lambda", value: KeywordTokenType.Lambda }, + Async: { name: "Async", value: KeywordTokenType.Async }, + Def: { name: "Def", value: KeywordTokenType.Def }, + Global: { name: "Global", value: KeywordTokenType.Global }, + Nonlocal: { name: "Nonlocal", value: KeywordTokenType.Nonlocal }, + + ClassName: { name: "ClassName", value: EntityTokenType.ClassName }, + InheritedClassName: { name: "InheritedClassName", value: EntityTokenType.InheritedClassName }, + TypeName: { name: "TypeName", value: EntityTokenType.TypeName }, + NamespaceName: { name: "NamespaceName", value: EntityTokenType.NamespaceName }, FunctionName: { name: "FunctionName", value: EntityTokenType.FunctionName }, - Tag: { name: "Tag", value: EntityTokenType.Tag }, + TagName: { name: "TagName", value: EntityTokenType.TagName }, VariableName: { name: "VariableName", value: EntityTokenType.VariableName }, + ImageName: { name: "ImageName", value: EntityTokenType.ImageName }, + TextName: { name: "TextName", value: EntityTokenType.TextName }, + AudioName: { name: "AudioName", value: EntityTokenType.AudioName }, + CharacterName: { name: "CharacterName", value: EntityTokenType.CharacterName }, + EventName: { name: "EventName", value: EntityTokenType.EventName }, PropertyName: { name: "PropertyName", value: EntityTokenType.PropertyName }, @@ -651,7 +788,7 @@ const tokenTypeDefinitions: EnumToString = { BitwiseLeftShift: { name: "BitwiseLeftShift", value: OperatorTokenType.BitwiseLeftShift }, BitwiseRightShift: { name: "BitwiseRightShift", value: OperatorTokenType.BitwiseRightShift }, - Assign: { name: "Assign", value: OperatorTokenType.Assign }, + Assignment: { name: "Assignment", value: OperatorTokenType.Assignment }, PlusAssign: { name: "PlusAssign", value: OperatorTokenType.PlusAssign }, MinusAssign: { name: "MinusAssign", value: OperatorTokenType.MinusAssign }, MultiplyAssign: { name: "MultiplyAssign", value: OperatorTokenType.MultiplyAssign }, @@ -679,9 +816,21 @@ const tokenTypeDefinitions: EnumToString = { Is: { name: "Is", value: OperatorTokenType.Is }, IsNot: { name: "IsNot", value: OperatorTokenType.IsNot }, - In: { name: "In", value: OperatorTokenType.In }, NotIn: { name: "NotIn", value: OperatorTokenType.NotIn }, + Unpacking: { name: "Unpacking", value: OperatorTokenType.Unpacking }, + PositionalParameter: { name: "PositionalParameter", value: OperatorTokenType.PositionalParameter }, + + Quantifier: { name: "Quantifier", value: OperatorTokenType.Quantifier }, + Disjunction: { name: "Disjunction", value: OperatorTokenType.Disjunction }, + Negation: { name: "Negation", value: OperatorTokenType.Negation }, + Lookahead: { name: "Lookahead", value: OperatorTokenType.Lookahead }, + LookaheadNegative: { name: "LookaheadNegative", value: OperatorTokenType.LookaheadNegative }, + Lookbehind: { name: "Lookbehind", value: OperatorTokenType.Lookbehind }, + LookbehindNegative: { name: "LookbehindNegative", value: OperatorTokenType.LookbehindNegative }, + Conditional: { name: "Conditional", value: OperatorTokenType.Conditional }, + ConditionalNegative: { name: "ConditionalNegative", value: OperatorTokenType.ConditionalNegative }, + Unknown: { name: "Unknown", value: CharacterTokenType.Unknown }, OpenParentheses: { name: "OpenParentheses", value: CharacterTokenType.OpenParentheses }, @@ -693,7 +842,7 @@ const tokenTypeDefinitions: EnumToString = { OpenSquareBracket: { name: "OpenSquareBracket", value: CharacterTokenType.OpenSquareBracket }, CloseSquareBracket: { name: "CloseSquareBracket", value: CharacterTokenType.CloseSquareBracket }, - WhiteSpace: { name: "WhiteSpace", value: CharacterTokenType.WhiteSpace }, + Whitespace: { name: "Whitespace", value: CharacterTokenType.Whitespace }, NewLine: { name: "NewLine", value: CharacterTokenType.NewLine }, Period: { name: "Period", value: CharacterTokenType.Period }, @@ -701,6 +850,10 @@ const tokenTypeDefinitions: EnumToString = { Semicolon: { name: "Semicolon", value: CharacterTokenType.Semicolon }, Comma: { name: "Comma", value: CharacterTokenType.Comma }, Hashtag: { name: "Hashtag", value: CharacterTokenType.Hashtag }, + Caret: { name: "Caret", value: CharacterTokenType.Caret }, + DollarSymbol: { name: "DollarSymbol", value: CharacterTokenType.DollarSymbol }, + AtSymbol: { name: "AtSymbol", value: CharacterTokenType.AtSymbol }, + EqualsSymbol: { name: "EqualsSymbol", value: CharacterTokenType.EqualsSymbol }, Quote: { name: "Quote", value: CharacterTokenType.Quote }, DoubleQuote: { name: "DoubleQuote", value: CharacterTokenType.DoubleQuote }, @@ -719,16 +872,31 @@ const tokenTypeDefinitions: EnumToString = { EscOpenBracket: { name: "EscOpenBracket", value: EscapedCharacterTokenType.EscOpenBracket }, Invalid: { name: "Invalid", value: MetaTokenType.Invalid }, + Deprecated: { name: "Deprecated", value: MetaTokenType.Deprecated }, + Comment: { name: "Comment", value: MetaTokenType.Comment }, - CodeBlock: { name: "CodeBlock", value: MetaTokenType.CodeBlock }, + CommentCodeTag: { name: "CommentCodeTag", value: MetaTokenType.CommentCodeTag }, + CommentRegionTag: { name: "CommentRegionTag", value: MetaTokenType.CommentRegionTag }, + TypehintComment: { name: "TypehintComment", value: MetaTokenType.TypehintComment }, + TypehintDirective: { name: "TypehintDirective", value: MetaTokenType.TypehintDirective }, + TypehintIgnore: { name: "TypehintIgnore", value: MetaTokenType.TypehintIgnore }, + TypehintType: { name: "TypehintType", value: MetaTokenType.TypehintType }, + TypehintPunctuation: { name: "TypehintPunctuation", value: MetaTokenType.TypehintPunctuation }, + TypehintVariable: { name: "TypehintVariable", value: MetaTokenType.TypehintVariable }, + Docstring: { name: "Docstring", value: MetaTokenType.Docstring }, + + StringBegin: { name: "StringBegin", value: MetaTokenType.StringBegin }, + StringEnd: { name: "StringEnd", value: MetaTokenType.StringEnd }, + CodeBlock: { name: "CodeBlock", value: MetaTokenType.CodeBlock }, PythonLine: { name: "PythonLine", value: MetaTokenType.PythonLine }, PythonBlock: { name: "PythonBlock", value: MetaTokenType.PythonBlock }, Arguments: { name: "Arguments", value: MetaTokenType.Arguments }, - CommentCodeTag: { name: "CommentCodeTag", value: MetaTokenType.CommentCodeTag }, EmptyString: { name: "EmptyString", value: MetaTokenType.EmptyString }, + StringTag: { name: "StringTag", value: MetaTokenType.StringTag }, TagBlock: { name: "TagBlock", value: MetaTokenType.TagBlock }, + TaggedString: { name: "TaggedString", value: MetaTokenType.TaggedString }, Placeholder: { name: "Placeholder", value: MetaTokenType.Placeholder }, MenuStatement: { name: "MenuStatement", value: MetaTokenType.MenuStatement }, @@ -736,28 +904,48 @@ const tokenTypeDefinitions: EnumToString = { MenuOption: { name: "MenuOption", value: MetaTokenType.MenuOption }, MenuOptionBlock: { name: "MenuOptionBlock", value: MetaTokenType.MenuOptionBlock }, + LabelStatement: { name: "LabelStatement", value: MetaTokenType.LabelStatement }, + LabelCall: { name: "LabelCall", value: MetaTokenType.LabelCall }, + LabelAccess: { name: "LabelAccess", value: MetaTokenType.LabelAccess }, + + BehindStatement: { name: "BehindStatement", value: MetaTokenType.BehindStatement }, + OnlayerStatement: { name: "OnlayerStatement", value: MetaTokenType.OnlayerStatement }, + ZorderStatement: { name: "ZorderStatement", value: MetaTokenType.ZorderStatement }, + AtStatement: { name: "AtStatement", value: MetaTokenType.AtStatement }, + AsStatement: { name: "AsStatement", value: MetaTokenType.AsStatement }, + WithStatement: { name: "WithStatement", value: MetaTokenType.WithStatement }, + + ImageStatement: { name: "ImageStatement", value: MetaTokenType.ImageStatement }, CameraStatement: { name: "CameraStatement", value: MetaTokenType.CameraStatement }, SceneStatement: { name: "SceneStatement", value: MetaTokenType.SceneStatement }, ShowStatement: { name: "ShowStatement", value: MetaTokenType.ShowStatement }, - ImageStatement: { name: "ImageStatement", value: MetaTokenType.ImageStatement }, - NarratorSayStatement: { name: "NarratorSayStatement", value: MetaTokenType.NarratorSayStatement }, - SayStatement: { name: "SayStatement", value: MetaTokenType.SayStatement }, - CharacterNameString: { name: "CharacterNameString", value: MetaTokenType.CharacterNameString }, - - CallStatement: { name: "CallStatement", value: MetaTokenType.CallStatement }, JumpStatement: { name: "JumpStatement", value: MetaTokenType.JumpStatement }, + CallStatement: { name: "CallStatement", value: MetaTokenType.CallStatement }, PlayAudioStatement: { name: "PlayAudioStatement", value: MetaTokenType.PlayAudioStatement }, + QueueAudioStatement: { name: "QueueAudioStatement", value: MetaTokenType.QueueAudioStatement }, StopAudioStatement: { name: "StopAudioStatement", value: MetaTokenType.StopAudioStatement }, - LabelStatement: { name: "LabelStatement", value: MetaTokenType.LabelStatement }, - LabelCall: { name: "LabelCall", value: MetaTokenType.LabelCall }, - LabelAccess: { name: "LabelAccess", value: MetaTokenType.LabelAccess }, + ScreenStatement: { name: "ScreenStatement", value: MetaTokenType.ScreenStatement }, + ScreenSensitive: { name: "ScreenSensitive", value: MetaTokenType.ScreenSensitive }, + ScreenFrame: { name: "ScreenFrame", value: MetaTokenType.ScreenFrame }, + ScreenWindow: { name: "ScreenWindow", value: MetaTokenType.ScreenWindow }, + ScreenText: { name: "ScreenText", value: MetaTokenType.ScreenText }, + ScreenBlock: { name: "ScreenBlock", value: MetaTokenType.ScreenBlock }, - AtStatement: { name: "AtStatement", value: MetaTokenType.AtStatement }, - AsStatement: { name: "AsStatement", value: MetaTokenType.AsStatement }, - WithStatement: { name: "WithStatement", value: MetaTokenType.WithStatement }, + NarratorSayStatement: { name: "NarratorSayStatement", value: MetaTokenType.NarratorSayStatement }, + SayStatement: { name: "SayStatement", value: MetaTokenType.SayStatement }, + CharacterNameString: { name: "CharacterNameString", value: MetaTokenType.CharacterNameString }, + SayNarrator: { name: "SayNarrator", value: MetaTokenType.SayNarrator }, + SayCharacter: { name: "SayCharacter", value: MetaTokenType.SayCharacter }, + + AtParameters: { name: "AtParameters", value: MetaTokenType.AtParameters }, + AsParameters: { name: "AsParameters", value: MetaTokenType.AsParameters }, + BehindParameters: { name: "BehindParameters", value: MetaTokenType.BehindParameters }, + OnlayerParameters: { name: "OnlayerParameters", value: MetaTokenType.OnlayerParameters }, + WithParameters: { name: "WithParameters", value: MetaTokenType.WithParameters }, + ZorderParameters: { name: "ZorderParameters", value: MetaTokenType.ZorderParameters }, ATLBlock: { name: "ATLBlock", value: MetaTokenType.ATLBlock }, ATLChoiceBlock: { name: "ATLChoiceBlock", value: MetaTokenType.ATLChoiceBlock }, @@ -767,6 +955,48 @@ const tokenTypeDefinitions: EnumToString = { ATLFunction: { name: "ATLFunction", value: MetaTokenType.ATLFunction }, ATLWarper: { name: "ATLWarper", value: MetaTokenType.ATLWarper }, ATLOn: { name: "ATLOn", value: MetaTokenType.ATLOn }, + + MemberAccess: { name: "MemberAccess", value: MetaTokenType.MemberAccess }, + ItemAccess: { name: "ItemAccess", value: MetaTokenType.ItemAccess }, + IndexedName: { name: "IndexedName", value: MetaTokenType.IndexedName }, + Attribute: { name: "Attribute", value: MetaTokenType.Attribute }, + ClassDefinition: { name: "ClassDefinition", value: MetaTokenType.ClassDefinition }, + ClassInheritance: { name: "ClassInheritance", value: MetaTokenType.ClassInheritance }, + FunctionDefinition: { name: "FunctionDefinition", value: MetaTokenType.FunctionDefinition }, + LambdaFunction: { name: "LambdaFunction", value: MetaTokenType.LambdaFunction }, + FunctionLambdaParameters: { name: "FunctionLambdaParameters", value: MetaTokenType.FunctionLambdaParameters }, + FunctionParameters: { name: "FunctionParameters", value: MetaTokenType.FunctionParameters }, + FunctionDecorator: { name: "FunctionDecorator", value: MetaTokenType.FunctionDecorator }, + FunctionCall: { name: "FunctionCall", value: MetaTokenType.FunctionCall }, + FunctionCallGeneric: { name: "FunctionCallGeneric", value: MetaTokenType.FunctionCallGeneric }, + Fstring: { name: "Fstring", value: MetaTokenType.Fstring }, + ControlFlowKeyword: { name: "ControlFlowKeyword", value: MetaTokenType.ControlFlowKeyword }, + LogicalOperatorKeyword: { name: "LogicalOperatorKeyword", value: MetaTokenType.LogicalOperatorKeyword }, + Operator: { name: "Operator", value: MetaTokenType.Operator }, + ArithmeticOperator: { name: "ArithmeticOperator", value: MetaTokenType.ArithmeticOperator }, + BitwiseOperatorKeyword: { name: "BitwiseOperatorKeyword", value: MetaTokenType.BitwiseOperatorKeyword }, + ComparisonOperatorKeyword: { name: "ComparisonOperatorKeyword", value: MetaTokenType.ComparisonOperatorKeyword }, + ConstantLiteral: { name: "ConstantLiteral", value: MetaTokenType.ConstantLiteral }, + ConstantNumeric: { name: "ConstantNumeric", value: MetaTokenType.ConstantNumeric }, + ConstantCaps: { name: "ConstantCaps", value: MetaTokenType.ConstantCaps }, + BuiltinExceptionType: { name: "BuiltinExceptionType", value: MetaTokenType.BuiltinExceptionType }, + BuiltinType: { name: "BuiltinType", value: MetaTokenType.BuiltinType }, + MagicVariable: { name: "MagicVariable", value: MetaTokenType.MagicVariable }, + EscapeSequence: { name: "EscapeSequence", value: MetaTokenType.EscapeSequence }, + FormatPercent: { name: "FormatPercent", value: MetaTokenType.FormatPercent }, + FormatBrace: { name: "FormatBrace", value: MetaTokenType.FormatBrace }, + StringStorageType: { name: "StringStorageType", value: MetaTokenType.StringStorageType }, + FormatStorageType: { name: "FormatStorageType", value: MetaTokenType.FormatStorageType }, + ImaginaryNumberStorageType: { name: "ImaginaryNumberStorageType", value: MetaTokenType.ImaginaryNumberStorageType }, + NumberStorageType: { name: "NumberStorageType", value: MetaTokenType.NumberStorageType }, + ClassStorageType: { name: "ClassStorageType", value: MetaTokenType.ClassStorageType }, + CommentBegin: { name: "CommentBegin", value: MetaTokenType.CommentBegin }, + CommentEnd: { name: "CommentEnd", value: MetaTokenType.CommentEnd }, + Backreference: { name: "Backreference", value: MetaTokenType.Backreference }, + BackreferenceNamed: { name: "BackreferenceNamed", value: MetaTokenType.BackreferenceNamed }, + CharacterSet: { name: "CharacterSet", value: MetaTokenType.CharacterSet }, + Named: { name: "Named", value: MetaTokenType.Named }, + ModifierFlagStorageType: { name: "ModifierFlagStorageType", value: MetaTokenType.ModifierFlagStorageType }, }; export const tokenTypeToStringMap = Object.fromEntries(Object.entries(tokenTypeDefinitions).map(([, v]) => [v.value, v.name])); diff --git a/src/tokenizer/python-token-patterns.g.ts b/src/tokenizer/python-token-patterns.g.ts new file mode 100644 index 0000000..71dac1e --- /dev/null +++ b/src/tokenizer/python-token-patterns.g.ts @@ -0,0 +1,3169 @@ +/* eslint-disable no-useless-escape */ +/* eslint-disable no-useless-backreference */ +/* eslint-disable @typescript-eslint/no-non-null-assertion */ + +// THIS FILE HAS BEEN GENERATED BY THE `syntax-to-token-pattern.py` GENERATOR +// DO NOT EDIT THIS FILE DIRECTLY! INSTEAD RUN THE PYTHON SCRIPT. +// ANY MANUAL EDITS MADE TO THIS FILE WILL BE OVERWRITTEN. YOU HAVE BEEN WARNED. +// Last generated: 01/06/2023 14:57:48 (UTC+0) + +import { MetaTokenType, KeywordTokenType, EntityTokenType, CharacterTokenType, OperatorTokenType, LiteralTokenType } from "./renpy-tokens"; +import { TokenPattern } from "./token-pattern-types"; + +export const python: TokenPattern = { + patterns: [ + ] +}; + +export const impossible: TokenPattern = { + debugName: "impossible", + + // This is a special rule that should be used where no match is desired. It is not a good idea to match something like '1{0}' because in some cases that can result in infinite loops in token generation. So the rule instead matches and impossible expression to allow a match to fail and move to the next token. + match: /$.^/gm, +}; + +export const statement: TokenPattern = { + patterns: [ + ] +}; + +export const semicolon: TokenPattern = { + patterns: [ + { + debugName: "semicolon.patterns![0]", + + token: MetaTokenType.Deprecated, /*invalid.deprecated.semicolon.python*/ + match: /\;$/gm, + }, + ] +}; + +export const comments: TokenPattern = { + patterns: [ + { + debugName: "comments.patterns![0]", + + token: MetaTokenType.Comment, /*comment.line.number-sign.renpy*/ + match: /^[ \t]*(#[ \t]*(?:end)?region)\b.*$/dgm, + captures: { + 1: { token: MetaTokenType.CommentRegionTag, /*punctuation.definition.tag.region*/ }, + }, + }, + { + debugName: "comments.patterns![1]", + + token: MetaTokenType.Comment, /*comment.line.number-sign.python*/ + contentToken: MetaTokenType.TypehintComment, /*meta.typehint.comment.python*/ + begin: /(?:\#\s*(type:)\s*(?!$|\#))/dgm, + beginCaptures: { + 0: { token: MetaTokenType.TypehintComment, /*meta.typehint.comment.python*/ }, + 1: { token: MetaTokenType.TypehintDirective, /*comment.typehint.directive.notation.python*/ }, + }, + end: /(?:$|(?=\#))/gm, + patterns: [ + { + debugName: "comments.patterns![1].patterns![0]", + + token: MetaTokenType.TypehintIgnore, /*comment.typehint.ignore.notation.python*/ + match: /ignore(?=\s*(?:$|\#))/gm, + }, + { + debugName: "comments.patterns![1].patterns![1]", + + token: MetaTokenType.TypehintType, /*comment.typehint.type.notation.python*/ + match: /(?))/g, + }, + { + debugName: "comments.patterns![1].patterns![3]", + + token: MetaTokenType.TypehintVariable, /*comment.typehint.variable.notation.python*/ + match: /([a-zA-Z_]\w*)/g, + }, + ] + }, + ] +}; + +export const docstringStatement: TokenPattern = { + debugName: "docstringStatement", + + // the string either terminates correctly or by the beginning of a new line (this is for single line docstrings that aren't terminated) AND it's not followed by another docstring + begin: /^(?=\s*[rR]?(\'\'\'|\"\"\"|\'|\"))/gm, + end: /((?<=\1)|^)(?!\s*[rR]?(\'\'\'|\"\"\"|\'|\"))/gm, + patterns: [ + ] +}; + +export const docstring: TokenPattern = { + patterns: [ + { + debugName: "docstring.patterns![0]", + + token: MetaTokenType.Docstring, /*string.quoted.docstring.multi.python*/ + begin: /(\'\'\'|\"\"\")/dg, + beginCaptures: { + 1: { token: MetaTokenType.StringBegin, /*punctuation.definition.string.begin.python*/ }, + }, + end: /(\1)/dg, + endCaptures: { + 1: { token: MetaTokenType.StringEnd, /*punctuation.definition.string.end.python*/ }, + }, + patterns: [ + ] + }, + { + debugName: "docstring.patterns![1]", + + token: MetaTokenType.Docstring, /*string.quoted.docstring.raw.multi.python*/ + begin: /([rR])(\'\'\'|\"\"\")/dg, + beginCaptures: { + 1: { token: MetaTokenType.StringStorageType, /*storage.type.string.python*/ }, + 2: { token: MetaTokenType.StringBegin, /*punctuation.definition.string.begin.python*/ }, + }, + end: /(\2)/dg, + endCaptures: { + 1: { token: MetaTokenType.StringEnd, /*punctuation.definition.string.end.python*/ }, + }, + patterns: [ + ] + }, + { + debugName: "docstring.patterns![2]", + + token: MetaTokenType.Docstring, /*string.quoted.docstring.single.python*/ + begin: /(\'|\")/dg, + beginCaptures: { + 1: { token: MetaTokenType.StringBegin, /*punctuation.definition.string.begin.python*/ }, + }, + end: /(\1)|(\n)/dg, + endCaptures: { + 1: { token: MetaTokenType.StringEnd, /*punctuation.definition.string.end.python*/ }, + 2: { token: MetaTokenType.Invalid, /*invalid.illegal.newline.python*/ }, + }, + patterns: [ + ] + }, + { + debugName: "docstring.patterns![3]", + + token: MetaTokenType.Docstring, /*string.quoted.docstring.raw.single.python*/ + begin: /([rR])(\'|\")/dg, + beginCaptures: { + 1: { token: MetaTokenType.StringStorageType, /*storage.type.string.python*/ }, + 2: { token: MetaTokenType.StringBegin, /*punctuation.definition.string.begin.python*/ }, + }, + end: /(\2)|(\n)/dg, + endCaptures: { + 1: { token: MetaTokenType.StringEnd, /*punctuation.definition.string.end.python*/ }, + 2: { token: MetaTokenType.Invalid, /*invalid.illegal.newline.python*/ }, + }, + patterns: [] + }] +}; + +export const docstringGutsUnicode: TokenPattern = { + patterns: [ + ] +}; + +export const docstringPrompt: TokenPattern = { + debugName: "docstringPrompt", + + match: /(?:\s*((?:>>>|\.\.\.)\s)(?=\s*\S))/dg, + captures: { + 1: { token: MetaTokenType.ControlFlowKeyword, /*keyword.control.flow.python*/ }, + }, +}; + +export const statementKeyword: TokenPattern = { + patterns: [ + { + debugName: "statementKeyword.patterns![0]", + + token: KeywordTokenType.Def, /*storage.type.function.python*/ + match: /\b((async\s+)?\s*def)\b/g, + }, + { + debugName: "statementKeyword.patterns![1]", + + // if `as` is eventually followed by `:` or line continuation, it's probably control flow like: with foo as bar, \ + // Foo as Bar: try: do_stuff() except Exception as e: pass + token: MetaTokenType.ControlFlowKeyword, /*keyword.control.flow.python*/ + match: /\b(?>=|\/\/=|\*\*=|\+=|-=|\/=|@=|\*=|%=|~=|\^=|&=|\|=|=(?!=)/gm, +}; + +export const operator: TokenPattern = { + debugName: "operator", + + match: /\b(?>|&|\||\^|~)|(\*\*|\*|\+|-|%|\/\/|\/|@)|(!=|==|>=|<=|<|>)|(:=)/dgm, + captures: { + 1: { token: MetaTokenType.LogicalOperatorKeyword, /*keyword.operator.logical.python*/ }, + 2: { token: MetaTokenType.ControlFlowKeyword, /*keyword.control.flow.python*/ }, + 3: { token: MetaTokenType.BitwiseOperatorKeyword, /*keyword.operator.bitwise.python*/ }, + 4: { token: MetaTokenType.ArithmeticOperator, /*keyword.operator.arithmetic.python*/ }, + 5: { token: MetaTokenType.ComparisonOperatorKeyword, /*keyword.operator.comparison.python*/ }, + 6: { token: OperatorTokenType.Assignment, /*keyword.operator.assignment.python*/ }, + }, +}; + +export const punctuation: TokenPattern = { + patterns: [ + { + debugName: "punctuation.patterns![0]", + + token: CharacterTokenType.Colon, /*punctuation.separator.colon.python*/ + match: /:/g, + }, + { + debugName: "punctuation.patterns![1]", + + token: CharacterTokenType.Comma, /*punctuation.separator.element.python*/ + match: /,/g, + }, + ] +}; + +export const literal: TokenPattern = { + patterns: [ + { + debugName: "literal.patterns![0]", + + token: MetaTokenType.ConstantLiteral, /*constant.language.python*/ + match: /\b(True|False|None|NotImplemented|Ellipsis)\b/g, + }, + ] +}; + +export const number: TokenPattern = { + patterns: [ + { + debugName: "number.patterns![6]", + + token: MetaTokenType.Invalid, /*invalid.illegal.name.python*/ + match: /\b[0-9]+\w+/g, + }, + ] +}; + +export const numberFloat: TokenPattern = { + debugName: "numberFloat", + + token: LiteralTokenType.Float, /*constant.numeric.float.python*/ + match: /(?=^]?[-+ ]?\#?\d*,?(\.\d+)?[bcdeEfFgGnosxX%]?)?}))/dgm, + captures: { + 1: { token: MetaTokenType.Placeholder, /*constant.character.format.placeholder.other.python*/ }, + 3: { token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ }, + 4: { token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ }, + }, + }, + { + debugName: "stringBraceFormatting.patterns![1]", + + token: MetaTokenType.FormatBrace, /*meta.format.brace.python*/ + match: /({\w*(\.[a-zA-Z_]\w*|\[[^\]'"]+\])*(![rsa])?(:)[^'"{}\n]*(?:\{[^'"}\n]*?\}[^'"{}\n]*)*})/dg, + captures: { + 1: { token: MetaTokenType.Placeholder, /*constant.character.format.placeholder.other.python*/ }, + 3: { token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ }, + 4: { token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ }, + }, + }, + ] +}; + +export const fstringFormatting: TokenPattern = { + patterns: [ + ] +}; + +export const fstringFormattingSingeBrace: TokenPattern = { + debugName: "fstringFormattingSingeBrace", + + token: MetaTokenType.Invalid, /*invalid.illegal.brace.python*/ + match: /(}(?!}))/g, +}; + +export const importStatement: TokenPattern = { + // Import statements used to correctly mark `from`, `import`, and `as` + patterns: [ + { + debugName: "importStatement.patterns![0]", + + begin: /\b(?)/dg, + beginCaptures: { + 1: { token: CharacterTokenType.Colon, /*punctuation.separator.annotation.result.python*/ }, + }, + end: /(?=:)/g, + patterns: [expression] +}; + +export const itemAccess: TokenPattern = { + patterns: [ + { + debugName: "itemAccess.patterns![0]", + + token: MetaTokenType.ItemAccess, /*meta.item-access.python*/ + begin: /\b(?=[a-zA-Z_]\w*\s*\[)/g, + end: /(\])/dg, + endCaptures: { + 1: { token: CharacterTokenType.CloseParentheses, /*punctuation.definition.arguments.end.python*/ }, + }, + patterns: [expression] + }, + ] +}; + +export const itemName: TokenPattern = { + patterns: [ + specialNames, + { + debugName: "itemName.patterns![3]", + + token: MetaTokenType.IndexedName, /*meta.indexed-name.python*/ + match: /\b([a-zA-Z_]\w*)\b/g, + }, + ] +}; + +export const itemIndex: TokenPattern = { + debugName: "itemIndex", + + contentToken: MetaTokenType.Arguments, /*meta.item-access.arguments.python*/ + begin: /(\[)/dg, + beginCaptures: { + 1: { token: CharacterTokenType.OpenParentheses, /*punctuation.definition.arguments.begin.python*/ }, + }, + end: /(?=\])/g, + patterns: [ + { + debugName: "itemIndex.patterns![0]", + + token: CharacterTokenType.Colon, /*punctuation.separator.slice.python*/ + match: /:/g, + }, + expression, + ] +}; + +export const decorator: TokenPattern = { + debugName: "decorator", + + token: MetaTokenType.FunctionDefinition, /*meta.function.decorator.python*/ + begin: /^\s*((@))\s*(?=[a-zA-Z_]\w*)/dgm, + beginCaptures: { + 1: { token: EntityTokenType.FunctionName, /*entity.name.function.decorator.python*/ }, + 2: { token: CharacterTokenType.AtSymbol, /*punctuation.definition.decorator.python*/ }, + }, + end: /(\))(?:(.*?)(?=\s*(?:\#|$)))|(?=\n|\#)/dgm, + endCaptures: { + 1: { token: CharacterTokenType.CloseParentheses, /*punctuation.definition.arguments.end.python*/ }, + 2: { token: MetaTokenType.Invalid, /*invalid.illegal.decorator.python*/ }, + }, + patterns: [ + ] +}; + +export const decoratorName: TokenPattern = { + patterns: [ + { + debugName: "decoratorName.patterns![2]", + + token: EntityTokenType.FunctionName, /*entity.name.function.decorator.python*/ + match: /([a-zA-Z_]\w*)|(\.)/dg, + captures: { + 2: { token: CharacterTokenType.Period, /*punctuation.separator.period.python*/ }, + }, + }, + lineContinuation, + { + debugName: "decoratorName.patterns![4]", + + token: MetaTokenType.Invalid, /*invalid.illegal.decorator.python*/ + match: /\s*([^(a-zA-Z\s_\.#\\].*?)(?=\#|$)/dgm, + captures: { + 1: { token: MetaTokenType.Invalid, /*invalid.illegal.decorator.python*/ }, + }, + }, + ] +}; + +export const callWrapperInheritance: TokenPattern = { + debugName: "callWrapperInheritance", + + // same as a function call, but in inheritance context + token: MetaTokenType.FunctionCall, /*meta.function-call.python*/ + begin: /\b(?=([a-zA-Z_]\w*)\s*(\())/g, + end: /(\))/dg, + endCaptures: { + 1: { token: CharacterTokenType.CloseParentheses, /*punctuation.definition.arguments.end.python*/ }, + }, + patterns: [ + ] +}; + +export const inheritanceName: TokenPattern = { + patterns: [ + lambdaIncomplete, + inheritanceIdentifier, + ] +}; + +export const functionCall: TokenPattern = { + debugName: "functionCall", + + // Regular function call of the type "name(args)" + token: MetaTokenType.FunctionCall, /*meta.function-call.python*/ + begin: /\b(?=([a-zA-Z_]\w*)\s*(\())/g, + end: /(\))/dg, + endCaptures: { + 1: { token: CharacterTokenType.CloseParentheses, /*punctuation.definition.arguments.end.python*/ }, + }, + patterns: [ + ] +}; + +export const functionName: TokenPattern = { + patterns: [ + { + debugName: "functionName.patterns![1]", + + // Some color schemas support meta.function-call.generic scope + token: MetaTokenType.FunctionCall, /*meta.function-call.generic.python*/ + match: /\b([a-zA-Z_]\w*)\b/g, + }, + ] +}; + +export const functionArguments: TokenPattern = { + debugName: "functionArguments", + + contentToken: MetaTokenType.Arguments, /*meta.function-call.arguments.python*/ + begin: /(\()/dg, + beginCaptures: { + 1: { token: CharacterTokenType.OpenParentheses, /*punctuation.definition.arguments.begin.python*/ }, + }, + end: /(?=\))(?!\)\s*\()/g, + patterns: [ + { + debugName: "functionArguments.patterns![0]", + + token: CharacterTokenType.Comma, /*punctuation.separator.arguments.python*/ + match: /(,)/g, + }, + { + debugName: "functionArguments.patterns![1]", + + match: /(?:(?<=[,(])|^)\s*(\*{1,2})/dgm, + captures: { + 1: { token: OperatorTokenType.Unpacking, /*keyword.operator.unpacking.arguments.python*/ }, + }, + }, + lambdaIncomplete, + { + debugName: "functionArguments.patterns![4]", + + match: /\b([a-zA-Z_]\w*)\s*(=)(?!=)/dg, + captures: { + 1: { token: EntityTokenType.VariableName, /*variable.parameter.function-call.python*/ }, + 2: { token: OperatorTokenType.Assignment, /*keyword.operator.assignment.python*/ }, + }, + }, + { + debugName: "functionArguments.patterns![5]", + + token: OperatorTokenType.Assignment, /*keyword.operator.assignment.python*/ + match: /=(?!=)/g, + }, + expression, + { + debugName: "functionArguments.patterns![7]", + + match: /\s*(\))\s*(\()/dg, + captures: { + 1: { token: CharacterTokenType.CloseParentheses, /*punctuation.definition.arguments.end.python*/ }, + 2: { token: CharacterTokenType.OpenParentheses, /*punctuation.definition.arguments.begin.python*/ }, + }, + }, + ] +}; + +export const builtinCallables: TokenPattern = { + patterns: [] +}; + +export const builtinPossibleCallables: TokenPattern = { + patterns: [ + builtinCallables] +}; + +export const builtinExceptions: TokenPattern = { + debugName: "builtinExceptions", + + token: MetaTokenType.BuiltinExceptionType, /*support.type.exception.python*/ + match: /(?/g, +}; + +export const regexpBaseExpression: TokenPattern = { + patterns: [ + ] +}; + +export const fregexpBaseExpression: TokenPattern = { + patterns: [ + { + debugName: "fregexpBaseExpression.patterns![2]", + + match: /\{.*?\}/g, + }, + ] +}; + +export const fstringFormattingBraces: TokenPattern = { + patterns: [ + { + debugName: "fstringFormattingBraces.patterns![0]", + + // empty braces are illegal + match: /({)(\s*?)(})/dg, + captures: { + 1: { token: MetaTokenType.Placeholder, /*constant.character.format.placeholder.other.python*/ }, + 2: { token: MetaTokenType.Invalid, /*invalid.illegal.brace.python*/ }, + 3: { token: MetaTokenType.Placeholder, /*constant.character.format.placeholder.other.python*/ }, + }, + }, + { + debugName: "fstringFormattingBraces.patterns![1]", + + token: MetaTokenType.EscapeSequence, /*constant.character.escape.python*/ + match: /({{|}})/g, + }, + ] +}; + +export const regexpBaseCommon: TokenPattern = { + patterns: [ + { + debugName: "regexpBaseCommon.patterns![0]", + + token: CharacterTokenType.Period, /*support.other.match.any.regexp*/ + match: /\./g, + }, + { + debugName: "regexpBaseCommon.patterns![1]", + + token: CharacterTokenType.Caret, /*support.other.match.begin.regexp*/ + match: /\^/gm, + }, + { + debugName: "regexpBaseCommon.patterns![2]", + + token: CharacterTokenType.DollarSymbol, /*support.other.match.end.regexp*/ + match: /\$/gm, + }, + { + debugName: "regexpBaseCommon.patterns![3]", + + token: OperatorTokenType.Quantifier, /*keyword.operator.quantifier.regexp*/ + match: /[+*?]\??/g, + }, + { + debugName: "regexpBaseCommon.patterns![4]", + + token: OperatorTokenType.Disjunction, /*keyword.operator.disjunction.regexp*/ + match: /\|/g, + }, + ] +}; + +export const regexpQuantifier: TokenPattern = { + debugName: "regexpQuantifier", + + token: OperatorTokenType.Quantifier, /*keyword.operator.quantifier.regexp*/ + match: /\{(\d+|\d+,(\d+)?|,\d+)\}/g, +}; + +export const fregexpQuantifier: TokenPattern = { + debugName: "fregexpQuantifier", + + token: OperatorTokenType.Quantifier, /*keyword.operator.quantifier.regexp*/ + match: /\{\{(\d+|\d+,(\d+)?|,\d+)\}\}/g, +}; + +export const regexpBackreferenceNumber: TokenPattern = { + debugName: "regexpBackreferenceNumber", + + token: MetaTokenType.Backreference, /*meta.backreference.regexp*/ + match: /(\\[1-9]\d?)/dg, + captures: { + 1: { token: EntityTokenType.TagName, /*entity.name.tag.backreference.regexp*/ }, + }, +}; + +export const regexpBackreference: TokenPattern = { + debugName: "regexpBackreference", + + token: MetaTokenType.BackreferenceNamed, /*meta.backreference.named.regexp*/ + match: /(\()(\?P=\w+(?:\s+[a-zA-Z0-9]+)?)(\))/dg, + captures: { + 1: { token: CharacterTokenType.OpenParentheses, /*support.other.parenthesis.regexp punctuation.parenthesis.begin.backreference.named.regexp*/ }, + 2: { token: EntityTokenType.TagName, /*entity.name.tag.named.backreference.regexp*/ }, + 3: { token: CharacterTokenType.CloseParentheses, /*support.other.parenthesis.regexp punctuation.parenthesis.end.backreference.named.regexp*/ }, + }, +}; + +export const regexpFlags: TokenPattern = { + debugName: "regexpFlags", + + token: MetaTokenType.ModifierFlagStorageType, /*storage.modifier.flag.regexp*/ + match: /\(\?[aiLmsux]+\)/g, +}; + +export const regexpEscapeSpecial: TokenPattern = { + debugName: "regexpEscapeSpecial", + + token: MetaTokenType.EscapeSequence, /*support.other.escape.special.regexp*/ + match: /\\([AbBdDsSwWZ])/g, +}; + +export const regexpEscapeCharacter: TokenPattern = { + debugName: "regexpEscapeCharacter", + + token: MetaTokenType.EscapeSequence, /*constant.character.escape.regexp*/ + match: /\\(x[0-9A-Fa-f]{2}|0[0-7]{1,2}|[0-7]{3})/g, +}; + +export const regexpEscapeUnicode: TokenPattern = { + debugName: "regexpEscapeUnicode", + + token: MetaTokenType.EscapeSequence, /*constant.character.unicode.regexp*/ + match: /\\(u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/g, +}; + +export const regexpEscapeCatchall: TokenPattern = { + debugName: "regexpEscapeCatchall", + + token: MetaTokenType.EscapeSequence, /*constant.character.escape.regexp*/ + match: /\\(.|\n)/g, +}; + +export const regexpEscapeSequence: TokenPattern = { + patterns: [ + regexpEscapeSpecial, + regexpEscapeCharacter, + regexpEscapeUnicode, + regexpBackreferenceNumber, + regexpEscapeCatchall, + ] +}; + +export const regexpCharecterSetEscapes: TokenPattern = { + patterns: [ + { + debugName: "regexpCharecterSetEscapes.patterns![0]", + + token: MetaTokenType.EscapeSequence, /*constant.character.escape.regexp*/ + match: /\\[abfnrtv\\]/g, + }, + regexpEscapeSpecial, + { + debugName: "regexpCharecterSetEscapes.patterns![2]", + + token: MetaTokenType.EscapeSequence, /*constant.character.escape.regexp*/ + match: /\\([0-7]{1,3})/g, + }, + regexpEscapeCharacter, + regexpEscapeUnicode, + regexpEscapeCatchall, + ] +}; + +export const codetags: TokenPattern = { + debugName: "codetags", + + match: /(?:\b(NOTE|XXX|HACK|FIXME|BUG|TODO)\b)/dg, + captures: { + 1: { token: MetaTokenType.CommentCodeTag, /*keyword.codetag.notation.python*/ }, + }, +}; + +export const commentsBase: TokenPattern = { + debugName: "commentsBase", + + token: MetaTokenType.Comment, /*comment.line.number-sign.python*/ + begin: /(\#)/dg, + beginCaptures: { + 1: { token: CharacterTokenType.Hashtag, /*punctuation.definition.comment.python*/ }, + }, + end: /($)/gm, + patterns: [codetags] +}; + +export const commentsStringSingleThree: TokenPattern = { + debugName: "commentsStringSingleThree", + + token: MetaTokenType.Comment, /*comment.line.number-sign.python*/ + begin: /(\#)/dg, + beginCaptures: { + 1: { token: CharacterTokenType.Hashtag, /*punctuation.definition.comment.python*/ }, + }, + end: /($|(?='''))/gm, + patterns: [codetags] +}; + +export const commentsStringDoubleThree: TokenPattern = { + debugName: "commentsStringDoubleThree", + + token: MetaTokenType.Comment, /*comment.line.number-sign.python*/ + begin: /(\#)/dg, + beginCaptures: { + 1: { token: CharacterTokenType.Hashtag, /*punctuation.definition.comment.python*/ }, + }, + end: /($|(?="""))/gm, + patterns: [codetags] +}; + +export const singleOneRegexpExpression: TokenPattern = { + patterns: [ + regexpBaseExpression, + regexpFlags, + regexpBackreference, + ] +}; + +export const singleOneRegexpCharacterSet: TokenPattern = { + patterns: [ + { + debugName: "singleOneRegexpCharacterSet.patterns![0]", + + match: /\[\^?\](?!.*?\])/gm, + }, + { + debugName: "singleOneRegexpCharacterSet.patterns![1]", + + token: MetaTokenType.CharacterSet, /*meta.character.set.regexp*/ + begin: /(\[)(\^)?(\])?/dgm, + beginCaptures: { + 1: { token: CharacterTokenType.OpenSquareBracket, /*constant.other.set.regexp punctuation.character.set.begin.regexp*/ }, + 2: { token: OperatorTokenType.Negation, /*keyword.operator.negation.regexp*/ }, + 3: { token: MetaTokenType.CharacterSet, /*constant.character.set.regexp*/ }, + }, + end: /(\]|(?=\'))|((?=(?)/dg, + beginCaptures: { + 1: { token: CharacterTokenType.OpenParentheses, /*support.other.parenthesis.regexp punctuation.parenthesis.begin.named.regexp*/ }, + 2: { token: EntityTokenType.TagName, /*entity.name.tag.named.group.regexp*/ }, + }, + end: /(\)|(?=\'))|((?=(?)/dg, + beginCaptures: { + 1: { token: CharacterTokenType.OpenParentheses, /*support.other.parenthesis.regexp punctuation.parenthesis.begin.named.regexp*/ }, + 2: { token: EntityTokenType.TagName, /*entity.name.tag.named.group.regexp*/ }, + }, + end: /(\)|(?=\'\'\'))/dg, + endCaptures: { + 1: { token: CharacterTokenType.CloseParentheses, /*support.other.parenthesis.regexp punctuation.parenthesis.end.named.regexp*/ }, + 2: { token: MetaTokenType.Invalid, /*invalid.illegal.newline.python*/ }, + }, + patterns: [ + singleThreeRegexpExpression, + commentsStringSingleThree, + ] +}; + +export const singleThreeRegexpComments: TokenPattern = { + debugName: "singleThreeRegexpComments", + + token: MetaTokenType.Comment, /*comment.regexp*/ + begin: /\(\?#/dg, + beginCaptures: { + 0: { token: MetaTokenType.CommentBegin, /*punctuation.comment.begin.regexp*/ }, + }, + end: /(\)|(?=\'\'\'))/dg, + endCaptures: { + 1: { token: MetaTokenType.CommentEnd, /*punctuation.comment.end.regexp*/ }, + 2: { token: MetaTokenType.Invalid, /*invalid.illegal.newline.python*/ }, + }, + patterns: [codetags] +}; + +export const singleThreeRegexpLookahead: TokenPattern = { + debugName: "singleThreeRegexpLookahead", + + begin: /(\()\?=/dg, + beginCaptures: { + 0: { token: OperatorTokenType.Lookahead, /*keyword.operator.lookahead.regexp*/ }, + 1: { token: CharacterTokenType.OpenParentheses, /*punctuation.parenthesis.begin.lookahead.regexp*/ }, + }, + end: /(\)|(?=\'\'\'))/dg, + endCaptures: { + 1: { token: CharacterTokenType.CloseParentheses, /*keyword.operator.lookahead.regexp punctuation.parenthesis.end.lookahead.regexp*/ }, + 2: { token: MetaTokenType.Invalid, /*invalid.illegal.newline.python*/ }, + }, + patterns: [ + singleThreeRegexpExpression, + commentsStringSingleThree, + ] +}; + +export const singleThreeRegexpLookaheadNegative: TokenPattern = { + debugName: "singleThreeRegexpLookaheadNegative", + + begin: /(\()\?!/dg, + beginCaptures: { + 0: { token: OperatorTokenType.LookaheadNegative, /*keyword.operator.lookahead.negative.regexp*/ }, + 1: { token: CharacterTokenType.OpenParentheses, /*punctuation.parenthesis.begin.lookahead.regexp*/ }, + }, + end: /(\)|(?=\'\'\'))/dg, + endCaptures: { + 1: { token: CharacterTokenType.CloseParentheses, /*keyword.operator.lookahead.negative.regexp punctuation.parenthesis.end.lookahead.regexp*/ }, + 2: { token: MetaTokenType.Invalid, /*invalid.illegal.newline.python*/ }, + }, + patterns: [ + singleThreeRegexpExpression, + commentsStringSingleThree, + ] +}; + +export const singleThreeRegexpLookbehind: TokenPattern = { + debugName: "singleThreeRegexpLookbehind", + + begin: /(\()\?<=/dg, + beginCaptures: { + 0: { token: OperatorTokenType.Lookbehind, /*keyword.operator.lookbehind.regexp*/ }, + 1: { token: CharacterTokenType.OpenParentheses, /*punctuation.parenthesis.begin.lookbehind.regexp*/ }, + }, + end: /(\)|(?=\'\'\'))/dg, + endCaptures: { + 1: { token: CharacterTokenType.CloseParentheses, /*keyword.operator.lookbehind.regexp punctuation.parenthesis.end.lookbehind.regexp*/ }, + 2: { token: MetaTokenType.Invalid, /*invalid.illegal.newline.python*/ }, + }, + patterns: [ + singleThreeRegexpExpression, + commentsStringSingleThree, + ] +}; + +export const singleThreeRegexpLookbehindNegative: TokenPattern = { + debugName: "singleThreeRegexpLookbehindNegative", + + begin: /(\()\?)/dg, + beginCaptures: { + 1: { token: CharacterTokenType.OpenParentheses, /*support.other.parenthesis.regexp punctuation.parenthesis.begin.named.regexp*/ }, + 2: { token: EntityTokenType.TagName, /*entity.name.tag.named.group.regexp*/ }, + }, + end: /(\)|(?="))|((?=(?)/dg, + beginCaptures: { + 1: { token: CharacterTokenType.OpenParentheses, /*support.other.parenthesis.regexp punctuation.parenthesis.begin.named.regexp*/ }, + 2: { token: EntityTokenType.TagName, /*entity.name.tag.named.group.regexp*/ }, + }, + end: /(\)|(?="""))/dg, + endCaptures: { + 1: { token: CharacterTokenType.CloseParentheses, /*support.other.parenthesis.regexp punctuation.parenthesis.end.named.regexp*/ }, + 2: { token: MetaTokenType.Invalid, /*invalid.illegal.newline.python*/ }, + }, + patterns: [ + doubleThreeRegexpExpression, + commentsStringDoubleThree, + ] +}; + +export const doubleThreeRegexpComments: TokenPattern = { + debugName: "doubleThreeRegexpComments", + + token: MetaTokenType.Comment, /*comment.regexp*/ + begin: /\(\?#/dg, + beginCaptures: { + 0: { token: MetaTokenType.CommentBegin, /*punctuation.comment.begin.regexp*/ }, + }, + end: /(\)|(?="""))/dg, + endCaptures: { + 1: { token: MetaTokenType.CommentEnd, /*punctuation.comment.end.regexp*/ }, + 2: { token: MetaTokenType.Invalid, /*invalid.illegal.newline.python*/ }, + }, + patterns: [codetags] +}; + +export const doubleThreeRegexpLookahead: TokenPattern = { + debugName: "doubleThreeRegexpLookahead", + + begin: /(\()\?=/dg, + beginCaptures: { + 0: { token: OperatorTokenType.Lookahead, /*keyword.operator.lookahead.regexp*/ }, + 1: { token: CharacterTokenType.OpenParentheses, /*punctuation.parenthesis.begin.lookahead.regexp*/ }, + }, + end: /(\)|(?="""))/dg, + endCaptures: { + 1: { token: CharacterTokenType.CloseParentheses, /*keyword.operator.lookahead.regexp punctuation.parenthesis.end.lookahead.regexp*/ }, + 2: { token: MetaTokenType.Invalid, /*invalid.illegal.newline.python*/ }, + }, + patterns: [ + doubleThreeRegexpExpression, + commentsStringDoubleThree, + ] +}; + +export const doubleThreeRegexpLookaheadNegative: TokenPattern = { + debugName: "doubleThreeRegexpLookaheadNegative", + + begin: /(\()\?!/dg, + beginCaptures: { + 0: { token: OperatorTokenType.LookaheadNegative, /*keyword.operator.lookahead.negative.regexp*/ }, + 1: { token: CharacterTokenType.OpenParentheses, /*punctuation.parenthesis.begin.lookahead.regexp*/ }, + }, + end: /(\)|(?="""))/dg, + endCaptures: { + 1: { token: CharacterTokenType.CloseParentheses, /*keyword.operator.lookahead.negative.regexp punctuation.parenthesis.end.lookahead.regexp*/ }, + 2: { token: MetaTokenType.Invalid, /*invalid.illegal.newline.python*/ }, + }, + patterns: [ + doubleThreeRegexpExpression, + commentsStringDoubleThree, + ] +}; + +export const doubleThreeRegexpLookbehind: TokenPattern = { + debugName: "doubleThreeRegexpLookbehind", + + begin: /(\()\?<=/dg, + beginCaptures: { + 0: { token: OperatorTokenType.Lookbehind, /*keyword.operator.lookbehind.regexp*/ }, + 1: { token: CharacterTokenType.OpenParentheses, /*punctuation.parenthesis.begin.lookbehind.regexp*/ }, + }, + end: /(\)|(?="""))/dg, + endCaptures: { + 1: { token: CharacterTokenType.CloseParentheses, /*keyword.operator.lookbehind.regexp punctuation.parenthesis.end.lookbehind.regexp*/ }, + 2: { token: MetaTokenType.Invalid, /*invalid.illegal.newline.python*/ }, + }, + patterns: [ + doubleThreeRegexpExpression, + commentsStringDoubleThree, + ] +}; + +export const doubleThreeRegexpLookbehindNegative: TokenPattern = { + debugName: "doubleThreeRegexpLookbehindNegative", + + begin: /(\()\?=^]?[-+ ]?\#?\d*,?(\.\d+)?[bcdeEfFgGnosxX%]?)(?=})/dgm, + captures: { + 1: { token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ }, + 2: { token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ }, + }, + }, + ] +}; + +export const fstringTerminatorSingleTail: TokenPattern = { + debugName: "fstringTerminatorSingleTail", + + begin: /((?:=?)(?:![rsa])?)(:)(?=.*?{)/dg, + beginCaptures: { + 1: { token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ }, + 2: { token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ }, + }, + end: /(?=})|(?=\n)/g, + patterns: [ + fstringIllegalSingleBrace, + fstringSingleBrace, + { + debugName: "fstringTerminatorSingleTail.patterns![2]", + + token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ + match: /([bcdeEfFgGnosxX%])(?=})/g, + }, + { + debugName: "fstringTerminatorSingleTail.patterns![3]", + + token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ + match: /(\.\d+)/g, + }, + { + debugName: "fstringTerminatorSingleTail.patterns![4]", + + token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ + match: /(,)/g, + }, + { + debugName: "fstringTerminatorSingleTail.patterns![5]", + + token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ + match: /(\d+)/g, + }, + { + debugName: "fstringTerminatorSingleTail.patterns![6]", + + token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ + match: /(\#)/g, + }, + { + debugName: "fstringTerminatorSingleTail.patterns![7]", + + token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ + match: /([-+ ])/g, + }, + { + debugName: "fstringTerminatorSingleTail.patterns![8]", + + token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ + match: /([<>=^])/gm, + }, + { + debugName: "fstringTerminatorSingleTail.patterns![9]", + + token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ + match: /(\w)/g, + }, + ] +}; + +export const fstringFnormQuotedMultiLine: TokenPattern = { + debugName: "fstringFnormQuotedMultiLine", + + token: MetaTokenType.Fstring, /*meta.fstring.python*/ + begin: /(\b[fF])([bBuU])?('''|""")/dg, + beginCaptures: { + 1: { token: MetaTokenType.StringStorageType, /*string.interpolated.python string.quoted.multi.python storage.type.string.python*/ }, + 2: { token: MetaTokenType.Invalid, /*invalid.illegal.prefix.python*/ }, + 3: { token: LiteralTokenType.String, /*punctuation.definition.string.begin.python string.interpolated.python string.quoted.multi.python*/ }, + }, + end: /(\3)/dg, + endCaptures: { + 1: { token: LiteralTokenType.String, /*punctuation.definition.string.end.python string.interpolated.python string.quoted.multi.python*/ }, + 2: { token: MetaTokenType.Invalid, /*invalid.illegal.newline.python*/ }, + }, + patterns: [ + fstringGuts, + fstringIllegalMultiBrace, + ] +}; + +export const fstringNormfQuotedMultiLine: TokenPattern = { + debugName: "fstringNormfQuotedMultiLine", + + token: MetaTokenType.Fstring, /*meta.fstring.python*/ + begin: /(\b[bBuU])([fF])('''|""")/dg, + beginCaptures: { + 1: { token: MetaTokenType.Invalid, /*invalid.illegal.prefix.python*/ }, + 2: { token: MetaTokenType.StringStorageType, /*string.interpolated.python string.quoted.multi.python storage.type.string.python*/ }, + 3: { token: MetaTokenType.StringBegin, /*string.quoted.multi.python punctuation.definition.string.begin.python*/ }, + }, + end: /(\3)/dg, + endCaptures: { + 1: { token: LiteralTokenType.String, /*punctuation.definition.string.end.python string.interpolated.python string.quoted.multi.python*/ }, + 2: { token: MetaTokenType.Invalid, /*invalid.illegal.newline.python*/ }, + }, + patterns: [ + fstringGuts, + fstringIllegalMultiBrace, + ] +}; + +export const fstringRawQuotedMultiLine: TokenPattern = { + debugName: "fstringRawQuotedMultiLine", + + token: MetaTokenType.Fstring, /*meta.fstring.python*/ + begin: /(\b(?:[rR][fF]|[fF][rR]))('''|""")/dg, + beginCaptures: { + 1: { token: MetaTokenType.StringStorageType, /*string.interpolated.python string.quoted.raw.multi.python storage.type.string.python*/ }, + 2: { token: MetaTokenType.StringBegin, /*string.quoted.raw.multi.python punctuation.definition.string.begin.python*/ }, + }, + end: /(\2)/dg, + endCaptures: { + 1: { token: LiteralTokenType.String, /*punctuation.definition.string.end.python string.interpolated.python string.quoted.raw.multi.python*/ }, + 2: { token: MetaTokenType.Invalid, /*invalid.illegal.newline.python*/ }, + }, + patterns: [ + fstringRawGuts, + fstringIllegalMultiBrace, + ] +}; + +export const fstringMultiCore: TokenPattern = { + debugName: "fstringMultiCore", + + token: LiteralTokenType.String, /*string.interpolated.python string.quoted.multi.python*/ + match: /(.+?)(($\n?)|(?=[\\\}\{]|'''|"""))|\n/gm, +}; + +export const fstringRawMultiCore: TokenPattern = { + debugName: "fstringRawMultiCore", + + token: LiteralTokenType.String, /*string.interpolated.python string.quoted.raw.multi.python*/ + match: /(.+?)(($\n?)|(?=[\\\}\{]|'''|"""))|\n/gm, +}; + +export const fstringMultiBrace: TokenPattern = { + debugName: "fstringMultiBrace", + + // value interpolation using { ... } + begin: /(\{)/dg, + beginCaptures: { + 1: { token: MetaTokenType.Placeholder, /*constant.character.format.placeholder.other.python*/ }, + }, + end: /(\})/dg, + endCaptures: { + 1: { token: MetaTokenType.Placeholder, /*constant.character.format.placeholder.other.python*/ }, + }, + patterns: [fExpression] +}; + +export const fstringTerminatorMulti: TokenPattern = { + patterns: [ + { + debugName: "fstringTerminatorMulti.patterns![0]", + + token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ + match: /(=(![rsa])?)(?=})/g, + }, + { + debugName: "fstringTerminatorMulti.patterns![1]", + + token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ + match: /(=?![rsa])(?=})/g, + }, + { + debugName: "fstringTerminatorMulti.patterns![2]", + + match: /((?:=?)(?:![rsa])?)(:\w?[<>=^]?[-+ ]?\#?\d*,?(\.\d+)?[bcdeEfFgGnosxX%]?)(?=})/dgm, + captures: { + 1: { token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ }, + 2: { token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ }, + }, + }, + ] +}; + +export const fstringTerminatorMultiTail: TokenPattern = { + debugName: "fstringTerminatorMultiTail", + + begin: /((?:=?)(?:![rsa])?)(:)(?=.*?{)/dg, + beginCaptures: { + 1: { token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ }, + 2: { token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ }, + }, + end: /(?=})/g, + patterns: [ + fstringIllegalMultiBrace, + fstringMultiBrace, + { + debugName: "fstringTerminatorMultiTail.patterns![2]", + + token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ + match: /([bcdeEfFgGnosxX%])(?=})/g, + }, + { + debugName: "fstringTerminatorMultiTail.patterns![3]", + + token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ + match: /(\.\d+)/g, + }, + { + debugName: "fstringTerminatorMultiTail.patterns![4]", + + token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ + match: /(,)/g, + }, + { + debugName: "fstringTerminatorMultiTail.patterns![5]", + + token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ + match: /(\d+)/g, + }, + { + debugName: "fstringTerminatorMultiTail.patterns![6]", + + token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ + match: /(\#)/g, + }, + { + debugName: "fstringTerminatorMultiTail.patterns![7]", + + token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ + match: /([-+ ])/g, + }, + { + debugName: "fstringTerminatorMultiTail.patterns![8]", + + token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ + match: /([<>=^])/gm, + }, + { + debugName: "fstringTerminatorMultiTail.patterns![9]", + + token: MetaTokenType.FormatStorageType, /*storage.type.format.python*/ + match: /(\w)/g, + }, + ] +}; + +// Push pattern references that were not defined on include +python.patterns!.push(statement, expression); +statement.patterns!.push(importStatement, classDeclaration, functionDeclaration, generator, statementKeyword, assignmentOperator, decorator, docstringStatement, semicolon); +comments.patterns!.push(commentsBase); +docstringStatement.patterns!.push(docstring); +docstring.patterns![0].patterns!.push(docstringPrompt, codetags, docstringGutsUnicode); +docstring.patterns![1].patterns!.push(stringConsumeEscape, docstringPrompt, codetags); +docstring.patterns![2].patterns!.push(codetags, docstringGutsUnicode); +docstring.patterns![3].patterns!.push(stringConsumeEscape, codetags); +docstringGutsUnicode.patterns!.push(escapeSequenceUnicode, escapeSequence, stringLineContinuation); +expressionBare.patterns!.push(backticks, illegalAnno, literal, regexp, string, lambda, generator, illegalOperator, operator, curlyBraces, itemAccess, list, oddFunctionCall, roundBraces, functionCall, builtinFunctions, builtinTypes, builtinExceptions, magicNames, specialNames, illegalNames, specialVariables, ellipsis, punctuation, lineContinuation); +expressionBase.patterns!.push(lineContinuation); +expression.patterns!.splice(1, 0, memberAccess); +memberAccess.patterns!.push(functionCall, memberAccessBase, memberAccessAttribute); +memberAccessBase.patterns!.push(magicNames, illegalNames, illegalObjectName, specialNames, lineContinuation, itemAccess); +oddFunctionCall.patterns!.push(functionArguments); +lineContinuation.patterns![1].patterns!.push(regexp, string); +literal.patterns!.push(number); +number.patterns!.splice(0, 0, numberFloat); +number.patterns!.splice(1, 0, numberDec); +number.patterns!.splice(2, 0, numberHex); +number.patterns!.splice(3, 0, numberOct); +number.patterns!.splice(4, 0, numberBin); +number.patterns!.splice(5, 0, numberLong); +regexp.patterns!.push(regexpSingleThreeLine, regexpDoubleThreeLine, regexpSingleOneLine, regexpDoubleOneLine); +string.patterns!.push(stringQuotedMultiLine, stringQuotedSingleLine, stringBinQuotedMultiLine, stringBinQuotedSingleLine, stringRawQuotedMultiLine, stringRawQuotedSingleLine, stringRawBinQuotedMultiLine, stringRawBinQuotedSingleLine, fstringFnormQuotedMultiLine, fstringFnormQuotedSingleLine, fstringNormfQuotedMultiLine, fstringNormfQuotedSingleLine, fstringRawQuotedMultiLine, fstringRawQuotedSingleLine); +stringUnicodeGuts.patterns!.push(escapeSequenceUnicode, stringEntity, stringBraceFormatting); +stringRawGuts.patterns!.push(stringFormatting, stringBraceFormatting); +stringRawBinGuts.patterns!.push(stringFormatting); +stringEntity.patterns!.push(escapeSequence, stringLineContinuation, stringFormatting); +fstringGuts.patterns!.push(escapeSequenceUnicode, escapeSequence, stringLineContinuation, fstringFormatting); +fstringRawGuts.patterns!.push(fstringFormatting); +fstringIllegalSingleBrace.patterns!.push(fstringTerminatorSingle, fExpression); +fstringFormatting.patterns!.push(fstringFormattingBraces, fstringFormattingSingeBrace); +classDeclaration.patterns![0].patterns!.push(className, classInheritance); +className.patterns!.splice(0, 0, illegalObjectName); +className.patterns!.splice(1, 0, builtinPossibleCallables); +classInheritance.patterns!.splice(4, 0, illegalNames); +classInheritance.patterns!.splice(5, 0, classKwarg); +classInheritance.patterns!.splice(6, 0, callWrapperInheritance); +classInheritance.patterns!.push(memberAccessClass, inheritanceIdentifier); +memberAccessClass.patterns!.splice(0, 0, callWrapperInheritance); +lambda.patterns![2].patterns!.splice(2, 0, lambdaNestedIncomplete); +lambda.patterns![2].patterns!.splice(3, 0, illegalNames); +lambda.patterns![2].patterns!.splice(6, 0, backticks); +lambda.patterns![2].patterns!.splice(7, 0, illegalAnno); +lambda.patterns![2].patterns!.splice(8, 0, lambdaParameterWithDefault); +lambda.patterns![2].patterns!.push(illegalOperator); +functionDeclaration.patterns!.splice(0, 0, functionDefName); +functionDeclaration.patterns!.splice(1, 0, parameters); +functionDeclaration.patterns!.push(returnAnnotation); +functionDefName.patterns!.splice(0, 0, illegalObjectName); +functionDefName.patterns!.splice(1, 0, builtinPossibleCallables); +parameters.patterns!.splice(3, 0, illegalNames); +parameters.patterns!.splice(4, 0, illegalObjectName); +parameters.patterns!.splice(5, 0, parameterSpecial); +parameters.patterns!.push(looseDefault, annotatedParameter); +itemAccess.patterns![0].patterns!.splice(0, 0, itemName); +itemAccess.patterns![0].patterns!.splice(1, 0, itemIndex); +itemName.patterns!.splice(0, 0, specialVariables); +itemName.patterns!.splice(1, 0, builtinFunctions); +decorator.patterns!.push(decoratorName, functionArguments); +decoratorName.patterns!.splice(0, 0, builtinCallables); +decoratorName.patterns!.splice(1, 0, illegalObjectName); +callWrapperInheritance.patterns!.push(inheritanceName, functionArguments); +inheritanceName.patterns!.splice(1, 0, builtinPossibleCallables); +functionCall.patterns!.push(specialVariables, functionName, functionArguments); +functionName.patterns!.splice(0, 0, builtinPossibleCallables); +functionArguments.patterns!.splice(3, 0, illegalNames); +builtinCallables.patterns!.push(illegalNames, illegalObjectName, builtinExceptions, builtinFunctions, builtinTypes); +builtinPossibleCallables.patterns!.push(magicNames); +regexpBaseExpression.patterns!.push(regexpQuantifier, regexpBaseCommon); +fregexpBaseExpression.patterns!.splice(0, 0, fregexpQuantifier); +fregexpBaseExpression.patterns!.splice(1, 0, fstringFormattingBraces); +fregexpBaseExpression.patterns!.push(regexpBaseCommon); +regexpBaseCommon.patterns!.push(regexpEscapeSequence); +singleOneRegexpExpression.patterns!.splice(1, 0, singleOneRegexpCharacterSet); +singleOneRegexpExpression.patterns!.splice(2, 0, singleOneRegexpComments); +singleOneRegexpExpression.patterns!.splice(4, 0, singleOneRegexpNamedGroup); +singleOneRegexpExpression.patterns!.push(singleOneRegexpLookahead, singleOneRegexpLookaheadNegative, singleOneRegexpLookbehind, singleOneRegexpLookbehindNegative, singleOneRegexpConditional, singleOneRegexpParenthesesNonCapturing, singleOneRegexpParentheses); +singleThreeRegexpExpression.patterns!.splice(1, 0, singleThreeRegexpCharacterSet); +singleThreeRegexpExpression.patterns!.splice(2, 0, singleThreeRegexpComments); +singleThreeRegexpExpression.patterns!.splice(4, 0, singleThreeRegexpNamedGroup); +singleThreeRegexpExpression.patterns!.splice(6, 0, singleThreeRegexpLookahead); +singleThreeRegexpExpression.patterns!.splice(7, 0, singleThreeRegexpLookaheadNegative); +singleThreeRegexpExpression.patterns!.splice(8, 0, singleThreeRegexpLookbehind); +singleThreeRegexpExpression.patterns!.splice(9, 0, singleThreeRegexpLookbehindNegative); +singleThreeRegexpExpression.patterns!.splice(10, 0, singleThreeRegexpConditional); +singleThreeRegexpExpression.patterns!.splice(11, 0, singleThreeRegexpParenthesesNonCapturing); +singleThreeRegexpExpression.patterns!.splice(12, 0, singleThreeRegexpParentheses); +doubleOneRegexpExpression.patterns!.splice(1, 0, doubleOneRegexpCharacterSet); +doubleOneRegexpExpression.patterns!.splice(2, 0, doubleOneRegexpComments); +doubleOneRegexpExpression.patterns!.splice(4, 0, doubleOneRegexpNamedGroup); +doubleOneRegexpExpression.patterns!.push(doubleOneRegexpLookahead, doubleOneRegexpLookaheadNegative, doubleOneRegexpLookbehind, doubleOneRegexpLookbehindNegative, doubleOneRegexpConditional, doubleOneRegexpParenthesesNonCapturing, doubleOneRegexpParentheses); +doubleThreeRegexpExpression.patterns!.splice(1, 0, doubleThreeRegexpCharacterSet); +doubleThreeRegexpExpression.patterns!.splice(2, 0, doubleThreeRegexpComments); +doubleThreeRegexpExpression.patterns!.splice(4, 0, doubleThreeRegexpNamedGroup); +doubleThreeRegexpExpression.patterns!.splice(6, 0, doubleThreeRegexpLookahead); +doubleThreeRegexpExpression.patterns!.splice(7, 0, doubleThreeRegexpLookaheadNegative); +doubleThreeRegexpExpression.patterns!.splice(8, 0, doubleThreeRegexpLookbehind); +doubleThreeRegexpExpression.patterns!.splice(9, 0, doubleThreeRegexpLookbehindNegative); +doubleThreeRegexpExpression.patterns!.splice(10, 0, doubleThreeRegexpConditional); +doubleThreeRegexpExpression.patterns!.splice(11, 0, doubleThreeRegexpParenthesesNonCapturing); +doubleThreeRegexpExpression.patterns!.splice(12, 0, doubleThreeRegexpParentheses); +stringRawQuotedSingleLine.patterns!.splice(0, 0, stringSingleBadBrace1FormattingRaw); +stringRawQuotedSingleLine.patterns!.splice(1, 0, stringSingleBadBrace2FormattingRaw); +stringQuotedSingleLine.patterns!.splice(0, 0, stringSingleBadBrace1FormattingUnicode); +stringQuotedSingleLine.patterns!.splice(1, 0, stringSingleBadBrace2FormattingUnicode); +stringRawQuotedMultiLine.patterns!.splice(0, 0, stringMultiBadBrace1FormattingRaw); +stringRawQuotedMultiLine.patterns!.splice(1, 0, stringMultiBadBrace2FormattingRaw); +stringQuotedMultiLine.patterns!.splice(0, 0, stringMultiBadBrace1FormattingUnicode); +stringQuotedMultiLine.patterns!.splice(1, 0, stringMultiBadBrace2FormattingUnicode); +fstringFnormQuotedSingleLine.patterns!.push(fstringSingleBrace, fstringSingleCore); +fstringNormfQuotedSingleLine.patterns!.push(fstringSingleBrace, fstringSingleCore); +fstringRawQuotedSingleLine.patterns!.push(fstringSingleBrace, fstringRawSingleCore); +fstringSingleBrace.patterns!.splice(0, 0, fstringTerminatorSingle); +fstringTerminatorSingle.patterns!.push(fstringTerminatorSingleTail); +fstringFnormQuotedMultiLine.patterns!.push(fstringMultiBrace, fstringMultiCore); +fstringNormfQuotedMultiLine.patterns!.push(fstringMultiBrace, fstringMultiCore); +fstringRawQuotedMultiLine.patterns!.push(fstringMultiBrace, fstringRawMultiCore); +fstringMultiBrace.patterns!.splice(0, 0, fstringTerminatorMulti); +fstringTerminatorMulti.patterns!.push(fstringTerminatorMultiTail); \ No newline at end of file diff --git a/src/tokenizer/python-token-patterns.ts b/src/tokenizer/python-token-patterns.ts deleted file mode 100644 index 5940792..0000000 --- a/src/tokenizer/python-token-patterns.ts +++ /dev/null @@ -1,101 +0,0 @@ -/* eslint-disable no-useless-escape */ -/* eslint-disable @typescript-eslint/no-non-null-assertion */ -/* eslint-disable no-useless-backreference */ -import { charactersPatten, strings } from "./common-token-patterns"; -import { MetaTokenType } from "./renpy-tokens"; -import { TokenPattern, TokenRepoPattern } from "./token-pattern-types"; - -export const pythonMemberAccess: TokenPattern = { - patterns: [charactersPatten], -}; - -export const pythonExpressionBare: TokenPattern = { - patterns: [charactersPatten], -}; -export const pythonExpressionBase: TokenPattern = { - patterns: [strings], -}; -export const pythonExpression: TokenPattern = { - // All valid Python expressions - patterns: [pythonExpressionBase, pythonMemberAccess, charactersPatten], -}; - -export const semicolon: TokenPattern = { - patterns: [ - { - token: MetaTokenType.Invalid, - match: /\\;$/g, - }, - ], -}; - -export const pythonLiteral: TokenPattern = { - patterns: [charactersPatten], -}; -export const pythonIllegalOperator: TokenPattern = { - patterns: [charactersPatten], -}; -export const pythonOperator: TokenPattern = { - patterns: [charactersPatten], -}; -export const pythonCurlyBraces: TokenPattern = { - patterns: [charactersPatten], -}; -export const pythonItemAccess: TokenPattern = { - patterns: [charactersPatten], -}; -export const pythonList: TokenPattern = { - patterns: [charactersPatten], -}; -export const pythonOddFunctionCall: TokenPattern = { - patterns: [charactersPatten], -}; -export const pythonRoundBraces: TokenPattern = { - patterns: [charactersPatten], -}; -export const pythonFunctionCall: TokenPattern = { - patterns: [charactersPatten], -}; -export const pythonBuiltinFunctions: TokenPattern = { - patterns: [charactersPatten], -}; -export const pythonBuiltinTypes: TokenPattern = { - patterns: [charactersPatten], -}; -export const pythonBuiltinExceptions: TokenPattern = { - patterns: [charactersPatten], -}; -export const pythonMagicNames: TokenPattern = { - patterns: [charactersPatten], -}; -export const pythonSpecialNames: TokenPattern = { - patterns: [charactersPatten], -}; -export const pythonIllegalNames: TokenPattern = { - patterns: [charactersPatten], -}; -export const pythonSpecialVariables: TokenPattern = { - patterns: [charactersPatten], -}; -export const pythonEllipsis: TokenPattern = { - patterns: [charactersPatten], -}; -export const pythonPunctuation: TokenPattern = { - patterns: [charactersPatten], -}; -export const pythonLineContinuation: TokenPattern = { - patterns: [charactersPatten], -}; -export const pythonBuiltinPossibleCallables: TokenPattern = { - patterns: [charactersPatten], -}; -export const pythonFunctionArguments: TokenPattern = { - patterns: [charactersPatten], -}; -export const pythonNumber: TokenPattern = { - patterns: [charactersPatten], -}; - -export const pythonPatterns: TokenRepoPattern = { - patterns: [], -}; diff --git a/src/tokenizer/renpy-token-patterns.g.ts b/src/tokenizer/renpy-token-patterns.g.ts new file mode 100644 index 0000000..18a72c7 --- /dev/null +++ b/src/tokenizer/renpy-token-patterns.g.ts @@ -0,0 +1,1728 @@ +/* eslint-disable no-useless-escape */ +/* eslint-disable no-useless-backreference */ +/* eslint-disable @typescript-eslint/no-non-null-assertion */ + +// THIS FILE HAS BEEN GENERATED BY THE `syntax-to-token-pattern.py` GENERATOR +// DO NOT EDIT THIS FILE DIRECTLY! INSTEAD RUN THE PYTHON SCRIPT. +// ANY MANUAL EDITS MADE TO THIS FILE WILL BE OVERWRITTEN. YOU HAVE BEEN WARNED. +// Last generated: 01/06/2023 14:57:48 (UTC+0) + +import { MetaTokenType, CharacterTokenType, LiteralTokenType, KeywordTokenType, EscapedCharacterTokenType, EntityTokenType, OperatorTokenType } from "./renpy-tokens"; +import { TokenPattern } from "./token-pattern-types"; + +export const statements: TokenPattern = { + patterns: [ + ] +}; + +export const expressions: TokenPattern = { + patterns: [ + ] +}; + +export const invalidToken: TokenPattern = { + debugName: "invalidToken", + + token: MetaTokenType.Invalid, /*invalid.unknown.token.renpy*/ + match: /.+/g, +}; + +export const newLine: TokenPattern = { + debugName: "newLine", + + token: CharacterTokenType.NewLine, /*punctuation.new-line.renpy*/ + match: /\r\n|\r|\n/g, +}; + +export const whitespace: TokenPattern = { + debugName: "whitespace", + + token: CharacterTokenType.Whitespace, /*punctuation.whitespace.renpy*/ + match: /[ \t]+/g, +}; + +export const fallbackCharacters: TokenPattern = { + // NOTE: Having these patterns separated increases performance. Benchmark before making a change! + patterns: [ + newLine, + whitespace, + { + debugName: "fallbackCharacters.patterns![2]", + + token: CharacterTokenType.OpenParentheses, /*punctuation.parenthesis.begin.renpy*/ + match: /\(/g, + }, + { + debugName: "fallbackCharacters.patterns![3]", + + token: CharacterTokenType.CloseParentheses, /*punctuation.parenthesis.end.renpy*/ + match: /\)/g, + }, + { + debugName: "fallbackCharacters.patterns![4]", + + token: CharacterTokenType.OpenBracket, /*punctuation.bracket.begin.renpy*/ + match: /{/g, + }, + { + debugName: "fallbackCharacters.patterns![5]", + + token: CharacterTokenType.CloseBracket, /*punctuation.bracket.end.renpy*/ + match: /}/g, + }, + { + debugName: "fallbackCharacters.patterns![6]", + + token: CharacterTokenType.OpenSquareBracket, /*punctuation.square-bracket.begin.renpy*/ + match: /\[/g, + }, + { + debugName: "fallbackCharacters.patterns![7]", + + token: CharacterTokenType.CloseSquareBracket, /*punctuation.square-bracket.end.renpy*/ + match: /\]/g, + }, + { + debugName: "fallbackCharacters.patterns![8]", + + token: CharacterTokenType.Period, /*punctuation.period.renpy*/ + match: /\./g, + }, + { + debugName: "fallbackCharacters.patterns![9]", + + token: CharacterTokenType.Colon, /*punctuation.colon.renpy*/ + match: /:/g, + }, + { + debugName: "fallbackCharacters.patterns![10]", + + token: CharacterTokenType.Semicolon, /*punctuation.semicolon.renpy*/ + match: /;/g, + }, + { + debugName: "fallbackCharacters.patterns![11]", + + token: CharacterTokenType.Comma, /*punctuation.comma.renpy*/ + match: /,/g, + }, + { + debugName: "fallbackCharacters.patterns![12]", + + token: CharacterTokenType.Hashtag, /*punctuation.hashtag.renpy*/ + match: /#/g, + }, + { + debugName: "fallbackCharacters.patterns![13]", + + token: CharacterTokenType.Quote, /*punctuation.quote.renpy*/ + match: /'/g, + }, + { + debugName: "fallbackCharacters.patterns![14]", + + token: CharacterTokenType.DoubleQuote, /*punctuation.double-quote.renpy*/ + match: /"/g, + }, + { + debugName: "fallbackCharacters.patterns![15]", + + token: CharacterTokenType.BackQuote, /*punctuation.back-quote.renpy*/ + match: /`/g, + }, + { + debugName: "fallbackCharacters.patterns![16]", + + token: CharacterTokenType.Backslash, /*punctuation.backslash.renpy*/ + match: /\\/g, + }, + { + debugName: "fallbackCharacters.patterns![17]", + + token: CharacterTokenType.ForwardSlash, /*punctuation.forward-slash.renpy*/ + match: /\//g, + }, + { + debugName: "fallbackCharacters.patterns![18]", + + token: CharacterTokenType.EqualsSymbol, /*punctuation.equals-symbol.renpy*/ + match: /=/g, + }, + { + debugName: "fallbackCharacters.patterns![19]", + + token: MetaTokenType.Invalid, /*invalid.illegal.word.renpy*/ + match: /\b\w+\b/g, + }, + { + debugName: "fallbackCharacters.patterns![20]", + + token: CharacterTokenType.Unknown, /*punctuation.unknown.renpy*/ + match: /./g, + }, + ] +}; + +export const literal: TokenPattern = { + patterns: [ + { + debugName: "literal.patterns![0]", + + // Python literals + token: MetaTokenType.ConstantLiteral, /*constant.language.renpy*/ + match: /\b(?> // Assignment operators - Assign, // = + Assignment, // = PlusAssign, // += MinusAssign, // -= MultiplyAssign, // *= @@ -163,8 +190,21 @@ export const enum OperatorTokenType { Is, // is IsNot, // is not - In, // in NotIn, // not in + + Unpacking, // * or ** + PositionalParameter, // / + + // Regex operators + Quantifier, // [+*?]\?? + Disjunction, // | + Negation, // ^ + Lookahead, // (?= + LookaheadNegative, // (?! + Lookbehind, // (?<= + LookbehindNegative, // (?= this.start && position <= this.end; + } +} export class TokenPosition { line: number; @@ -53,8 +72,6 @@ export class TokenPosition { export class Token { readonly tokenType: TokenType; - - // README: The tokenizer abuses that 'startPos' and 'endPos' are reference objects to move the positions! readonly startPos: TokenPosition; readonly endPos: TokenPosition; @@ -64,7 +81,7 @@ export class Token { this.endPos = endPos; } - public getRange() { + public getVSCodeRange() { const start = new Position(this.startPos.line, this.startPos.character); const end = new Position(this.endPos.line, this.endPos.character); @@ -72,7 +89,11 @@ export class Token { console.warn(`Empty token detected at L: ${start.line + 1}, C: ${start.character + 1} !`); } - return new Range(start, end); + return new VSRange(start, end); + } + + public getRange() { + return new Range(this.startPos.charStartOffset, this.endPos.charStartOffset); } public isKeyword() { @@ -123,3 +144,76 @@ export function isMatchPattern(p: TokenPattern): p is TokenMatchPattern { export function isRepoPattern(p: TokenPattern): p is TokenRepoPattern { return !isRangePattern(p) && (p as TokenRepoPattern).patterns !== undefined; } + +export class TreeNode { + public token: Token | null; + public children: Vector; + + constructor(token: Token | null = null) { + this.token = token; + this.children = new Vector(); + } + + public addChild(child: TreeNode): void { + this.children.pushBack(child); + } + + public hasChildren(): boolean { + return !this.children.isEmpty(); + } + + public isEmpty(): boolean { + return this.token === null && !this.hasChildren(); + } + + // Recursively iterate over all children + public forEach(callback: (node: TreeNode) => void): void { + this.children.forEach((child) => { + callback(child); + child.forEach(callback); + }); + } + + public filter(callback: (node: TreeNode) => boolean): TreeNode[] { + const result: TreeNode[] = []; + this.forEach((node) => { + if (callback(node)) { + result.push(node); + } + }); + return result; + } + + public count(): number { + // Recursively iterate over all children + let count = 0; + this.forEach(() => { + ++count; + }); + return count; + } +} + +export class TokenTree { + public root: TreeNode; + + constructor() { + this.root = new TreeNode(); + } + + public isEmpty(): boolean { + return !this.root.hasChildren(); + } + + public forEach(callback: (node: TreeNode) => void): void { + this.root.forEach(callback); + } + + public filter(callback: (node: TreeNode) => boolean): TreeNode[] { + return this.root.filter(callback); + } + + public count(): number { + return this.root.count(); + } +} diff --git a/src/tokenizer/token-pattern-types.ts b/src/tokenizer/token-pattern-types.ts index 9c10a95..76d82a0 100644 --- a/src/tokenizer/token-pattern-types.ts +++ b/src/tokenizer/token-pattern-types.ts @@ -1,6 +1,10 @@ import { TokenType } from "./renpy-tokens"; -export interface TokenCapturePattern { +interface TokenPatternDebugInfo { + readonly debugName?: string; +} + +export interface TokenCapturePattern extends TokenPatternDebugInfo { readonly token?: TokenType; readonly patterns?: TokenPatternArray; } @@ -54,9 +58,5 @@ export interface TokenMatchPattern { endCaptures?: never; } -interface TokenPatternDebugInfo { - readonly debugName?: string; -} - export declare type TokenPattern = (TokenRangePattern | TokenMatchPattern | TokenRepoPattern) & TokenPatternDebugInfo; export declare type TokenPatternArray = Array; diff --git a/src/tokenizer/token-patterns.g.ts b/src/tokenizer/token-patterns.g.ts new file mode 100644 index 0000000..cd6791a --- /dev/null +++ b/src/tokenizer/token-patterns.g.ts @@ -0,0 +1,59 @@ +/* eslint-disable @typescript-eslint/no-non-null-assertion */ + +// THIS FILE HAS BEEN GENERATED BY THE `syntax-to-token-pattern.py` GENERATOR +// DO NOT EDIT THIS FILE DIRECTLY! INSTEAD RUN THE PYTHON SCRIPT. +// ANY MANUAL EDITS MADE TO THIS FILE WILL BE OVERWRITTEN. YOU HAVE BEEN WARNED. +// Last generated: 01/06/2023 14:57:48 (UTC+0) + +import * as AtlPatterns from "./atl-token-patterns.g"; +import * as RenpyPatterns from "./renpy-token-patterns.g"; +import * as PythonPatterns from "./python-token-patterns.g"; + +// Push all RenpyPatterns external includes +RenpyPatterns.keywords.patterns![1].patterns!.push(PythonPatterns.expression); +RenpyPatterns.constantPlaceholder.captures![2].patterns!.push(PythonPatterns.expression); +RenpyPatterns.pythonStatements.patterns![1].patterns!.push(PythonPatterns.python); +RenpyPatterns.pythonStatements.patterns![2].patterns!.splice(2, 0, PythonPatterns.expression); +RenpyPatterns.sayStatements.patterns![0].endCaptures![3].patterns!.push(PythonPatterns.functionArguments); +RenpyPatterns.sayStatements.patterns![1].endCaptures![3].patterns!.push(PythonPatterns.functionArguments); +RenpyPatterns.transform.patterns!.push(AtlPatterns.atl); +RenpyPatterns.image.patterns![0].patterns!.push(AtlPatterns.atl); +RenpyPatterns.show.patterns![0].patterns!.push(AtlPatterns.atl); +RenpyPatterns.scene.patterns![0].patterns!.push(AtlPatterns.atl); +RenpyPatterns.camera.patterns![0].patterns!.push(AtlPatterns.atl); +RenpyPatterns.screenDefName.patterns!.splice(0, 0, PythonPatterns.builtinPossibleCallables); +RenpyPatterns.screenSimpleExpression.patterns!.push(PythonPatterns.literal, PythonPatterns.memberAccess, PythonPatterns.illegalOperator, PythonPatterns.operator, PythonPatterns.curlyBraces, PythonPatterns.itemAccess, PythonPatterns.list, PythonPatterns.oddFunctionCall, PythonPatterns.roundBraces, PythonPatterns.functionCall, PythonPatterns.builtinFunctions, PythonPatterns.builtinTypes, PythonPatterns.builtinExceptions, PythonPatterns.magicNames, PythonPatterns.specialNames, PythonPatterns.illegalNames, PythonPatterns.specialVariables, PythonPatterns.ellipsis, PythonPatterns.punctuation, PythonPatterns.lineContinuation); +RenpyPatterns.screenText.patterns![0].patterns!.push(AtlPatterns.atl); +RenpyPatterns.screen.patterns![0].beginCaptures![3].patterns!.splice(1, 0, PythonPatterns.parameters); +RenpyPatterns.labelName.patterns!.splice(0, 0, PythonPatterns.builtinPossibleCallables); +RenpyPatterns.labelCall.patterns!.splice(0, 0, PythonPatterns.specialVariables); +RenpyPatterns.labelCall.patterns!.push(PythonPatterns.functionArguments); +RenpyPatterns.labelDefName.patterns!.splice(0, 0, PythonPatterns.builtinPossibleCallables); +RenpyPatterns.label.captures![2].patterns!.splice(1, 0, PythonPatterns.parameters); +RenpyPatterns.returnStatements.patterns!.push(PythonPatterns.expression); +RenpyPatterns.callJumpExpression.patterns!.push(PythonPatterns.expression); +RenpyPatterns.callPass.patterns!.push(PythonPatterns.functionArguments); +RenpyPatterns.menuOption.beginCaptures![3].patterns!.splice(0, 0, PythonPatterns.functionArguments); +RenpyPatterns.menuOption.beginCaptures![3].patterns![1].captures![2].patterns!.push(PythonPatterns.expressionBare); +RenpyPatterns.menuSet.captures![2].patterns!.push(PythonPatterns.expressionBare); +RenpyPatterns.menu.beginCaptures![3].patterns!.push(PythonPatterns.functionArguments); +RenpyPatterns.audioParams.patterns!.push(PythonPatterns.number); +RenpyPatterns.play.patterns![0].patterns!.push(PythonPatterns.expression); +RenpyPatterns.queue.patterns![0].patterns!.push(PythonPatterns.expression); +RenpyPatterns.stop.patterns![0].patterns!.push(PythonPatterns.number); + +// Push all AtlPatterns external includes +AtlPatterns.atlSimpleExpression.patterns!.splice(0, 0, RenpyPatterns.expressions); +AtlPatterns.atlSimpleExpression.patterns!.push(PythonPatterns.literal, PythonPatterns.memberAccess, PythonPatterns.illegalOperator, PythonPatterns.operator, PythonPatterns.curlyBraces, PythonPatterns.itemAccess, PythonPatterns.list, PythonPatterns.oddFunctionCall, PythonPatterns.roundBraces, PythonPatterns.functionCall, PythonPatterns.builtinFunctions, PythonPatterns.builtinTypes, PythonPatterns.builtinExceptions, PythonPatterns.magicNames, PythonPatterns.specialNames, PythonPatterns.illegalNames, PythonPatterns.specialVariables, PythonPatterns.ellipsis, PythonPatterns.punctuation, PythonPatterns.lineContinuation); +AtlPatterns.atlExpression.patterns!.push(PythonPatterns.expression); +AtlPatterns.atlEventName.patterns!.splice(0, 0, PythonPatterns.builtinPossibleCallables); +AtlPatterns.atlEventDefName.patterns!.splice(0, 0, PythonPatterns.builtinPossibleCallables); +AtlPatterns.atlOn.beginCaptures![3].patterns!.push(RenpyPatterns.comments); +AtlPatterns.atlFunction.captures![2].patterns!.splice(0, 0, PythonPatterns.builtinPossibleCallables); +AtlPatterns.atlWarperName.patterns!.splice(0, 0, PythonPatterns.builtinPossibleCallables); + +// Push all PythonPatterns external includes +PythonPatterns.stringUnicodeGuts.patterns!.splice(0, 0, RenpyPatterns.stringsInterior); + + +export { AtlPatterns, RenpyPatterns, PythonPatterns }; \ No newline at end of file diff --git a/src/tokenizer/token-patterns.ts b/src/tokenizer/token-patterns.ts deleted file mode 100644 index ceb3fe0..0000000 --- a/src/tokenizer/token-patterns.ts +++ /dev/null @@ -1,953 +0,0 @@ -/* eslint-disable no-useless-escape */ -/* eslint-disable @typescript-eslint/no-non-null-assertion */ -/* eslint-disable no-useless-backreference */ - -import { CharacterTokenType, LiteralTokenType, EntityTokenType, KeywordTokenType, MetaTokenType, OperatorTokenType } from "./renpy-tokens"; -import { comments, expressions, newLine, statements, charactersPatten, whiteSpace, numFloat, numInt, invalidToken, strings, stringsInterior } from "./common-token-patterns"; -import { atl } from "./atl-token-patterns"; -import { pythonBuiltinPossibleCallables, pythonExpression, pythonExpressionBare, pythonFunctionArguments, pythonNumber, pythonSpecialVariables } from "./python-token-patterns"; -import { TokenPattern } from "./token-pattern-types"; -// NOTE: These patterns are converted from the tmLanguage file. -// ANY CHANGES MADE HERE SHOULD BE PORTED TO THAT FILE AS WELL -// Copy the patterns (the contents of the repository group) over and apply the following find and replace patterns: - -// find: ^( +)"(\w+?)(?:[\-_](\w+?))?(?:[\-_](\w+?))?(?:[\-_](\w+?))?": \{$\n((?:^.*$\n)+?)^\1\},? -// replace with: const \L$2\u$3\u$4\u$5: TokenPattern = {\n$6}; - -// find: \{ "include": "#?(\w+?)(?:[\-_](\w+?))?(?:[\-_](\w+?))?(?:[\-_](\w+?))?" \} -// find: \{ "include": "([\w\.]+)#?(\w+?)(?:[\-_](\w+?))?(?:[\-_](\w+?))?(?:[\-_](\w+?))?" \} -// replace with: \L$1\u$2\u$3\u$4 - -// find: (?<=^ *|\{ )"comment": "(.*?)"(?=,$| \}),? -// replace with: // $1 - -// find: (?<=^ *|\{ )"name": "(.*?)"(?=,$| \}) -// replace with: token: "$1" - -// find: (?<=^ *|\{ )"contentName": "(.*?)"(?=,$| \}) -// replace with: contentToken: "$1" - -// find: (?<=^ *|\{ )"(.*?)"(?=: [{["]) -// replace with: $1 - -// find: (?<=(?:^ *|\{ )(?:match|begin|end): /.*?)\\\\(?=.*?/dg,?$) -// replace with: \ - -// find: (?<=(?:^ *|\{ )(?:match|begin|end): )"(.*?)"(?=,?$) -// replace with: /$1/dg - -// Result should be manually fixed -// Make sure to include this in internal captures to detect all newline tokens - -const lineContinuationPattern = /^(?=(?!\1)[ \t]*[^\s#]|\1[^\s#])|\Z/gm; - -export const basePatterns: TokenPattern = { - debugName: "basePatterns", - - patterns: [statements, expressions], -}; - -const literal: TokenPattern = { - debugName: "literal", - - patterns: [ - { - // Python literals, - token: LiteralTokenType.Boolean, - match: /\b(? { + if (obj === undefined) return undefined; + if (obj === null) return null; + return { ...obj }; +}; + +const cloneCache = (obj: Array): Array => { + const clone = new Array(obj.length); + for (let i = 0; i < obj.length; ++i) { + clone[i] = cloneScanResult(obj[i]); + } + return clone; +}; + +interface MatchScanResult { + pattern: ExTokenPattern; + matchBegin: RegExpExecArray; + matchEnd?: never; + expanded?: never; + contentMatches?: never; + source?: never; +} +interface RangeScanResult { + pattern: ExTokenPattern; + matchBegin: RegExpExecArray; + matchEnd: RegExpExecArray | null; + expanded: boolean; + contentMatches: Stack | null; + source: string; +} +type ScanResult = MatchScanResult | RangeScanResult | null; +type TokenCache = { readonly documentVersion: number; readonly tokens: TokenTree }; const tokenCache = new Map(); const runBenchmark = false; let uniquePatternCount = -1; -export function tokenizeDocument(document: TextDocument): Token[] { +export function tokenizeDocument(document: TextDocument): TokenTree { setupAndValidatePatterns(); - if (runBenchmark) benchmark(document); + if (runBenchmark) { + benchmark(document); + } const cachedTokens = tokenCache.get(document.uri); if (cachedTokens?.documentVersion === document.version) { @@ -29,9 +60,8 @@ export function tokenizeDocument(document: TextDocument): Token[] { console.log(`Running tokenizer on document: ${document.fileName}`); const tokenizer = new DocumentTokenizer(document); console.log(`Tokenizer completed!`); - const tokens = tokenizer.tokens.toArray(); - tokenCache.set(document.uri, { documentVersion: document.version, tokens: tokens }); - return tokens; + tokenCache.set(document.uri, { documentVersion: document.version, tokens: tokenizer.tokens }); + return tokenizer.tokens; } export function clearTokenCache() { @@ -86,9 +116,10 @@ function setupAndValidatePatterns() { uniquePatternCount = 0; const stack = new Stack(32); - stack.push(basePatterns as ExTokenRepoPattern); + stack.push(RenpyPatterns.basePatterns as ExTokenRepoPattern); const mFlagRe = /(? = new Vector(16384); + public readonly tokens: TokenTree = new TokenTree(); + private readonly document: TextDocument; constructor(document: TextDocument) { + this.document = document; const text = document.getText(); - const carret = new TokenPosition(0, 0, 0); - this.executePattern(basePatterns as ExTokenRepoPattern, text, carret); + this.executePattern(RenpyPatterns.basePatterns as ExTokenRepoPattern, text, new Range(0, text.length), this.tokens.root); + } + + private checkTokenTreeCoverage(root: TreeNode, matchRange: Range): { valid: boolean; gaps: Range[] } { + // Collect all token ranges + const tokenRanges = new Vector(root.count()); + if (root.token) { + tokenRanges.pushBack(root.token.getRange()); + } + root.forEach((node) => { + if (node.token) { + tokenRanges.pushBack(node.token.getRange()); + } + }); + + // Sort the token ranges by their start position + tokenRanges.sort((a, b) => a.start - b.start); + + // Check if the combined ranges of all tokens overlap the entire character range of the match + let currentEnd = matchRange.start; + const gaps: Range[] = []; + for (const range of tokenRanges) { + if (!matchRange.contains(range.start)) { + // The start of the next token range is outside the match range + return { valid: false, gaps }; + } + if (range.start > currentEnd) { + // There is a gap between the current end position and the start of the next token range + gaps.push(new Range(currentEnd, range.start)); + } + currentEnd = Math.max(currentEnd, range.end); + } + if (currentEnd < matchRange.end) { + // The last token range does not extend to the end of the match + gaps.push(new Range(currentEnd, matchRange.end)); + } + return { valid: gaps.length === 0, gaps }; } /** @@ -190,159 +272,195 @@ class DocumentTokenizer { * @param match The match to apply the captures on * @param caret The reader head position within the document */ - private applyCaptures(captures: TokenPatternCapture, match: RegExpExecArray, caret: TokenPosition) { - // TODO: Match 0 is like a second iteration on the previously matched text. - // What needs to happen is that all matches from the first iteration are merged with the matches from the second iteration. - const originalCaret = caret.clone(); + private applyCaptures(captures: TokenPatternCapture, match: RegExpExecArray, source: string, parentNode: TreeNode) { + let rootNode = parentNode; - let lastMatchEnd = match.index; - for (let i = 1; i < match.indices!.length; i++) { - if (match.indices![i] === undefined) continue; // If the object at i is undefined, the capture is empty + if (captures[0] !== undefined) { + // If capture 0 is used, treat it as a wrapper token. + rootNode = new TreeNode(); + parentNode.addChild(rootNode); + } - if (captures[i] === undefined) { - console.warn(`There is no pattern defined for capture group '${i}', on a pattern that matched '${match[i]}' near L:${caret.line + 1} C:${caret.character + 1}.\nThis should probably be added or be a non-capturing group.`); - continue; + for (let i = 1; i < match.indices!.length; i++) { + if (match.indices![i] === undefined) { + continue; // If the object at i is undefined, the capture is empty } - const p = captures[i]; - const content = match[i]; - - // Update the position carets const [startPos, endPos] = match.indices![i]; - // Check for missing characters in a match - const matchOffset = startPos - lastMatchEnd; - if (matchOffset !== 0) { - // TODO: Fix match 0 pattern capture to include the missing tokens - /*console.warn( - `A capture was misaligned (expected: ${startPos}, got: ${lastMatchEnd}) on a pattern that matched '${content}' near L:${caret.line + 1} C:${ - caret.character + 1 - }.\nYou should probably update the pattern to capture everything.\nApplying a fix...` - );*/ - caret.advance(matchOffset); + if (captures[i] === undefined) { + const pos = this.positionAt(startPos); + console.warn(`There is no pattern defined for capture group '${i}', on a pattern that matched '${match[i]}' near L:${pos.line + 1} C:${pos.character + 1}.\nThis should probably be added or be a non-capturing group.`); + continue; } - lastMatchEnd = endPos; - - const startCaret = caret.clone(); - const endCaret = caret.clone(); - endCaret.advance(content.length); // Move caret to end of the current match + const p = captures[i]; + const captureNode = new TreeNode(); if (p.token) { - if (p.token === CharacterTokenType.NewLine) endCaret.nextLine(); - this.tokens.pushBack(new Token(p.token, startCaret, endCaret)); + captureNode.token = new Token(p.token, this.positionAt(startPos), this.positionAt(endPos)); } if (p.patterns) { - const captureCaret = startCaret.clone(); - this.executePattern(p as ExTokenRepoPattern, content, captureCaret); - - assert(captureCaret.charStartOffset === endCaret.charStartOffset, "The token read position was misaligned by the capture context!"); - - if (captureCaret.line !== endCaret.line) { - // Note: Moving the endCaret will also move the token, since this is a reference object - endCaret.setValue(captureCaret); - } + this.executePattern(p as ExTokenRepoPattern, source, new Range(startPos, endPos), captureNode); } - caret.setValue(endCaret); + if (!captureNode.isEmpty()) { + rootNode.addChild(captureNode); + } } - // TODO: This also gets misaligned if the whole match is a token and contains additional captures - // TODO: The system should be updated to build a token list/tree. When that's done we can simply compare and merge the tokens instead of this hack. - // Special case for captures[0] which is the entire match + if (captures[0] !== undefined) { const p = captures[0]; const content = match[0]; - const startCaret = originalCaret.clone(); - const endCaret = originalCaret.clone(); - endCaret.advance(content.length); // Move caret to end of the current match + const startPos = match.index; + const endPos = startPos + content.length; if (p.token) { - if (p.token === CharacterTokenType.NewLine) endCaret.nextLine(); - this.tokens.pushBack(new Token(p.token, startCaret, endCaret)); + rootNode.token = new Token(p.token, this.positionAt(startPos), this.positionAt(endPos)); } if (p.patterns) { - const captureCaret = startCaret.clone(); - this.executePattern(p as ExTokenRepoPattern, content, captureCaret); - - assert(captureCaret.charStartOffset === endCaret.charStartOffset, "The token read position was misaligned by the capture context!"); + const captureNode = new TreeNode(); + rootNode.addChild(captureNode); - if (captureCaret.line !== endCaret.line) { - // Note: Moving the endCaret will also move the token, since this is a reference object - endCaret.setValue(captureCaret); - } + this.executePattern(p as ExTokenRepoPattern, source, new Range(startPos, endPos), captureNode); } - - // TODO: For now assume that having the 0 capture, means all characters have tokens assigned - caret.setValue(endCaret); } } - private scanMatchPattern(pattern: ExTokenMatchPattern, text: string, matchOffsetStart: number, cache: Array): ScanResult { + private scanMatchPattern(pattern: ExTokenMatchPattern, source: string, sourceStartOffset: number): MatchScanResult | null { const re = pattern.match; - re.lastIndex = matchOffsetStart; - const match = re.exec(text); + re.lastIndex = sourceStartOffset; + const match = re.exec(source); if (match) { - const result = { pattern: pattern, matchBegin: match }; - cache[pattern._patternId] = result; + const result = { pattern, matchBegin: match }; return result; } - cache[pattern._patternId] = null; return null; } - private scanRangePattern(next: ExTokenRangePattern, text: string, matchOffsetStart: number, cache: Array): ScanResult { - const reBegin = next.begin; - reBegin.lastIndex = matchOffsetStart; - const matchBegin = reBegin.exec(text); + private scanRangePattern(pattern: ExTokenRangePattern, source: string, sourceStartOffset: number): RangeScanResult | null { + const reBegin = pattern.begin; + reBegin.lastIndex = sourceStartOffset; + const matchBegin = reBegin.exec(source); if (matchBegin) { - let reEnd = next.end; + return { pattern, matchBegin: matchBegin, matchEnd: null, expanded: false, contentMatches: null, source }; + } - // Replace all back references in end source - if (next._hasBackref) { - let reEndSource = next.end.source; + return null; + } - this.backrefReplaceRe.lastIndex = 0; - reEndSource = reEndSource.replace(this.backrefReplaceRe, (_, g1) => { - const content = matchBegin.at(parseInt(g1, 10)); - if (content !== undefined) return escapeRegExpCharacters(content); - return ""; - }); + private expandRangeScanResult(result: RangeScanResult, cache: Array) { + const p = result.pattern as ExTokenRangePattern; + const matchBegin = result.matchBegin; - reEnd = new RegExp(reEndSource, next.end.flags); - } + let reEnd = p.end; + + // Replace all back references in end source + if (p._hasBackref) { + let reEndSource = p.end.source; - // Start end pattern after the last matched character in the begin pattern - reEnd.lastIndex = matchBegin.index + matchBegin[0].length; - const matchEnd = reEnd.exec(text); + this.backrefReplaceRe.lastIndex = 0; + reEndSource = reEndSource.replace(this.backrefReplaceRe, (_, g1) => { + const backref = matchBegin.at(parseInt(g1, 10)); + if (backref !== undefined) { + return escapeRegExpCharacters(backref); + } + return ""; + }); - if (matchEnd) { - const result = { pattern: next, matchBegin: matchBegin, matchEnd: matchEnd }; - cache[next._patternId] = result; - return result; + reEnd = new RegExp(reEndSource, p.end.flags); + } + + // Start end pattern after the last matched character in the begin pattern + reEnd.lastIndex = matchBegin.index + matchBegin[0].length; + let matchEnd = reEnd.exec(result.source); + const contentMatches = new Stack(); + + if (matchEnd) { + // Check if any child pattern has content that would extend the currently determined end match + if (p._patternsRepo) { + const contentStartIndex = matchBegin.index + matchBegin[0].length; + const contentEndIndex = matchEnd.index; + const lastMatchedChar = matchEnd.index + matchEnd[0].length; + + // Scan the content for any matches that would extend beyond the current end match + const tempCache = cloneCache(cache); + //const tempCache = new Array(uniquePatternCount).fill(undefined); + + for (let lastMatchIndex = contentStartIndex; lastMatchIndex < contentEndIndex; ) { + const bestChildMatch = this.scanPattern(p._patternsRepo, result.source, lastMatchIndex, tempCache); + if (!bestChildMatch) { + break; // No more matches + } + + // Update the last match index to the end of the child match, so the next scan starts after it + const childMatchBegin = bestChildMatch.matchBegin; + if (bestChildMatch.pattern._patternType === TokenPatternType.RangePattern) { + const childMatchEnd = bestChildMatch.matchEnd!; + lastMatchIndex = childMatchEnd.index + childMatchEnd[0].length; + } else { + lastMatchIndex = childMatchBegin.index + childMatchBegin[0].length; + } + + // Check if the match starts after the currently determined end match start, if so we ignore it + if (childMatchBegin.index >= contentEndIndex) { + continue; + } + + // To speed up the search, we can add any tokens that are within the content range + contentMatches.push(bestChildMatch); + + // If the child match last char doesn't extend the current range, we can also ignore it + if (lastMatchIndex <= lastMatchedChar) { + continue; + } + + // The child match is outside the range, so we should find a new end match + reEnd.lastIndex = lastMatchIndex; + matchEnd = reEnd.exec(result.source); + + // If no end match could be found, assume the whole pattern is invalid + if (!matchEnd) { + break; + } + } } } - cache[next._patternId] = null; - return null; + if (!matchEnd) { + // If no end match could be found, we'll need to expand the range to the end of the source + const reLastChar = /$(?!\r\n|\r|\n)/g; + reLastChar.lastIndex = Math.max(0, result.source.length - 1); + matchEnd = reLastChar.exec(result.source); + } + + result.matchEnd = matchEnd; + result.contentMatches = contentMatches; + result.expanded = true; } /** * Scans the text for the best matching pattern. * @param p The pattern to use for matches - * @param text The text to match on - * @param matchOffsetStart The character offset in 'text' to start the match at. + * @param source The text to match on + * @param sourceStartOffset The character offset in 'text' to start the match at. */ - public scanPattern(p: ExTokenRepoPattern, text: string, matchOffsetStart: number, cache: Array): ScanResult { - if (p.patterns.length === 0) return null; + public scanPattern(p: ExTokenRepoPattern, source: string, sourceStartOffset: number, cache: Array): ScanResult { + if (p.patterns.length === 0) { + return null; + } const cachedP = cache[p._patternId]; if (cachedP !== undefined) { // If the cached value is null, no match was found in the entire text - if (cachedP === null || cachedP.matchBegin.index >= matchOffsetStart) { + if (cachedP === null || cachedP.matchBegin.index >= sourceStartOffset) { + if (cachedP?.pattern._patternType === TokenPatternType.RangePattern && !cachedP.expanded) { + this.expandRangeScanResult(cachedP as RangeScanResult, cache); + } return cachedP; } } @@ -362,28 +480,34 @@ class DocumentTokenizer { const cachedResult = cache[next._patternId]; if (cachedResult !== undefined) { // If the cached value is null, no match was found in the entire text - if (cachedResult === null) continue; - if (cachedResult.matchBegin.index >= matchOffsetStart) { + if (cachedResult === null) { + continue; + } + + if (cachedResult.matchBegin.index >= sourceStartOffset) { scanResult = cachedResult; } } + // The result wasn't cached or was invalidated, so we need to scan for the next match if (scanResult === null) { switch (next._patternType) { case TokenPatternType.MatchPattern: - scanResult = this.scanMatchPattern(next, text, matchOffsetStart, cache); + scanResult = this.scanMatchPattern(next, source, sourceStartOffset); break; case TokenPatternType.RangePattern: - scanResult = this.scanRangePattern(next, text, matchOffsetStart, cache); + scanResult = this.scanRangePattern(next, source, sourceStartOffset); break; case TokenPatternType.RepoPattern: - scanResult = this.scanPattern(next, text, matchOffsetStart, cache); - cache[next._patternId] = scanResult; + scanResult = this.scanPattern(next, source, sourceStartOffset, cache); break; default: assert(false, "Invalid TokenPatternType found! If this triggers, setupAndValidatePatterns() didn't assign the PatternStateProperties properly."); break; } + + // Cache the result + cache[next._patternId] = scanResult; } if (!scanResult) continue; @@ -398,195 +522,186 @@ class DocumentTokenizer { bestResult = scanResult; // If true, this match is valid at the first possible location. No need to check further. - if (bestMatchRating === matchOffsetStart) { + if (bestMatchRating === sourceStartOffset) { break; } } + cache[p._patternId] = bestResult; + + if (bestResult?.pattern._patternType === TokenPatternType.RangePattern && !bestResult.expanded) { + this.expandRangeScanResult(bestResult as RangeScanResult, cache); + } + return bestResult; } + private executeRangePattern(bestMatch: ScanResult, source: string, parentNode: TreeNode) { + if (!bestMatch) { + return; + } + assert(bestMatch.expanded, "A range pattern should always be expanded on execute."); + + const p = bestMatch.pattern as ExTokenRangePattern; + const matchBegin = bestMatch.matchBegin; + const matchEnd = bestMatch.matchEnd; + + const startPos = matchBegin.index; + const endPos = matchEnd!.index + matchEnd![0].length; + const contentStart = matchBegin.index + matchBegin[0].length; + const contentEnd = matchEnd!.index; + + // p.token matches the whole range including the begin and end match content + const rangeNode = new TreeNode(); + if (p.token) { + rangeNode.token = new Token(p.token, this.positionAt(startPos), this.positionAt(endPos)); + } + // Begin captures are only applied to beginMatch[0] content + if (p.beginCaptures) { + this.applyCaptures(p.beginCaptures, matchBegin, source, rangeNode); + } + + // Add an additional node for the content of the range pattern, since the content can be wrapped by an additional token + const contentNode = new TreeNode(); + + // p.contentToken matches the range 'between'; after the end of beginMatch and before the start of endMatch + if (p.contentToken) { + contentNode.token = new Token(p.contentToken, this.positionAt(contentStart), this.positionAt(contentEnd)); + } + + // Patterns are only applied on 'content' (see p.contentToken above) + if (p._patternsRepo) { + /*while (!bestMatch.contentMatches!.isEmpty()) { + const contentScanResult = bestMatch.contentMatches!.pop()!; + this.applyScanResult(contentScanResult, source, contentNode); + }*/ + + this.executePattern(p._patternsRepo, source, new Range(contentStart, matchEnd!.index), contentNode); + } + + if (!contentNode.isEmpty()) { + rangeNode.addChild(contentNode); + } + + // End captures are only applied to endMatch[0] content + if (p.endCaptures) { + this.applyCaptures(p.endCaptures, matchEnd!, source, rangeNode); + } + + //assert(!rangeNode.isEmpty(), "A RangePattern must produce a valid token tree!"); + + const coverageResult = this.checkTokenTreeCoverage(rangeNode, new Range(startPos, endPos)); + if (!coverageResult.valid) { + console.warn(`The token tree is not covering the entire match range!`); + for (const gap of coverageResult.gaps) { + const gapStartPos = this.document.positionAt(gap.start); + const gapEndPos = this.document.positionAt(gap.end); + const text = this.document.getText(new VSRange(gapStartPos, gapEndPos)); + console.warn(`Gap from L${gapStartPos.line + 1}:${gapStartPos.character + 1} to L${gapEndPos.line + 1}:${gapEndPos.character + 1}, Text: '${text}'`); + } + } + + parentNode.addChild(rangeNode); + } + + private executeMatchPattern(bestMatch: MatchScanResult, source: string, parentNode: TreeNode) { + const p = bestMatch.pattern as ExTokenMatchPattern; + const match = bestMatch.matchBegin; + + const contentNode = new TreeNode(); + const startPos = match.index; + const endPos = startPos + match[0].length; + + if (p.token) { + contentNode.token = new Token(p.token, this.positionAt(startPos), this.positionAt(endPos)); + } + + if (p.captures) { + this.applyCaptures(p.captures, match, source, contentNode); + } + + const coverageResult = this.checkTokenTreeCoverage(contentNode, new Range(startPos, endPos)); + if (!coverageResult.valid) { + console.warn(`The token tree is not covering the entire match range!`); + for (const gap of coverageResult.gaps) { + const gapStartPos = this.document.positionAt(gap.start); + const gapEndPos = this.document.positionAt(gap.end); + const text = this.document.getText(new VSRange(gapStartPos, gapEndPos)); + console.warn(`Gap from L${gapStartPos.line + 1}:${gapStartPos.character + 1} to L${gapEndPos.line + 1}:${gapEndPos.character + 1}, Text: '${text}'`); + } + } + + assert(p.token || p.captures, "A MatchPattern must have either a token or captures!"); + assert(!contentNode.isEmpty(), "A MatchPattern must produce a valid token tree!"); + + parentNode.addChild(contentNode); + } + + private applyScanResult(bestMatch: ScanResult, source: string, parentNode: TreeNode) { + const p = bestMatch!.pattern; + switch (p._patternType) { + case TokenPatternType.RangePattern: + this.executeRangePattern(bestMatch as RangeScanResult, source, parentNode); + break; + case TokenPatternType.MatchPattern: + this.executeMatchPattern(bestMatch as MatchScanResult, source, parentNode); + break; + default: + assert(false, "Should not get here!"); + break; + } + } + /** * Executes the pattern on 'text', adding tokens to the token list. * @param pattern The repo pattern to use for matches. - * @param text The text to match on. + * @param source The text to match on. * @param caret The location of the reader head * @returns True if the pattern was matched * @todo Timeout after it was running too long */ - public executePattern(pattern: ExTokenRepoPattern, text: string, caret: TokenPosition) { + public executePattern(pattern: ExTokenRepoPattern, source: string, sourceRange: Range, parentNode: TreeNode) { + if (source.length === 0) { + return; + } + const cache = new Array(uniquePatternCount).fill(undefined); - const initialCharOffset = caret.charStartOffset; - const lastCharIndex = text.length; + const lastCharIndex = sourceRange.end; - for (let lastMatchIndex = 0; lastMatchIndex < lastCharIndex; ) { - const bestMatch = this.scanPattern(pattern, text, lastMatchIndex, cache); + let lastMatchIndex = sourceRange.start; + while (lastMatchIndex < lastCharIndex) { + const bestMatch = this.scanPattern(pattern, source, lastMatchIndex, cache); - if (bestMatch === null) { + if (bestMatch === null || bestMatch.matchBegin.index >= lastCharIndex) { // No match was found in the remaining text. Break the loop - caret.advance(lastCharIndex - lastMatchIndex); lastMatchIndex = lastCharIndex; continue; } + const failSafeIndex = lastMatchIndex; // Debug index to break in case of an infinite loop - const p = bestMatch.pattern; - switch (p._patternType) { - case TokenPatternType.RangePattern: - { - const matchBegin = bestMatch.matchBegin; - const matchEnd = bestMatch.matchEnd!; - lastMatchIndex = matchEnd.index + matchEnd[0].length; - const contentLength = matchEnd.index - (matchBegin.index + matchBegin[0].length); - - // Check for missing characters in a match - const matchOffset = initialCharOffset + matchBegin.index - caret.charStartOffset; - if (matchOffset !== 0) { - /*console.warn( - `A range begin match was misaligned (expected: ${initialCharOffset + matchBegin.index}, got: ${caret.charStartOffset}) on pattern '${p.begin.source}' that matched '${matchBegin[0]}' near L:${caret.line + 1} C:${ - caret.character + 1 - }.\nYou probably didn't catch all characters in the match or the match before this one.\nApplying a fix...`; - caret.advance(matchOffset); - );*/ - } - - const startCaret = caret.clone(); - - const contentStartCaret = startCaret.clone(); - contentStartCaret.advance(matchBegin[0].length); // Move caret to end of beginMatch - - const contentEndCaret = contentStartCaret.clone(); - contentEndCaret.advance(contentLength); // Move caret to end of content - - const endCaret = contentEndCaret.clone(); - endCaret.advance(matchEnd[0].length); // Move caret to end of match - - // p.token matches the whole range including the begin and end match content - if (p.token) { - this.tokens.pushBack(new Token(p.token, startCaret, endCaret)); - } - // Begin captures are only applied to beginMatch[0] content - if (p.beginCaptures) { - const captureCaret = startCaret.clone(); - this.applyCaptures(p.beginCaptures, matchBegin, captureCaret); - - assert( - captureCaret.charStartOffset === contentStartCaret.charStartOffset, - "The token read position was misaligned by the capture context! This means {p.beginCaptures} is not processing white spaces and new lines (individually!)." - ); - - if (captureCaret.line !== endCaret.line) { - // Line was moved, all characters should reset to 0 and move by content length - contentStartCaret.setValue(captureCaret); - - contentEndCaret.setValue(contentStartCaret); - contentEndCaret.advance(contentLength); // Move caret to end of content - - // Note: Moving the endCaret will also move the token, since this is a reference object - endCaret.setValue(contentEndCaret); - endCaret.advance(matchEnd[0].length); // Move caret to end of match - } - } - - // p.contentToken matches the range 'between'; after the end of beginMatch and before the start of endMatch - if (p.contentToken) { - this.tokens.pushBack(new Token(p.contentToken, contentStartCaret, contentEndCaret)); - } - - // Patterns are only applied on 'content' (see p.contentToken above) - if (p._patternsRepo) { - const captureCaret = contentStartCaret.clone(); - const content = text.substring(matchBegin.index + matchBegin[0].length, matchEnd.index); - this.executePattern(p._patternsRepo, content, captureCaret); - - assert( - captureCaret.charStartOffset === contentEndCaret.charStartOffset, - "The token read position was misaligned by the capture context! This means {p.patterns} is not processing white spaces and new lines (individually!)." - ); - - if (captureCaret.line !== endCaret.line) { - // Line was moved, all characters should reset to 0 and move by content length - contentEndCaret.setValue(captureCaret); - - // Note: Moving the endCaret will also move the token, since this is a reference object - endCaret.setValue(contentEndCaret); - endCaret.advance(matchEnd[0].length); // Move caret to end of match - } - } - - // End captures are only applied to endMatch[0] content - if (p.endCaptures) { - const captureCaret = contentEndCaret.clone(); - this.applyCaptures(p.endCaptures, matchEnd, captureCaret); - - assert( - captureCaret.charStartOffset === endCaret.charStartOffset, - "The token read position was misaligned by the capture context! This means {p.endCaptures} is not processing white spaces and new lines (individually!)." - ); - - if (captureCaret.line !== endCaret.line) { - // Line was moved, all characters should reset to 0 and move by content length - // Note: Moving the endCaret will also move the token, since this is a reference object - endCaret.setValue(captureCaret); - } - } - - caret.setValue(endCaret); - } - break; - case TokenPatternType.MatchPattern: - { - const match = bestMatch.matchBegin; - lastMatchIndex = match.index + match[0].length; - - // Check for missing characters in a match - const matchOffset = initialCharOffset + match.index - caret.charStartOffset; - if (matchOffset !== 0) { - /*console.warn( - `A match was misaligned (expected: ${initialCharOffset + match.index}, got: ${caret.charStartOffset}) on pattern '${p.match.source}' that matched '${match[0]}' near L:${caret.line + 1} C:${ - caret.character + 1 - }.\nYou probably didn't catch all characters in the match or the match before this one.\nApplying a fix...` - );*/ - caret.advance(matchOffset); - } - - const startCaret = caret.clone(); - - const endCaret = startCaret.clone(); - endCaret.advance(match[0].length); // Move carret to end of match - - if (p.token) { - if (p.token === CharacterTokenType.NewLine) endCaret.nextLine(); - - this.tokens.pushBack(new Token(p.token, startCaret, endCaret)); - } - - if (p.captures) { - const captureCaret = startCaret.clone(); - this.applyCaptures(p.captures, match, captureCaret); - - assert(captureCaret.charStartOffset === endCaret.charStartOffset, "The token read position was misaligned by the capture context!"); - - // Note: Moving the endCaret will also move the token, since this is a reference object - if (captureCaret.line !== endCaret.line) { - endCaret.setValue(captureCaret); - } - } - - caret.setValue(endCaret); - } - break; - default: - assert(false, "Should not get here!"); - break; + if (bestMatch.pattern._patternType === TokenPatternType.RangePattern) { + assert(bestMatch.expanded, "A RangePattern must be expanded!"); + const matchEnd = bestMatch.matchEnd!; + lastMatchIndex = matchEnd.index + matchEnd[0].length; + } else { + const matchBegin = bestMatch.matchBegin; + lastMatchIndex = matchBegin.index + matchBegin[0].length; } + this.applyScanResult(bestMatch, source, parentNode); + if (failSafeIndex === lastMatchIndex) { console.error("The loop has not advanced since the last cycle. This indicates a programming error. Breaking the loop!"); break; } } } + + public positionAt(offset: number): TokenPosition { + const pos = this.document.positionAt(offset); + return new TokenPosition(pos.line, pos.character, offset); + } } // eslint-disable-next-line no-shadow diff --git a/src/utilities/vector.ts b/src/utilities/vector.ts index 158aa99..7182e8e 100644 --- a/src/utilities/vector.ts +++ b/src/utilities/vector.ts @@ -50,6 +50,17 @@ export class Vector implements Iterable { } } + /** + * Sort the vector using the specified comparison function + * @param compareFn The comparison function to use + */ + public sort(compareFn: (a: T, b: T) => number) { + this.buffer.sort((a, b) => { + if (a === null || b === null) return 0; + return compareFn(a, b); + }); + } + /** * Get the item at the end of the vector, and remove it from the vector * @returns The item at the end @@ -135,6 +146,12 @@ export class Vector implements Iterable { this.buffer[elementIndexA] = temp; } + public forEach(callback: (item: T) => void) { + for (let i = 0; i < this.itemCount; ++i) { + callback(this.buffer[i] as T); + } + } + /** * Swap the item at the specified index with the item at the back of the vector * @param index The index of the item to swap to the back diff --git a/syntax-to-token-pattern.py b/syntax-to-token-pattern.py index 56719d0..7fd47f3 100644 --- a/syntax-to-token-pattern.py +++ b/syntax-to-token-pattern.py @@ -1,18 +1,17 @@ +from datetime import datetime import json import re from typing import Any -PATTERN_PREFIX = "python#" +from dataclasses import dataclass, field -# load the input data from the file -with open("./syntaxes/renpy.python.tmLanguage.json", "r") as file: - data = json.load(file) - -# get the repository value -repository = data.get("repository", {}) - -# Create an empty list to store the typescript strings -typescript_entries: list[str] = [] +@dataclass +class GeneratorState: + defined_variables: list[str] = field(default_factory=list[str]) + used_token_types: list[str] = field(default_factory=list[str]) + pattern_include_entries: list[str] = field(default_factory=list[str]) + external_pattern_include_entries: list[str] = field(default_factory=list[str]) + source_imports: list[str] = field(default_factory=list[str]) def get_indent(indent: int) -> str: return " " * indent @@ -21,48 +20,375 @@ def camelCase(st: str): output = ''.join(x for x in st.title() if x.isalnum()) return output[0].lower() + output[1:] -def get_token_type(name: str) -> str: - if "invalid." in name: - return "MetaTokenType.Invalid" - elif "storage.type.string." in name: - return "LiteralTokenType.String" - - tokenPrefix = "" - token = name.replace(".python", "") - - if "meta." in token: - tokenPrefix = "MetaTokenType." - token = token.replace("meta.", "") - elif "keyword." in token: - tokenPrefix = "KeywordTokenType." - token = token.replace("keyword.", "") - elif "entity." in token: - tokenPrefix = "EntityTokenType." - token = token.replace("entity.", "") +def titleCase(st: str): + return ''.join(x for x in st.title() if x.isalnum()) + +def convert_token_type_split(name: str) -> str: + token_parts = name.split(".") + if len(token_parts) <= 1: + return "Error" + + def get_part(index: int) -> str: + if index >= len(token_parts): + return "" + return token_parts[index] + + def get_parts(range: slice) -> str: + if range.start >= len(token_parts): + return "" + return ".".join(token_parts[range]) - return "MetaTokenType.Invalid" # tokenPrefix + camelCase(token) + if get_part(0) == "string": + if get_part(1) == "quoted" and get_part(2) == "docstring": + return "MetaTokenType.Docstring" + else: + return "LiteralTokenType.String" + + elif get_part(0) == "variable": + return "EntityTokenType.VariableName" + + elif get_part(0) == "storage": + if get_part(1) == "type": + if get_part(2) == "string": + return "MetaTokenType.StringStorageType" + elif get_part(2) == "format": + return "MetaTokenType.FormatStorageType" + elif get_part(2) == "class": + return "KeywordTokenType.Class" + elif get_part(2) == "imaginary": + return "MetaTokenType.ImaginaryNumberStorageType" + elif get_part(2) == "number": + return "MetaTokenType.NumberStorageType" + elif get_part(2) == "function": + if get_part(3) == "lambda": + return "KeywordTokenType.Lambda" + elif get_part(3) == "async": + return "KeywordTokenType.Async" + elif get_part(3) == "label": + return "KeywordTokenType.Label" + else: + return "KeywordTokenType.Def" + + elif get_part(1) == "modifier": + if get_part(2) == "declaration": + return "KeywordTokenType." + titleCase(get_parts(slice(3, -1))) + elif get_part(2) == "flag": + return "MetaTokenType.ModifierFlagStorageType" + + elif get_part(0) == "constant": + if get_part(1) == "numeric": + if get_part(2) == "integer": + return "LiteralTokenType.Integer" + elif get_part(2) == "float": + return "LiteralTokenType.Float" + elif get_part(2) == "boolean": + return "LiteralTokenType.Boolean" + elif get_part(2) == "character": + return "LiteralTokenType.Character" + elif get_part(2) == "escape": + return "LiteralTokenType.Escape" + else: + return "MetaTokenType.ConstantNumeric" + + elif get_part(1) == "language": + return "MetaTokenType.ConstantLiteral" + + elif get_part(1) == "color": + return "LiteralTokenType.Color" + + elif get_part(1) == "character": + if get_part(2) == "escape": + if get_part(3) == "python" or get_part(3) == "regexp": + return "MetaTokenType.EscapeSequence" + else: + return "EscapedCharacterTokenType.Esc" + titleCase(get_parts(slice(3, -2))) + + elif get_part(2) == "unicode": + return "MetaTokenType.EscapeSequence" + + elif get_part(2) == "set": + return "MetaTokenType.CharacterSet" + + elif get_part(2) == "format" and get_part(3) == "placeholder": + return "MetaTokenType.Placeholder" + + + elif get_part(1) == "other": + return "MetaTokenType.ConstantCaps" + + elif get_part(0) == "invalid": + if get_part(1) == "deprecated": + return "MetaTokenType.Deprecated" + else: + return "MetaTokenType.Invalid" + + elif get_part(0) == "punctuation": + if get_part(1) == "definition": + if get_part(2) == "tag" or get_part(2) == "dict" or get_part(2) == "inheritance": + if get_part(3) == "begin": + return "CharacterTokenType.OpenBracket" + elif get_part(3) == "end": + return "CharacterTokenType.CloseBracket" + elif get_part(3) == "region": + return "MetaTokenType.CommentRegionTag" + + elif get_part(2) == "list": + if get_part(3) == "begin": + return "CharacterTokenType.OpenSquareBracket" + elif get_part(3) == "end": + return "CharacterTokenType.CloseSquareBracket" + + elif get_part(2) == "arguments" or get_part(2) == "parameters": + if get_part(3) == "begin": + return "CharacterTokenType.OpenParentheses" + elif get_part(3) == "end": + return "CharacterTokenType.CloseParentheses" + + elif get_part(2) == "string": + if get_part(3) == "begin": + return "MetaTokenType.StringBegin" + elif get_part(3) == "end": + return "MetaTokenType.StringEnd" + + elif get_part(2) == "comment": + return "CharacterTokenType.Hashtag" + elif get_part(2) == "decorator": + return "CharacterTokenType.AtSymbol" + + elif get_part(1) == "parenthesis": + if get_part(2) == "begin": + return "CharacterTokenType.OpenParentheses" + elif get_part(2) == "end": + return "CharacterTokenType.CloseParentheses" + + elif get_part(1) == "bracket": + if get_part(2) == "begin": + return "CharacterTokenType.OpenBracket" + elif get_part(2) == "end": + return "CharacterTokenType.CloseBracket" + + elif get_part(1) == "square-bracket": + if get_part(2) == "begin": + return "CharacterTokenType.OpenSquareBracket" + elif get_part(2) == "end": + return "CharacterTokenType.CloseSquareBracket" + + elif get_part(1) == "section": + if (get_part(2) == "python" or get_part(2) =="block" or \ + get_part(2) == "class" or get_part(2) == "function" or \ + get_part(2) == "atl" or get_part(2) == "label" or \ + get_part(2) == "menu" or get_part(2) == "menu-option"): + if get_part(3) == "begin" or get_part(4) == "begin": + return "CharacterTokenType.Colon" + + elif get_part(1) == "separator": + if get_part(2) == "parameters" or get_part(2) == "arguments" or get_part(2) == "element" or get_part(2) == "inheritance": + return "CharacterTokenType.Comma" + elif get_part(2) == "dict" or get_part(2) == "annotation" or get_part(2) == "slice": + return "CharacterTokenType.Colon" + elif get_part(2) == "continuation": + return "CharacterTokenType.Backslash" + elif get_part(2) == "key-value": + return "CharacterTokenType.EqualsSymbol" + else: + return "CharacterTokenType." + titleCase(get_part(2)) + + elif get_part(1) == "character": + if get_part(2) == "set": + if get_part(3) == "begin": + return "CharacterTokenType.OpenSquareBracket" + elif get_part(3) == "end": + return "CharacterTokenType.CloseSquareBracket" + + elif get_part(1) == "comment": + if get_part(2) == "begin": + return "MetaTokenType.CommentBegin" + elif get_part(2) == "end": + return "MetaTokenType.CommentEnd" + + else: + return "CharacterTokenType." + titleCase(get_part(1)) + + elif get_part(0) == "support": + if get_part(1) == "type": + if get_part(2) == "property-name": + return "EntityTokenType.PropertyName" + elif get_part(2) == "class": + return "EntityTokenType.ClassName" + elif get_part(2) == "function": + return "EntityTokenType.FunctionName" + elif get_part(2) == "variable": + return "EntityTokenType.VariableName" + elif get_part(2) == "namespace": + return "EntityTokenType.NamespaceName" + elif get_part(2) == "metaclass": + return "KeywordTokenType.Metaclass" + elif get_part(2) == "exception": + return "MetaTokenType.BuiltinExceptionType" + else: + return "MetaTokenType.BuiltinType" + + elif get_part(1) == "variable": + return "EntityTokenType.VariableName" + + elif get_part(1) == "function": + if get_part(2) == "event": + return "EntityTokenType.EventName" + else: + return "EntityTokenType.FunctionName" + + elif get_part(1) == "other": + if get_part(2) == "match": + if get_part(3) == "any": + return "CharacterTokenType.Period" + elif get_part(3) == "begin": + return "CharacterTokenType.Caret" + elif get_part(3) == "end": + return "CharacterTokenType.DollarSymbol" + + elif get_part(2) == "escape": + return "MetaTokenType.EscapeSequence" + + elif get_part(0) == "comment": + if get_part(1) == "typehint": + return "MetaTokenType.Typehint" + titleCase(get_part(2)) + else: + return "MetaTokenType.Comment" + + elif get_part(0) == "keyword": + if get_part(1) == "operator": + if get_part(2) == "arithmetic": + if get_part(3) == "python" or get_part(3) == "renpy": + return "MetaTokenType.ArithmeticOperator" + else: + return "OperatorTokenType." + titleCase(get_part(3)) + elif get_part(2) == "logical": + return "MetaTokenType.LogicalOperatorKeyword" + elif get_part(2) == "bitwise": + return "MetaTokenType.BitwiseOperatorKeyword" + elif get_part(2) == "comparison": + return "MetaTokenType.ComparisonOperatorKeyword" + elif get_part(2) == "python": + return "MetaTokenType.Operator" + elif get_part(2) == "unpacking": + return "OperatorTokenType." + titleCase(get_part(2)) + else: + return "OperatorTokenType." + titleCase(get_parts(slice(2, -1))) + + elif get_part(1) == "codetag": + return "MetaTokenType.CommentCodeTag" + + elif get_part(1) == "control": + if get_part(2) == "flow": + if get_part(3) == "python" or get_part(3) == "renpy": # TODO + return "MetaTokenType.ControlFlowKeyword" + else: + return "KeywordTokenType." + titleCase(get_part(3)) + elif get_part(2) == "import": + return "KeywordTokenType.Import" + elif get_part(2) == "conditional": + return "KeywordTokenType.If" + + elif get_part(1) == "illegal" and get_part(2) == "name": + return "MetaTokenType.Invalid" + else: + return "KeywordTokenType." + titleCase(get_parts(slice(1, -1))) + + elif get_part(0) == "entity": + if get_part(1) == "name": + if get_part(2) == "type": + return "EntityTokenType." + titleCase(get_part(3)) + "Name" + else: + return "EntityTokenType." + titleCase(get_part(2)) + "Name" + + elif get_part(1) == "other": + if get_part(2) == "inherited-class": + return "EntityTokenType.InheritedClassName" + + elif get_part(0) == "meta": + if get_part(1) == "embedded": + if get_part(2) == "block": + return "MetaTokenType.PythonBlock" + elif get_part(2) == "line": + return "MetaTokenType.PythonLine" + + elif get_part(1) == "arguments" or get_part(2) == "arguments" or get_part(3) == "arguments": + return "MetaTokenType.Arguments" + + elif get_part(1) == "function-call": + if get_part(2) == "label": + return "MetaTokenType.LabelCall" + else: + return "MetaTokenType.FunctionCall" + + elif get_part(1) == "member": + if get_part(2) == "access": + if get_part(3) == "label": + return "MetaTokenType.LabelAccess" + else: + return "MetaTokenType.MemberAccess" + + elif get_part(1) == "string": + if get_part(2) == "tag": + return "MetaTokenType.StringTag" + elif get_part(2) == "character": + return "MetaTokenType.CharacterNameString" + + elif (get_part(1) == "class" or get_part(1) == "function") and get_part(2) != "inheritance": + return "MetaTokenType." + titleCase(get_part(1)) + "Definition" + + else: + return "MetaTokenType." + titleCase(get_parts(slice(1, -1))) + + return "Error" + " /*Error: Could not convert token type*/" + +def get_token_type(state: GeneratorState, name: str) -> str: + token = name.split(" ")[-1] # Multi-tokens are not yet supported. For now assume the last token is the important one + token = convert_token_type_split(token) + + import_token_type = token.split(".")[0] + if not import_token_type in state.used_token_types: + state.used_token_types.append(import_token_type) + + return token.replace("Atl", "ATL") # Upper case ATL def get_match_str(match: str, hasCaptures: bool) -> str: match = match.replace("/", "\\/") # Escape forward slashes + + iFlagSet = False + if "(?i)" in match: + iFlagSet = True + match = match.replace("(?i)", "") + + match = match.replace("[:alpha:]", "a-zA-Z") + match = match.replace("[:alnum:]", "a-zA-Z0-9") + match = match.replace("[:upper:]", "A-Z") + + iFlag: str = "i" if iFlagSet else "" mFlag: str = "m" if re.search("(? str: +def transform_captures(state: GeneratorState, indent: int, captures: dict[str, Any], access_str: str) -> str: typescript_entry = "{\n" indent += 4 for key, value in captures.items(): - typescript_entry += f"{get_indent(indent)}{key}: {transform_pattern(indent, value)},\n" + capture_access_str = f"{access_str}[{key}]" + typescript_entry += f"{get_indent(indent)}{key}: {transform_pattern(state, indent, value, capture_access_str)},\n" indent -= 4 typescript_entry += f"{get_indent(indent)}}},\n" return typescript_entry -def transform_pattern(indent: int, value: dict[str, Any]) -> str: +def transform_pattern(state: GeneratorState, indent: int, value: dict[str, Any], access_str: str) -> str: typescript_entry = "{\n" indent += 4 + # Add debugName for patterns with regex + if "match" in value or "begin" in value or "end" in value: + typescript_entry += f"{get_indent(indent)}debugName: \"{access_str}\",\n\n" + # Add comments if "comment" in value: comment = value["comment"].replace("\n", f"\n{get_indent(indent)}// ") @@ -71,13 +397,13 @@ def transform_pattern(indent: int, value: dict[str, Any]) -> str: # Add token type if "name" in value: name = value["name"] - token = get_token_type(name) - typescript_entry += f"{get_indent(indent)}token: {token}, // {name}\n" + token = get_token_type(state, name) + typescript_entry += f"{get_indent(indent)}token: {token}, /*{name}*/\n" if "contentName" in value: name = value["contentName"] - token = get_token_type(name) - typescript_entry += f"{get_indent(indent)}contentToken: {token}, // {name}\n" + token = get_token_type(state, name) + typescript_entry += f"{get_indent(indent)}contentToken: {token}, /*{name}*/\n" # Add match if "match" in value: @@ -87,7 +413,7 @@ def transform_pattern(indent: int, value: dict[str, Any]) -> str: # Iterate through the captures in the value if "captures" in value: typescript_entry += f"{get_indent(indent)}captures: " - typescript_entry += transform_captures(indent, value["captures"]) + typescript_entry += transform_captures(state, indent, value["captures"], f"{access_str}.captures!") if "begin" in value: match = get_match_str(value["begin"], "beginCaptures" in value) @@ -96,7 +422,7 @@ def transform_pattern(indent: int, value: dict[str, Any]) -> str: # Iterate through the beginCaptures in the value if "beginCaptures" in value: typescript_entry += f"{get_indent(indent)}beginCaptures: " - typescript_entry += transform_captures(indent, value["beginCaptures"]) + typescript_entry += transform_captures(state, indent, value["beginCaptures"], f"{access_str}.beginCaptures!") if "end" in value: match = get_match_str(value["end"], "endCaptures" in value) @@ -105,7 +431,7 @@ def transform_pattern(indent: int, value: dict[str, Any]) -> str: # Iterate through the endCaptures in the value if "endCaptures" in value: typescript_entry += f"{get_indent(indent)}endCaptures: " - typescript_entry += transform_captures(indent, value["endCaptures"]) + typescript_entry += transform_captures(state, indent, value["endCaptures"], f"{access_str}.endCaptures!") # Iterate through the patterns in the value if "patterns" in value: @@ -113,17 +439,85 @@ def transform_pattern(indent: int, value: dict[str, Any]) -> str: typescript_entry += f"{get_indent(indent)}patterns: [\n" indent += 4 + includes: list[tuple[str, int]] = [] + external_includes: list[tuple[str, int]] = [] + last_pattern_index = 0 + + # Handle includes first to make sure they are pushed in the correct order + for i in range(len(patterns)): + pattern = patterns[i] + + if "include" not in pattern: + last_pattern_index = i + continue + + include: str = pattern["include"] + + if include.startswith("source.renpy"): + include_parts = include.split("#") + + source = include_parts[0] + reference = include_parts[1] if len(include_parts) > 1 else None + + language = source.split(".")[-1] + language_accessor = titleCase(language) + "Patterns." + + if reference == None: + include = language_accessor + language + else: + include = language_accessor + camelCase(reference) + + if language not in state.source_imports: + state.source_imports.append(language) + + external_includes.append((include, i)) + else: + include = camelCase(include) + + # All includes that have not been defined yet, are pushed at the bottom of the file + if include not in state.defined_variables: + includes.append((include, i)) + else: + last_pattern_index = i + + # Add the includes to the list of includes + def process_includes(include_list: list[tuple[str, int]], entries_list: list[str]): + push_list: list[str] = [] + for i in range(len(include_list)): + [include, index] = include_list[i] + if index < last_pattern_index: # 0 < 0 + entries_list.append(f"{access_str}.patterns!.splice({index}, 0, {include});") + else: + push_list.append(include) + + if len(push_list) > 0: + entries_list.append(f"{access_str}.patterns!.push({', '.join(push_list)});") + + if len(includes) > 0: + process_includes(includes, state.pattern_include_entries) + last_pattern_index = includes[-1][1] + + if len(external_includes) > 0: + process_includes(external_includes, state.external_pattern_include_entries) + + # Now write the pattern source for i in range(len(patterns)): pattern = patterns[i] # Handle includes if "include" in pattern: - include = camelCase(PATTERN_PREFIX + pattern["include"]) - typescript_entry += f"{get_indent(indent)}{include},\n" + include: str = pattern["include"] + if include.startswith("source.renpy"): + continue + + include = camelCase(include) + if include in state.defined_variables: + typescript_entry += f"{get_indent(indent)}{include},\n" + continue typescript_entry += get_indent(indent) - typescript_entry += transform_pattern(indent, pattern) + typescript_entry += transform_pattern(state, indent, pattern, f"{access_str}.patterns![{i}]") typescript_entry += ",\n" indent -= 4 @@ -133,22 +527,109 @@ def transform_pattern(indent: int, value: dict[str, Any]) -> str: typescript_entry += f"{get_indent(indent)}}}" return typescript_entry - -# Iterate through the repository entries -for key, value in repository.items(): - patternName = camelCase(PATTERN_PREFIX + key) - typescript_entry = f"export const {patternName}: TokenPattern = " - - typescript_entry += transform_pattern(0, value) +def generate_file(state: GeneratorState, source_file: str, output_file: str): - typescript_entry += ";\n" - # Add the typescript entry to the list of entries - typescript_entries.append(typescript_entry) - -# Write the typescript entries to a file -with open("./src/tokenizer/generated.output.ts", "w") as file: - contents: str = "import { KeywordTokenType, LiteralTokenType, MetaTokenType } from \"./renpy-tokens\";\n" - contents += "import { TokenPattern } from \"./token-pattern-types\";\n\n" - contents += "\n".join(typescript_entries) - file.write(contents) + # load the input data from the file + with open(source_file, "r") as file: + data = json.load(file) + + # get the repository value + repository = data.get("repository", {}) + + # Iterate through the repository entries + src_lines: list[str] = [] + for key, value in repository.items(): + patternName = camelCase(key) + + # Keep track of the defined variables to reduce the amount of variables we need to push later + state.defined_variables.append(patternName) + + typescript_entry = f"export const {patternName}: TokenPattern = " + + typescript_entry += transform_pattern(state, 0, value, patternName) + + typescript_entry += ";\n" + + # Add the typescript entry to the list of entries + src_lines.append(typescript_entry) + + # Write the typescript entries to a file + with open(output_file, "w") as file: + eslint_comments: list[str] = [ + "/* eslint-disable no-useless-escape */", + "/* eslint-disable no-useless-backreference */", + "/* eslint-disable @typescript-eslint/no-non-null-assertion */", + ] + + contents: str = "\n".join(eslint_comments) + "\n\n" + contents += "// THIS FILE HAS BEEN GENERATED BY THE `syntax-to-token-pattern.py` GENERATOR\n" + contents += "// DO NOT EDIT THIS FILE DIRECTLY! INSTEAD RUN THE PYTHON SCRIPT.\n" + contents += "// ANY MANUAL EDITS MADE TO THIS FILE WILL BE OVERWRITTEN. YOU HAVE BEEN WARNED.\n" + contents += f"// Last generated: {datetime.utcnow().strftime('%d/%m/%Y %H:%M:%S')} (UTC+0)\n" + contents += "\n" + + contents += f"import {{ {', '.join(state.used_token_types)} }} from \"./renpy-tokens\";\n" + contents += "import { TokenPattern } from \"./token-pattern-types\";\n\n" + contents += "\n".join(src_lines) + + if len(state.pattern_include_entries) > 0: + contents += "\n// Push pattern references that were not defined on include\n" + contents += "\n".join(state.pattern_include_entries) + + # Remove any newlines from the body of a single line array + contents = re.sub(r':\s*\[\n\s*([^,]*),\n?\s*\]', r': [\1]', contents, flags=re.MULTILINE) + + # Remove any newlines from the body of a single line definition + contents = re.sub(r':\s*{\n\s*(.*)\n?\s*}', r': { \1 }', contents, flags=re.MULTILINE) + + file.write(contents) + +def generate_token_patterns(): + renpy_state = GeneratorState() + atl_state = GeneratorState() + python_state = GeneratorState() + + generate_file(renpy_state, "./syntaxes/renpy.tmLanguage.json", "./src/tokenizer/renpy-token-patterns.g.ts") + generate_file(atl_state, "./syntaxes/renpy.atl.tmLanguage.json", "./src/tokenizer/atl-token-patterns.g.ts") + generate_file(python_state, "./syntaxes/renpy.python.tmLanguage.json", "./src/tokenizer/python-token-patterns.g.ts") + + # Write the typescript entries to a file + output_file = "./src/tokenizer/token-patterns.g.ts" + with open(output_file, "w") as file: + contents = "/* eslint-disable @typescript-eslint/no-non-null-assertion */\n\n" + contents += "// THIS FILE HAS BEEN GENERATED BY THE `syntax-to-token-pattern.py` GENERATOR\n" + contents += "// DO NOT EDIT THIS FILE DIRECTLY! INSTEAD RUN THE PYTHON SCRIPT.\n" + contents += "// ANY MANUAL EDITS MADE TO THIS FILE WILL BE OVERWRITTEN. YOU HAVE BEEN WARNED.\n" + contents += f"// Last generated: {datetime.utcnow().strftime('%d/%m/%Y %H:%M:%S')} (UTC+0)\n" + contents += "\n" + + # Add all source import from all states, but only the unique ones + source_imports = set(renpy_state.source_imports + atl_state.source_imports + python_state.source_imports) + + for source_import in source_imports: + contents += f"import * as {titleCase(source_import)}Patterns from \"./{source_import}-token-patterns.g\";\n" + + def add_entries(entries: list[str], prefix: str) -> str: + contents = "" + if len(entries) > 0: + contents += f"\n// Push all {prefix} external includes\n" + for entry in entries: + contents += f"{prefix}.{entry}\n" + + return contents + + contents += add_entries(renpy_state.external_pattern_include_entries, "RenpyPatterns") + contents += add_entries(atl_state.external_pattern_include_entries, "AtlPatterns") + contents += add_entries(python_state.external_pattern_include_entries, "PythonPatterns") + + exports: list[str] = [] + for source_import in source_imports: + exports.append(f"{titleCase(source_import)}Patterns") + + contents += f"\n\nexport {{ {', '.join(exports)} }};" + + file.write(contents) + +# main +generate_token_patterns() \ No newline at end of file diff --git a/syntaxes/renpy.atl.tmLanguage.json b/syntaxes/renpy.atl.tmLanguage.json new file mode 100644 index 0000000..c0e7ebd --- /dev/null +++ b/syntaxes/renpy.atl.tmLanguage.json @@ -0,0 +1,333 @@ +{ + "information_for_contributors": ["Please remeber to regenerate the ts token patterns using the 'syntax-to-token-pattern.py' python script"], + "$schema": "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json", + "name": "Ren'Py ATL", + "scopeName": "source.renpy.atl", + "patterns": [{ "include": "#atl" }], + "repository": { + "atl": { + "comment": "https://www.renpy.org/doc/html/atl.html#atl-syntax-and-semantics", + "patterns": [{ "include": "#atl-keywords" }, { "include": "#atl-blocks" }, { "include": "#atl-simple-expression" }, { "include": "#atl-warper" }, { "include": "#atl-event" }, { "include": "#atl-on" }, { "include": "#atl-function" }] + }, + "atl-build-in-properties": { + "comment": "https://www.renpy.org/doc/html/atl.html#list-of-transform-properties", + "patterns": [ + { + "comment": "Special manipulation keywords", + "match": "\\b(?>>|\\.\\.\\.)\\s)(?=\\s*\\S))", + "match": "(?:\\s*((?:>>>|\\.\\.\\.)\\s)(?=\\s*\\S))", "captures": { "1": { "name": "keyword.control.flow.python" } } @@ -190,8 +194,11 @@ "match": "\\b(?>=|//=|\\*\\*=|\\+=|-=|/=|@=|\\*=|%=|~=|\\^=|&=|\\|=|=(?!=)" }, "operator": { - "match": "\\b(?>|&|\\||\\^|~)(?#3)|(\\*\\*|\\*|\\+|-|%|//|/|@)(?#4)|(!=|==|>=|<=|<|>)(?#5)|(:=)(?#6)", + "match": "\\b(?>|&|\\||\\^|~)|(\\*\\*|\\*|\\+|-|%|//|/|@)|(!=|==|>=|<=|<|>)|(:=)", "captures": { "1": { "name": "keyword.operator.logical.python" }, "2": { "name": "keyword.control.flow.python" }, @@ -375,7 +383,6 @@ ] }, "number": { - "name": "constant.numeric.python", "patterns": [ { "include": "#number-float" }, { "include": "#number-dec" }, @@ -551,7 +558,7 @@ "name": "invalid.illegal.brace.python", "match": "(}(?!}))" }, - "import": { + "import-statement": { "comment": "Import statements used to correctly mark `from`, `import`, and `as`", "patterns": [ { @@ -754,7 +761,7 @@ }, "function-declaration": { "name": "meta.function.python", - "begin": "\\s*(?:\\b(async)\\s+)?\\b(def)\\s+(?=[[:alpha:]_][[:word:]]*\\s*\\()", + "begin": "\\s*(?:\\b(async)\\s+)?\\b(def)\\s+(?=\\w+\\s*\\()", "end": "(:|(?=[#'\"\\n]))", "beginCaptures": { "1": { "name": "storage.type.function.async.python" }, @@ -898,7 +905,7 @@ "decorator": { "name": "meta.function.decorator.python", "begin": "^\\s*((@))\\s*(?=[[:alpha:]_]\\w*)", - "end": "(\\))#trailingwhitespaceandcommentsarelegal(?:(.*?)(?=\\s*(?:\\#|$)))|(?=\\n|\\#)", + "end": "(\\))(?:(.*?)(?=\\s*(?:\\#|$)))|(?=\\n|\\#)", "beginCaptures": { "1": { "name": "entity.name.function.decorator.python" }, "2": { "name": "punctuation.definition.decorator.python" } @@ -1028,7 +1035,7 @@ }, "builtin-types": { "name": "support.type.python", - "match": "(?)", "end": "(\\)|(?=\\'))|((?=(?)", "end": "(\\)|(?=\\'\\'\\'))", "beginCaptures": { - "1": { "name": "support.other.parenthesis.regexp punctuation.parenthesis.named.begin.regexp" }, + "1": { "name": "support.other.parenthesis.regexp punctuation.parenthesis.begin.named.regexp" }, "2": { "name": "entity.name.tag.named.group.regexp" } }, "endCaptures": { - "1": { "name": "support.other.parenthesis.regexp punctuation.parenthesis.named.end.regexp" }, + "1": { "name": "support.other.parenthesis.regexp punctuation.parenthesis.end.named.regexp" }, "2": { "name": "invalid.illegal.newline.python" } }, "patterns": [{ "include": "#single-three-regexp-expression" }, { "include": "#comments-string-single-three" }] @@ -1487,10 +1494,10 @@ "end": "(\\)|(?=\\'\\'\\'))", "beginCaptures": { "0": { "name": "keyword.operator.lookahead.regexp" }, - "1": { "name": "punctuation.parenthesis.lookahead.begin.regexp" } + "1": { "name": "punctuation.parenthesis.begin.lookahead.regexp" } }, "endCaptures": { - "1": { "name": "keyword.operator.lookahead.regexp punctuation.parenthesis.lookahead.end.regexp" }, + "1": { "name": "keyword.operator.lookahead.regexp punctuation.parenthesis.end.lookahead.regexp" }, "2": { "name": "invalid.illegal.newline.python" } }, "patterns": [{ "include": "#single-three-regexp-expression" }, { "include": "#comments-string-single-three" }] @@ -1500,10 +1507,10 @@ "end": "(\\)|(?=\\'\\'\\'))", "beginCaptures": { "0": { "name": "keyword.operator.lookahead.negative.regexp" }, - "1": { "name": "punctuation.parenthesis.lookahead.begin.regexp" } + "1": { "name": "punctuation.parenthesis.begin.lookahead.regexp" } }, "endCaptures": { - "1": { "name": "keyword.operator.lookahead.negative.regexp punctuation.parenthesis.lookahead.end.regexp" }, + "1": { "name": "keyword.operator.lookahead.negative.regexp punctuation.parenthesis.end.lookahead.regexp" }, "2": { "name": "invalid.illegal.newline.python" } }, "patterns": [{ "include": "#single-three-regexp-expression" }, { "include": "#comments-string-single-three" }] @@ -1513,10 +1520,10 @@ "end": "(\\)|(?=\\'\\'\\'))", "beginCaptures": { "0": { "name": "keyword.operator.lookbehind.regexp" }, - "1": { "name": "punctuation.parenthesis.lookbehind.begin.regexp" } + "1": { "name": "punctuation.parenthesis.begin.lookbehind.regexp" } }, "endCaptures": { - "1": { "name": "keyword.operator.lookbehind.regexp punctuation.parenthesis.lookbehind.end.regexp" }, + "1": { "name": "keyword.operator.lookbehind.regexp punctuation.parenthesis.end.lookbehind.regexp" }, "2": { "name": "invalid.illegal.newline.python" } }, "patterns": [{ "include": "#single-three-regexp-expression" }, { "include": "#comments-string-single-three" }] @@ -1526,10 +1533,10 @@ "end": "(\\)|(?=\\'\\'\\'))", "beginCaptures": { "0": { "name": "keyword.operator.lookbehind.negative.regexp" }, - "1": { "name": "punctuation.parenthesis.lookbehind.begin.regexp" } + "1": { "name": "punctuation.parenthesis.begin.lookbehind.regexp" } }, "endCaptures": { - "1": { "name": "keyword.operator.lookbehind.negative.regexp punctuation.parenthesis.lookbehind.end.regexp" }, + "1": { "name": "keyword.operator.lookbehind.negative.regexp punctuation.parenthesis.end.lookbehind.regexp" }, "2": { "name": "invalid.illegal.newline.python" } }, "patterns": [{ "include": "#single-three-regexp-expression" }, { "include": "#comments-string-single-three" }] @@ -1539,10 +1546,10 @@ "end": "(\\)|(?=\\'\\'\\'))", "beginCaptures": { "0": { "name": "keyword.operator.conditional.regexp" }, - "1": { "name": "punctuation.parenthesis.conditional.begin.regexp" } + "1": { "name": "punctuation.parenthesis.begin.conditional.regexp" } }, "endCaptures": { - "1": { "name": "keyword.operator.conditional.negative.regexp punctuation.parenthesis.conditional.end.regexp" }, + "1": { "name": "keyword.operator.conditional.negative.regexp punctuation.parenthesis.end.conditional.regexp" }, "2": { "name": "invalid.illegal.newline.python" } }, "patterns": [{ "include": "#single-three-regexp-expression" }, { "include": "#comments-string-single-three" }] @@ -1551,10 +1558,10 @@ "begin": "\\(\\?:", "end": "(\\)|(?=\\'\\'\\'))", "beginCaptures": { - "0": { "name": "support.other.parenthesis.regexp punctuation.parenthesis.non-capturing.begin.regexp" } + "0": { "name": "support.other.parenthesis.regexp punctuation.parenthesis.begin.non-capturing.regexp" } }, "endCaptures": { - "1": { "name": "support.other.parenthesis.regexp punctuation.parenthesis.non-capturing.end.regexp" }, + "1": { "name": "support.other.parenthesis.regexp punctuation.parenthesis.end.non-capturing.regexp" }, "2": { "name": "invalid.illegal.newline.python" } }, "patterns": [{ "include": "#single-three-regexp-expression" }, { "include": "#comments-string-single-three" }] @@ -1598,12 +1605,12 @@ "begin": "(\\[)(\\^)?(\\])?", "end": "(\\]|(?=\"))|((?=(?)", "end": "(\\)|(?=\"))|((?=(?)", "end": "(\\)|(?=\"\"\"))", "beginCaptures": { - "1": { "name": "support.other.parenthesis.regexp punctuation.parenthesis.named.begin.regexp" }, + "1": { "name": "support.other.parenthesis.regexp punctuation.parenthesis.begin.named.regexp" }, "2": { "name": "entity.name.tag.named.group.regexp" } }, "endCaptures": { - "1": { "name": "support.other.parenthesis.regexp punctuation.parenthesis.named.end.regexp" }, + "1": { "name": "support.other.parenthesis.regexp punctuation.parenthesis.end.named.regexp" }, "2": { "name": "invalid.illegal.newline.python" } }, "patterns": [{ "include": "#double-three-regexp-expression" }, { "include": "#comments-string-double-three" }] @@ -1810,10 +1817,10 @@ "end": "(\\)|(?=\"\"\"))", "beginCaptures": { "0": { "name": "keyword.operator.lookahead.regexp" }, - "1": { "name": "punctuation.parenthesis.lookahead.begin.regexp" } + "1": { "name": "punctuation.parenthesis.begin.lookahead.regexp" } }, "endCaptures": { - "1": { "name": "keyword.operator.lookahead.regexp punctuation.parenthesis.lookahead.end.regexp" }, + "1": { "name": "keyword.operator.lookahead.regexp punctuation.parenthesis.end.lookahead.regexp" }, "2": { "name": "invalid.illegal.newline.python" } }, "patterns": [{ "include": "#double-three-regexp-expression" }, { "include": "#comments-string-double-three" }] @@ -1823,10 +1830,10 @@ "end": "(\\)|(?=\"\"\"))", "beginCaptures": { "0": { "name": "keyword.operator.lookahead.negative.regexp" }, - "1": { "name": "punctuation.parenthesis.lookahead.begin.regexp" } + "1": { "name": "punctuation.parenthesis.begin.lookahead.regexp" } }, "endCaptures": { - "1": { "name": "keyword.operator.lookahead.negative.regexp punctuation.parenthesis.lookahead.end.regexp" }, + "1": { "name": "keyword.operator.lookahead.negative.regexp punctuation.parenthesis.end.lookahead.regexp" }, "2": { "name": "invalid.illegal.newline.python" } }, "patterns": [{ "include": "#double-three-regexp-expression" }, { "include": "#comments-string-double-three" }] @@ -1836,10 +1843,10 @@ "end": "(\\)|(?=\"\"\"))", "beginCaptures": { "0": { "name": "keyword.operator.lookbehind.regexp" }, - "1": { "name": "punctuation.parenthesis.lookbehind.begin.regexp" } + "1": { "name": "punctuation.parenthesis.begin.lookbehind.regexp" } }, "endCaptures": { - "1": { "name": "keyword.operator.lookbehind.regexp punctuation.parenthesis.lookbehind.end.regexp" }, + "1": { "name": "keyword.operator.lookbehind.regexp punctuation.parenthesis.end.lookbehind.regexp" }, "2": { "name": "invalid.illegal.newline.python" } }, "patterns": [{ "include": "#double-three-regexp-expression" }, { "include": "#comments-string-double-three" }] @@ -1849,10 +1856,10 @@ "end": "(\\)|(?=\"\"\"))", "beginCaptures": { "0": { "name": "keyword.operator.lookbehind.negative.regexp" }, - "1": { "name": "punctuation.parenthesis.lookbehind.begin.regexp" } + "1": { "name": "punctuation.parenthesis.begin.lookbehind.regexp" } }, "endCaptures": { - "1": { "name": "keyword.operator.lookbehind.negative.regexp punctuation.parenthesis.lookbehind.end.regexp" }, + "1": { "name": "keyword.operator.lookbehind.negative.regexp punctuation.parenthesis.end.lookbehind.regexp" }, "2": { "name": "invalid.illegal.newline.python" } }, "patterns": [{ "include": "#double-three-regexp-expression" }, { "include": "#comments-string-double-three" }] @@ -1862,10 +1869,10 @@ "end": "(\\)|(?=\"\"\"))", "beginCaptures": { "0": { "name": "keyword.operator.conditional.regexp" }, - "1": { "name": "punctuation.parenthesis.conditional.begin.regexp" } + "1": { "name": "punctuation.parenthesis.begin.conditional.regexp" } }, "endCaptures": { - "1": { "name": "keyword.operator.conditional.negative.regexp punctuation.parenthesis.conditional.end.regexp" }, + "1": { "name": "keyword.operator.conditional.negative.regexp punctuation.parenthesis.end.conditional.regexp" }, "2": { "name": "invalid.illegal.newline.python" } }, "patterns": [{ "include": "#double-three-regexp-expression" }, { "include": "#comments-string-double-three" }] @@ -1874,10 +1881,10 @@ "begin": "\\(\\?:", "end": "(\\)|(?=\"\"\"))", "beginCaptures": { - "0": { "name": "support.other.parenthesis.regexp punctuation.parenthesis.non-capturing.begin.regexp" } + "0": { "name": "support.other.parenthesis.regexp punctuation.parenthesis.begin.non-capturing.regexp" } }, "endCaptures": { - "1": { "name": "support.other.parenthesis.regexp punctuation.parenthesis.non-capturing.end.regexp" }, + "1": { "name": "support.other.parenthesis.regexp punctuation.parenthesis.end.non-capturing.regexp" }, "2": { "name": "invalid.illegal.newline.python" } }, "patterns": [{ "include": "#double-three-regexp-expression" }, { "include": "#comments-string-double-three" }] @@ -2144,7 +2151,7 @@ "beginCaptures": { "1": { "name": "invalid.illegal.prefix.python" }, "2": { "name": "string.interpolated.python string.quoted.single.python storage.type.string.python" }, - "3": { "name": "punctuation.definition.string.begin.python string.quoted.single.python" } + "3": { "name": "string.quoted.single.python punctuation.definition.string.begin.python" } }, "endCaptures": { "1": { "name": "punctuation.definition.string.end.python string.interpolated.python string.quoted.single.python" }, @@ -2158,7 +2165,7 @@ "end": "(\\2)|((?