From 408e52482736c6c9a23177345468866859f72377 Mon Sep 17 00:00:00 2001
From: Alexander Milster <alexander.milster@gmx.de>
Date: Fri, 3 May 2024 13:24:42 +0200
Subject: [PATCH 1/7] Added new antlr grammar

---
 .../de/jplag/python3/grammar/Python3Lexer.g4  | 447 ++++------
 .../de/jplag/python3/grammar/Python3Parser.g4 | 781 ++++++++++++++----
 .../python3/grammar/Python3LexerBase.java     | 100 ++-
 .../python3/grammar/Python3ParserBase.java    |  17 +-
 4 files changed, 864 insertions(+), 481 deletions(-)

diff --git a/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Lexer.g4 b/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Lexer.g4
index 9b5fee1dc..8b36564b9 100644
--- a/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Lexer.g4
+++ b/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Lexer.g4
@@ -28,190 +28,159 @@
  *                https://github.com/bkiers/python3-parser
  * Developed by : Bart Kiers, bart@big-o.nl
  */
+
+// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false
+// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine
+// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true
+
 lexer grammar Python3Lexer;
 
 // All comments that start with "///" are copy-pasted from
 // The Python Language Reference
 
-tokens { INDENT, DEDENT }
+tokens {
+    INDENT,
+    DEDENT
+}
 
 options {
-    superClass=Python3LexerBase;
+    superClass = Python3LexerBase;
 }
 
+// Insert here @header for C++ lexer.
+
 /*
  * lexer rules
  */
 
-STRING
- : STRING_LITERAL
- | BYTES_LITERAL
- ;
-
-NUMBER
- : INTEGER
- | FLOAT_NUMBER
- | IMAG_NUMBER
- ;
-
-INTEGER
- : DECIMAL_INTEGER
- | OCT_INTEGER
- | HEX_INTEGER
- | BIN_INTEGER
- ;
-
-AND : 'and';
-AS : 'as';
-ASSERT : 'assert';
-ASYNC : 'async';
-AWAIT : 'await';
-BREAK : 'break';
-CASE : 'case' ;
-CLASS : 'class';
-CONTINUE : 'continue';
-DEF : 'def';
-DEL : 'del';
-ELIF : 'elif';
-ELSE : 'else';
-EXCEPT : 'except';
-FALSE : 'False';
-FINALLY : 'finally';
-FOR : 'for';
-FROM : 'from';
-GLOBAL : 'global';
-IF : 'if';
-IMPORT : 'import';
-IN : 'in';
-IS : 'is';
-LAMBDA : 'lambda';
-MATCH : 'match' ;
-NONE : 'None';
-NONLOCAL : 'nonlocal';
-NOT : 'not';
-OR : 'or';
-PASS : 'pass';
-RAISE : 'raise';
-RETURN : 'return';
-TRUE : 'True';
-TRY : 'try';
-UNDERSCORE : '_' ;
-WHILE : 'while';
-WITH : 'with';
-YIELD : 'yield';
-
-NEWLINE
- : ( {this.atStartOfInput()}?   SPACES
-   | ( '\r'? '\n' | '\r' | '\f' ) SPACES?
-   )
-   {this.onNewLine();}
- ;
+STRING: STRING_LITERAL | BYTES_LITERAL;
+
+NUMBER: INTEGER | FLOAT_NUMBER | IMAG_NUMBER;
+
+INTEGER: DECIMAL_INTEGER | OCT_INTEGER | HEX_INTEGER | BIN_INTEGER;
+
+AND        : 'and';
+AS         : 'as';
+ASSERT     : 'assert';
+ASYNC      : 'async';
+AWAIT      : 'await';
+BREAK      : 'break';
+CASE       : 'case';
+CLASS      : 'class';
+CONTINUE   : 'continue';
+DEF        : 'def';
+DEL        : 'del';
+ELIF       : 'elif';
+ELSE       : 'else';
+EXCEPT     : 'except';
+FALSE      : 'False';
+FINALLY    : 'finally';
+FOR        : 'for';
+FROM       : 'from';
+GLOBAL     : 'global';
+IF         : 'if';
+IMPORT     : 'import';
+IN         : 'in';
+IS         : 'is';
+LAMBDA     : 'lambda';
+MATCH      : 'match';
+NONE       : 'None';
+NONLOCAL   : 'nonlocal';
+NOT        : 'not';
+OR         : 'or';
+PASS       : 'pass';
+RAISE      : 'raise';
+RETURN     : 'return';
+TRUE       : 'True';
+TRY        : 'try';
+UNDERSCORE : '_';
+WHILE      : 'while';
+WITH       : 'with';
+YIELD      : 'yield';
+
+NEWLINE: ({this.atStartOfInput()}? SPACES | ( '\r'? '\n' | '\r' | '\f') SPACES?) {this.onNewLine();};
 
 /// identifier   ::=  id_start id_continue*
-NAME
- : ID_START ID_CONTINUE*
- ;
+NAME: ID_START ID_CONTINUE*;
 
 /// stringliteral   ::=  [stringprefix](shortstring | longstring)
 /// stringprefix    ::=  "r" | "u" | "R" | "U" | "f" | "F"
 ///                      | "fr" | "Fr" | "fR" | "FR" | "rf" | "rF" | "Rf" | "RF"
-STRING_LITERAL
- : ( [rR] | [uU] | [fF] | ( [fF] [rR] ) | ( [rR] [fF] ) )? ( SHORT_STRING | LONG_STRING )
- ;
+STRING_LITERAL: ( [rR] | [uU] | [fF] | ( [fF] [rR]) | ( [rR] [fF]))? ( SHORT_STRING | LONG_STRING);
 
 /// bytesliteral   ::=  bytesprefix(shortbytes | longbytes)
 /// bytesprefix    ::=  "b" | "B" | "br" | "Br" | "bR" | "BR" | "rb" | "rB" | "Rb" | "RB"
-BYTES_LITERAL
- : ( [bB] | ( [bB] [rR] ) | ( [rR] [bB] ) ) ( SHORT_BYTES | LONG_BYTES )
- ;
+BYTES_LITERAL: ( [bB] | ( [bB] [rR]) | ( [rR] [bB])) ( SHORT_BYTES | LONG_BYTES);
 
 /// decimalinteger ::=  nonzerodigit digit* | "0"+
-DECIMAL_INTEGER
- : NON_ZERO_DIGIT DIGIT*
- | '0'+
- ;
+DECIMAL_INTEGER: NON_ZERO_DIGIT DIGIT* | '0'+;
 
 /// octinteger     ::=  "0" ("o" | "O") octdigit+
-OCT_INTEGER
- : '0' [oO] OCT_DIGIT+
- ;
+OCT_INTEGER: '0' [oO] OCT_DIGIT+;
 
 /// hexinteger     ::=  "0" ("x" | "X") hexdigit+
-HEX_INTEGER
- : '0' [xX] HEX_DIGIT+
- ;
+HEX_INTEGER: '0' [xX] HEX_DIGIT+;
 
 /// bininteger     ::=  "0" ("b" | "B") bindigit+
-BIN_INTEGER
- : '0' [bB] BIN_DIGIT+
- ;
+BIN_INTEGER: '0' [bB] BIN_DIGIT+;
 
 /// floatnumber   ::=  pointfloat | exponentfloat
-FLOAT_NUMBER
- : POINT_FLOAT
- | EXPONENT_FLOAT
- ;
+FLOAT_NUMBER: POINT_FLOAT | EXPONENT_FLOAT;
 
 /// imagnumber ::=  (floatnumber | intpart) ("j" | "J")
-IMAG_NUMBER
- : ( FLOAT_NUMBER | INT_PART ) [jJ]
- ;
-
-DOT : '.';
-ELLIPSIS : '...';
-STAR : '*';
-OPEN_PAREN : '(' {this.openBrace();};
-CLOSE_PAREN : ')' {this.closeBrace();};
-COMMA : ',';
-COLON : ':';
-SEMI_COLON : ';';
-POWER : '**';
-ASSIGN : '=';
-OPEN_BRACK : '[' {this.openBrace();};
-CLOSE_BRACK : ']' {this.closeBrace();};
-OR_OP : '|';
-XOR : '^';
-AND_OP : '&';
-LEFT_SHIFT : '<<';
-RIGHT_SHIFT : '>>';
-ADD : '+';
-MINUS : '-';
-DIV : '/';
-MOD : '%';
-IDIV : '//';
-NOT_OP : '~';
-OPEN_BRACE : '{' {this.openBrace();};
-CLOSE_BRACE : '}' {this.closeBrace();};
-LESS_THAN : '<';
-GREATER_THAN : '>';
-EQUALS : '==';
-GT_EQ : '>=';
-LT_EQ : '<=';
-NOT_EQ_1 : '<>';
-NOT_EQ_2 : '!=';
-AT : '@';
-ARROW : '->';
-ADD_ASSIGN : '+=';
-SUB_ASSIGN : '-=';
-MULT_ASSIGN : '*=';
-AT_ASSIGN : '@=';
-DIV_ASSIGN : '/=';
-MOD_ASSIGN : '%=';
-AND_ASSIGN : '&=';
-OR_ASSIGN : '|=';
-XOR_ASSIGN : '^=';
-LEFT_SHIFT_ASSIGN : '<<=';
+IMAG_NUMBER: ( FLOAT_NUMBER | INT_PART) [jJ];
+
+DOT                : '.';
+ELLIPSIS           : '...';
+STAR               : '*';
+OPEN_PAREN         : '(' {this.openBrace();};
+CLOSE_PAREN        : ')' {this.closeBrace();};
+COMMA              : ',';
+COLON              : ':';
+SEMI_COLON         : ';';
+POWER              : '**';
+ASSIGN             : '=';
+OPEN_BRACK         : '[' {this.openBrace();};
+CLOSE_BRACK        : ']' {this.closeBrace();};
+OR_OP              : '|';
+XOR                : '^';
+AND_OP             : '&';
+LEFT_SHIFT         : '<<';
+RIGHT_SHIFT        : '>>';
+ADD                : '+';
+MINUS              : '-';
+DIV                : '/';
+MOD                : '%';
+IDIV               : '//';
+NOT_OP             : '~';
+OPEN_BRACE         : '{' {this.openBrace();};
+CLOSE_BRACE        : '}' {this.closeBrace();};
+LESS_THAN          : '<';
+GREATER_THAN       : '>';
+EQUALS             : '==';
+GT_EQ              : '>=';
+LT_EQ              : '<=';
+NOT_EQ_1           : '<>';
+NOT_EQ_2           : '!=';
+AT                 : '@';
+ARROW              : '->';
+ADD_ASSIGN         : '+=';
+SUB_ASSIGN         : '-=';
+MULT_ASSIGN        : '*=';
+AT_ASSIGN          : '@=';
+DIV_ASSIGN         : '/=';
+MOD_ASSIGN         : '%=';
+AND_ASSIGN         : '&=';
+OR_ASSIGN          : '|=';
+XOR_ASSIGN         : '^=';
+LEFT_SHIFT_ASSIGN  : '<<=';
 RIGHT_SHIFT_ASSIGN : '>>=';
-POWER_ASSIGN : '**=';
-IDIV_ASSIGN : '//=';
+POWER_ASSIGN       : '**=';
+IDIV_ASSIGN        : '//=';
 
-SKIP_
- : ( SPACES | COMMENT | LINE_JOINING ) -> skip
- ;
+SKIP_: ( SPACES | COMMENT | LINE_JOINING) -> skip;
 
-UNKNOWN_CHAR
- : .
- ;
+UNKNOWN_CHAR: .;
 
 /*
  * fragments
@@ -220,143 +189,93 @@ UNKNOWN_CHAR
 /// shortstring     ::=  "'" shortstringitem* "'" | '"' shortstringitem* '"'
 /// shortstringitem ::=  shortstringchar | stringescapeseq
 /// shortstringchar ::=  <any source character except "\" or newline or the quote>
-fragment SHORT_STRING
- : '\'' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f'] )* '\''
- | '"' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f"] )* '"'
- ;
+fragment SHORT_STRING:
+    '\'' (STRING_ESCAPE_SEQ | ~[\\\r\n\f'])* '\''
+    | '"' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f"])* '"'
+;
 /// longstring      ::=  "'''" longstringitem* "'''" | '"""' longstringitem* '"""'
-fragment LONG_STRING
- : '\'\'\'' LONG_STRING_ITEM*? '\'\'\''
- | '"""' LONG_STRING_ITEM*? '"""'
- ;
+fragment LONG_STRING: '\'\'\'' LONG_STRING_ITEM*? '\'\'\'' | '"""' LONG_STRING_ITEM*? '"""';
 
 /// longstringitem  ::=  longstringchar | stringescapeseq
-fragment LONG_STRING_ITEM
- : LONG_STRING_CHAR
- | STRING_ESCAPE_SEQ
- ;
+fragment LONG_STRING_ITEM: LONG_STRING_CHAR | STRING_ESCAPE_SEQ;
 
 /// longstringchar  ::=  <any source character except "\">
-fragment LONG_STRING_CHAR
- : ~'\\'
- ;
+fragment LONG_STRING_CHAR: ~'\\';
 
 /// stringescapeseq ::=  "\" <any source character>
-fragment STRING_ESCAPE_SEQ
- : '\\' .
- | '\\' NEWLINE
- ;
+fragment STRING_ESCAPE_SEQ: '\\' . | '\\' NEWLINE;
 
 /// nonzerodigit   ::=  "1"..."9"
-fragment NON_ZERO_DIGIT
- : [1-9]
- ;
+fragment NON_ZERO_DIGIT: [1-9];
 
 /// digit          ::=  "0"..."9"
-fragment DIGIT
- : [0-9]
- ;
+fragment DIGIT: [0-9];
 
 /// octdigit       ::=  "0"..."7"
-fragment OCT_DIGIT
- : [0-7]
- ;
+fragment OCT_DIGIT: [0-7];
 
 /// hexdigit       ::=  digit | "a"..."f" | "A"..."F"
-fragment HEX_DIGIT
- : [0-9a-fA-F]
- ;
+fragment HEX_DIGIT: [0-9a-fA-F];
 
 /// bindigit       ::=  "0" | "1"
-fragment BIN_DIGIT
- : [01]
- ;
+fragment BIN_DIGIT: [01];
 
 /// pointfloat    ::=  [intpart] fraction | intpart "."
-fragment POINT_FLOAT
- : INT_PART? FRACTION
- | INT_PART '.'
- ;
+fragment POINT_FLOAT: INT_PART? FRACTION | INT_PART '.';
 
 /// exponentfloat ::=  (intpart | pointfloat) exponent
-fragment EXPONENT_FLOAT
- : ( INT_PART | POINT_FLOAT ) EXPONENT
- ;
+fragment EXPONENT_FLOAT: ( INT_PART | POINT_FLOAT) EXPONENT;
 
 /// intpart       ::=  digit+
-fragment INT_PART
- : DIGIT+
- ;
+fragment INT_PART: DIGIT+;
 
 /// fraction      ::=  "." digit+
-fragment FRACTION
- : '.' DIGIT+
- ;
+fragment FRACTION: '.' DIGIT+;
 
 /// exponent      ::=  ("e" | "E") ["+" | "-"] digit+
-fragment EXPONENT
- : [eE] [+-]? DIGIT+
- ;
+fragment EXPONENT: [eE] [+-]? DIGIT+;
 
 /// shortbytes     ::=  "'" shortbytesitem* "'" | '"' shortbytesitem* '"'
 /// shortbytesitem ::=  shortbyteschar | bytesescapeseq
-fragment SHORT_BYTES
- : '\'' ( SHORT_BYTES_CHAR_NO_SINGLE_QUOTE | BYTES_ESCAPE_SEQ )* '\''
- | '"' ( SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE | BYTES_ESCAPE_SEQ )* '"'
- ;
+fragment SHORT_BYTES:
+    '\'' (SHORT_BYTES_CHAR_NO_SINGLE_QUOTE | BYTES_ESCAPE_SEQ)* '\''
+    | '"' ( SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE | BYTES_ESCAPE_SEQ)* '"'
+;
 
 /// longbytes      ::=  "'''" longbytesitem* "'''" | '"""' longbytesitem* '"""'
-fragment LONG_BYTES
- : '\'\'\'' LONG_BYTES_ITEM*? '\'\'\''
- | '"""' LONG_BYTES_ITEM*? '"""'
- ;
+fragment LONG_BYTES: '\'\'\'' LONG_BYTES_ITEM*? '\'\'\'' | '"""' LONG_BYTES_ITEM*? '"""';
 
 /// longbytesitem  ::=  longbyteschar | bytesescapeseq
-fragment LONG_BYTES_ITEM
- : LONG_BYTES_CHAR
- | BYTES_ESCAPE_SEQ
- ;
+fragment LONG_BYTES_ITEM: LONG_BYTES_CHAR | BYTES_ESCAPE_SEQ;
 
 /// shortbyteschar ::=  <any ASCII character except "\" or newline or the quote>
-fragment SHORT_BYTES_CHAR_NO_SINGLE_QUOTE
- : [\u0000-\u0009]
- | [\u000B-\u000C]
- | [\u000E-\u0026]
- | [\u0028-\u005B]
- | [\u005D-\u007F]
- ;
-
-fragment SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE
- : [\u0000-\u0009]
- | [\u000B-\u000C]
- | [\u000E-\u0021]
- | [\u0023-\u005B]
- | [\u005D-\u007F]
- ;
+fragment SHORT_BYTES_CHAR_NO_SINGLE_QUOTE:
+    [\u0000-\u0009]
+    | [\u000B-\u000C]
+    | [\u000E-\u0026]
+    | [\u0028-\u005B]
+    | [\u005D-\u007F]
+;
+
+fragment SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE:
+    [\u0000-\u0009]
+    | [\u000B-\u000C]
+    | [\u000E-\u0021]
+    | [\u0023-\u005B]
+    | [\u005D-\u007F]
+;
 
 /// longbyteschar  ::=  <any ASCII character except "\">
-fragment LONG_BYTES_CHAR
- : [\u0000-\u005B]
- | [\u005D-\u007F]
- ;
+fragment LONG_BYTES_CHAR: [\u0000-\u005B] | [\u005D-\u007F];
 
 /// bytesescapeseq ::=  "\" <any ASCII character>
-fragment BYTES_ESCAPE_SEQ
- : '\\' [\u0000-\u007F]
- ;
-
-fragment SPACES
- : [ \t]+
- ;
+fragment BYTES_ESCAPE_SEQ: '\\' [\u0000-\u007F];
 
-fragment COMMENT
- : '#' ~[\r\n\f]*
- ;
+fragment SPACES: [ \t]+;
 
-fragment LINE_JOINING
- : '\\' SPACES? ( '\r'? '\n' | '\r' | '\f')
- ;
+fragment COMMENT: '#' ~[\r\n\f]*;
 
+fragment LINE_JOINING: '\\' SPACES? ( '\r'? '\n' | '\r' | '\f');
 
 // TODO: ANTLR seems lack of some Unicode property support...
 //$ curl https://www.unicode.org/Public/13.0.0/ucd/PropList.txt | grep Other_ID_
@@ -369,36 +288,26 @@ fragment LINE_JOINING
 //1369..1371    ; Other_ID_Continue # No   [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE
 //19DA          ; Other_ID_Continue # No       NEW TAI LUE THAM DIGIT ONE
 
-fragment UNICODE_OIDS
- : '\u1885'..'\u1886'
- | '\u2118'
- | '\u212e'
- | '\u309b'..'\u309c'
- ;
+fragment UNICODE_OIDS: '\u1885' ..'\u1886' | '\u2118' | '\u212e' | '\u309b' ..'\u309c';
 
-fragment UNICODE_OIDC
- : '\u00b7'
- | '\u0387'
- | '\u1369'..'\u1371'
- | '\u19da'
- ;
+fragment UNICODE_OIDC: '\u00b7' | '\u0387' | '\u1369' ..'\u1371' | '\u19da';
 
 /// id_start     ::=  <all characters in general categories Lu, Ll, Lt, Lm, Lo, Nl, the underscore, and characters with the Other_ID_Start property>
-fragment ID_START
- : '_'
- | [\p{L}]
- | [\p{Nl}]
- //| [\p{Other_ID_Start}]
- | UNICODE_OIDS
- ;
+fragment ID_START:
+    '_'
+    | [\p{L}]
+    | [\p{Nl}]
+    //| [\p{Other_ID_Start}]
+    | UNICODE_OIDS
+;
 
 /// id_continue  ::=  <all characters in id_start, plus characters in the categories Mn, Mc, Nd, Pc and others with the Other_ID_Continue property>
-fragment ID_CONTINUE
- : ID_START
- | [\p{Mn}]
- | [\p{Mc}]
- | [\p{Nd}]
- | [\p{Pc}]
- //| [\p{Other_ID_Continue}]
- | UNICODE_OIDC
- ;
+fragment ID_CONTINUE:
+    ID_START
+    | [\p{Mn}]
+    | [\p{Mc}]
+    | [\p{Nd}]
+    | [\p{Pc}]
+    //| [\p{Other_ID_Continue}]
+    | UNICODE_OIDC
+;
\ No newline at end of file
diff --git a/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Parser.g4 b/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Parser.g4
index 8b0143de6..4c5a27cf2 100644
--- a/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Parser.g4
+++ b/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Parser.g4
@@ -31,186 +31,623 @@
 
 // Scraping from https://docs.python.org/3/reference/grammar.html
 
+// $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false
+// $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging
+
 parser grammar Python3Parser;
 
 options {
     superClass = Python3ParserBase;
-    tokenVocab=Python3Lexer;
+    tokenVocab = Python3Lexer;
 }
 
+// Insert here @header for C++ parser.
+
 // All comments that start with "///" are copy-pasted from
 // The Python Language Reference
 
-single_input: NEWLINE | simple_stmts | compound_stmt NEWLINE;
-file_input: (NEWLINE | stmt)* EOF;
-eval_input: testlist NEWLINE* EOF;
-
-decorator: '@' dotted_name ( '(' arglist? ')' )? NEWLINE;
-decorators: decorator+;
-decorated: decorators (classdef | funcdef | async_funcdef);
-
-async_funcdef: ASYNC funcdef;
-funcdef: 'def' name parameters ('->' test)? ':' block;
-
-parameters: '(' typedargslist? ')';
-typedargslist: (tfpdef ('=' test)? (',' tfpdef ('=' test)?)* (',' (
-        '*' tfpdef? (',' tfpdef ('=' test)?)* (',' ('**' tfpdef ','? )? )?
-      | '**' tfpdef ','? )? )?
-  | '*' tfpdef? (',' tfpdef ('=' test)?)* (',' ('**' tfpdef ','? )? )?
-  | '**' tfpdef ','?);
-tfpdef: name (':' test)?;
-varargslist: (vfpdef ('=' test)? (',' vfpdef ('=' test)?)* (',' (
-        '*' vfpdef? (',' vfpdef ('=' test)?)* (',' ('**' vfpdef ','? )? )?
-      | '**' vfpdef (',')?)?)?
-  | '*' vfpdef? (',' vfpdef ('=' test)?)* (',' ('**' vfpdef ','? )? )?
-  | '**' vfpdef ','?
-);
-vfpdef: name;
-
-stmt: simple_stmts | compound_stmt;
-simple_stmts: simple_stmt (';' simple_stmt)* ';'? NEWLINE;
-simple_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt |
-             import_stmt | global_stmt | nonlocal_stmt | assert_stmt);
-expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |
-                     ('=' (yield_expr|testlist_star_expr))*);
-annassign: ':' test ('=' test)?;
-testlist_star_expr: (test|star_expr) (',' (test|star_expr))* ','?;
-augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' |
-            '<<=' | '>>=' | '**=' | '//=');
+single_input
+    : NEWLINE
+    | simple_stmts
+    | compound_stmt NEWLINE
+    ;
+
+file_input
+    : (NEWLINE | stmt)* EOF
+    ;
+
+eval_input
+    : testlist NEWLINE* EOF
+    ;
+
+decorator
+    : '@' dotted_name ('(' arglist? ')')? NEWLINE
+    ;
+
+decorators
+    : decorator+
+    ;
+
+decorated
+    : decorators (classdef | funcdef | async_funcdef)
+    ;
+
+async_funcdef
+    : ASYNC funcdef
+    ;
+
+funcdef
+    : 'def' name parameters ('->' test)? ':' block
+    ;
+
+parameters
+    : '(' typedargslist? ')'
+    ;
+
+typedargslist
+    : (
+        tfpdef ('=' test)? (',' tfpdef ('=' test)?)* (
+            ',' (
+                '*' tfpdef? (',' tfpdef ('=' test)?)* (',' ('**' tfpdef ','?)?)?
+                | '**' tfpdef ','?
+            )?
+        )?
+        | '*' tfpdef? (',' tfpdef ('=' test)?)* (',' ('**' tfpdef ','?)?)?
+        | '**' tfpdef ','?
+    )
+    ;
+
+tfpdef
+    : name (':' test)?
+    ;
+
+varargslist
+    : (
+        vfpdef ('=' test)? (',' vfpdef ('=' test)?)* (
+            ',' (
+                '*' vfpdef? (',' vfpdef ('=' test)?)* (',' ('**' vfpdef ','?)?)?
+                | '**' vfpdef (',')?
+            )?
+        )?
+        | '*' vfpdef? (',' vfpdef ('=' test)?)* (',' ('**' vfpdef ','?)?)?
+        | '**' vfpdef ','?
+    )
+    ;
+
+vfpdef
+    : name
+    ;
+
+stmt
+    : simple_stmts
+    | compound_stmt
+    ;
+
+simple_stmts
+    : simple_stmt (';' simple_stmt)* ';'? NEWLINE
+    ;
+
+simple_stmt
+    : (
+        expr_stmt
+        | del_stmt
+        | pass_stmt
+        | flow_stmt
+        | import_stmt
+        | global_stmt
+        | nonlocal_stmt
+        | assert_stmt
+    )
+    ;
+
+expr_stmt
+    : testlist_star_expr (
+        annassign
+        | augassign (yield_expr | testlist)
+        | ('=' (yield_expr | testlist_star_expr))*
+    )
+    ;
+
+annassign
+    : ':' test ('=' test)?
+    ;
+
+testlist_star_expr
+    : (test | star_expr) (',' (test | star_expr))* ','?
+    ;
+
+augassign
+    : (
+        '+='
+        | '-='
+        | '*='
+        | '@='
+        | '/='
+        | '%='
+        | '&='
+        | '|='
+        | '^='
+        | '<<='
+        | '>>='
+        | '**='
+        | '//='
+    )
+    ;
+
 // For normal and annotated assignments, additional restrictions enforced by the interpreter
-del_stmt: 'del' exprlist;
-pass_stmt: 'pass';
-flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt;
-break_stmt: 'break';
-continue_stmt: 'continue';
-return_stmt: 'return' testlist?;
-yield_stmt: yield_expr;
-raise_stmt: 'raise' (test ('from' test)?)?;
-import_stmt: import_name | import_from;
-import_name: 'import' dotted_as_names;
+del_stmt
+    : 'del' exprlist
+    ;
+
+pass_stmt
+    : 'pass'
+    ;
+
+flow_stmt
+    : break_stmt
+    | continue_stmt
+    | return_stmt
+    | raise_stmt
+    | yield_stmt
+    ;
+
+break_stmt
+    : 'break'
+    ;
+
+continue_stmt
+    : 'continue'
+    ;
+
+return_stmt
+    : 'return' testlist?
+    ;
+
+yield_stmt
+    : yield_expr
+    ;
+
+raise_stmt
+    : 'raise' (test ('from' test)?)?
+    ;
+
+import_stmt
+    : import_name
+    | import_from
+    ;
+
+import_name
+    : 'import' dotted_as_names
+    ;
+
 // note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS
-import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+)
-              'import' ('*' | '(' import_as_names ')' | import_as_names));
-import_as_name: name ('as' name)?;
-dotted_as_name: dotted_name ('as' name)?;
-import_as_names: import_as_name (',' import_as_name)* ','?;
-dotted_as_names: dotted_as_name (',' dotted_as_name)*;
-dotted_name: name ('.' name)*;
-global_stmt: 'global' name (',' name)*;
-nonlocal_stmt: 'nonlocal' name (',' name)*;
-assert_stmt: 'assert' test (',' test)?;
-
-compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt | match_stmt;
-async_stmt: ASYNC (funcdef | with_stmt | for_stmt);
-if_stmt: 'if' test ':' block ('elif' test ':' block)* ('else' ':' block)?;
-while_stmt: 'while' test ':' block ('else' ':' block)?;
-for_stmt: 'for' exprlist 'in' testlist ':' block ('else' ':' block)?;
-try_stmt: ('try' ':' block
-           ((except_clause ':' block)+
-            ('else' ':' block)?
-            ('finally' ':' block)? |
-           'finally' ':' block));
-with_stmt: 'with' with_item (',' with_item)*  ':' block;
-with_item: test ('as' expr)?;
+import_from
+    : (
+        'from' (('.' | '...')* dotted_name | ('.' | '...')+) 'import' (
+            '*'
+            | '(' import_as_names ')'
+            | import_as_names
+        )
+    )
+    ;
+
+import_as_name
+    : name ('as' name)?
+    ;
+
+dotted_as_name
+    : dotted_name ('as' name)?
+    ;
+
+import_as_names
+    : import_as_name (',' import_as_name)* ','?
+    ;
+
+dotted_as_names
+    : dotted_as_name (',' dotted_as_name)*
+    ;
+
+dotted_name
+    : name ('.' name)*
+    ;
+
+global_stmt
+    : 'global' name (',' name)*
+    ;
+
+nonlocal_stmt
+    : 'nonlocal' name (',' name)*
+    ;
+
+assert_stmt
+    : 'assert' test (',' test)?
+    ;
+
+compound_stmt
+    : if_stmt
+    | while_stmt
+    | for_stmt
+    | try_stmt
+    | with_stmt
+    | funcdef
+    | classdef
+    | decorated
+    | async_stmt
+    | match_stmt
+    ;
+
+async_stmt
+    : ASYNC (funcdef | with_stmt | for_stmt)
+    ;
+
+if_stmt
+    : 'if' test ':' block ('elif' test ':' block)* ('else' ':' block)?
+    ;
+
+while_stmt
+    : 'while' test ':' block ('else' ':' block)?
+    ;
+
+for_stmt
+    : 'for' exprlist 'in' testlist ':' block ('else' ':' block)?
+    ;
+
+try_stmt
+    : (
+        'try' ':' block (
+            (except_clause ':' block)+ ('else' ':' block)? ('finally' ':' block)?
+            | 'finally' ':' block
+        )
+    )
+    ;
+
+with_stmt
+    : 'with' with_item (',' with_item)* ':' block
+    ;
+
+with_item
+    : test ('as' expr)?
+    ;
+
 // NB compile.c makes sure that the default except clause is last
-except_clause: 'except' (test ('as' name)?)?;
-block: simple_stmts | NEWLINE INDENT stmt+ DEDENT;
-match_stmt: 'match' subject_expr ':' NEWLINE INDENT case_block+ DEDENT ;
-subject_expr: star_named_expression ',' star_named_expressions? | test ;
-star_named_expressions: ',' star_named_expression+ ','? ;
-star_named_expression: '*' expr | test ;
-case_block: 'case' patterns guard? ':' block ;
-guard: 'if' test ;
-patterns: open_sequence_pattern | pattern ;
-pattern: as_pattern | or_pattern ;
-as_pattern: or_pattern 'as' pattern_capture_target ;
-or_pattern: closed_pattern ('|' closed_pattern)* ;
-closed_pattern: literal_pattern | capture_pattern | wildcard_pattern | value_pattern | group_pattern | sequence_pattern | mapping_pattern | class_pattern ;
-literal_pattern: signed_number { this.cannotBePlusMinus() }? | complex_number | strings | 'None' | 'True' | 'False' ;
-literal_expr: signed_number { this.cannotBePlusMinus() }? | complex_number | strings | 'None' | 'True' | 'False' ;
-complex_number: signed_real_number '+' imaginary_number
+except_clause
+    : 'except' (test ('as' name)?)?
+    ;
+
+block
+    : simple_stmts
+    | NEWLINE INDENT stmt+ DEDENT
+    ;
+
+match_stmt
+    : 'match' subject_expr ':' NEWLINE INDENT case_block+ DEDENT
+    ;
+
+subject_expr
+    : star_named_expression ',' star_named_expressions?
+    | test
+    ;
+
+star_named_expressions
+    : star_named_expression (',' star_named_expression)* ','? // comma-separated list; subject_expr already consumed the preceding ','
+    ;
+
+star_named_expression
+    : '*' expr
+    | test
+    ;
+
+case_block
+    : 'case' patterns guard? ':' block
+    ;
+
+guard
+    : 'if' test
+    ;
+
+patterns
+    : open_sequence_pattern
+    | pattern
+    ;
+
+pattern
+    : as_pattern
+    | or_pattern
+    ;
+
+as_pattern
+    : or_pattern 'as' pattern_capture_target
+    ;
+
+or_pattern
+    : closed_pattern ('|' closed_pattern)*
+    ;
+
+closed_pattern
+    : literal_pattern
+    | capture_pattern
+    | wildcard_pattern
+    | value_pattern
+    | group_pattern
+    | sequence_pattern
+    | mapping_pattern
+    | class_pattern
+    ;
+
+literal_pattern
+    : signed_number { this.CannotBePlusMinus() }?
+    | complex_number
+    | strings
+    | 'None'
+    | 'True'
+    | 'False'
+    ;
+
+literal_expr
+    : signed_number { this.CannotBePlusMinus() }?
+    | complex_number
+    | strings
+    | 'None'
+    | 'True'
+    | 'False'
+    ;
+
+complex_number
+    : signed_real_number '+' imaginary_number
     | signed_real_number '-' imaginary_number
     ;
-signed_number: NUMBER | '-' NUMBER ;
-signed_real_number: real_number | '-' real_number ;
-real_number: NUMBER ;
-imaginary_number: NUMBER ;
-capture_pattern: pattern_capture_target ;
-pattern_capture_target: /* cannot be '_' */ name { this.cannotBeDotLpEq() }? ;
-wildcard_pattern: '_' ;
-value_pattern: attr { this.cannotBeDotLpEq() }? ;
-attr: name ('.' name)+ ;
-name_or_attr: attr | name ;
-group_pattern: '(' pattern ')' ;
-sequence_pattern:
-    '[' maybe_sequence_pattern? ']'
+
+signed_number
+    : NUMBER
+    | '-' NUMBER
+    ;
+
+signed_real_number
+    : real_number
+    | '-' real_number
+    ;
+
+real_number
+    : NUMBER
+    ;
+
+imaginary_number
+    : NUMBER
+    ;
+
+capture_pattern
+    : pattern_capture_target
+    ;
+
+pattern_capture_target
+    : /* cannot be '_' */ name { this.CannotBeDotLpEq() }?
+    ;
+
+wildcard_pattern
+    : '_'
+    ;
+
+value_pattern
+    : attr { this.CannotBeDotLpEq() }?
+    ;
+
+attr
+    : name ('.' name)+
+    ;
+
+name_or_attr
+    : attr
+    | name
+    ;
+
+group_pattern
+    : '(' pattern ')'
+    ;
+
+sequence_pattern
+    : '[' maybe_sequence_pattern? ']'
     | '(' open_sequence_pattern? ')'
     ;
-open_sequence_pattern: maybe_star_pattern ',' maybe_sequence_pattern? ;
-maybe_sequence_pattern: maybe_star_pattern (',' maybe_star_pattern)* ','? ;
-maybe_star_pattern: star_pattern | pattern ;
-star_pattern:
-    '*' pattern_capture_target
+
+open_sequence_pattern
+    : maybe_star_pattern ',' maybe_sequence_pattern?
+    ;
+
+maybe_sequence_pattern
+    : maybe_star_pattern (',' maybe_star_pattern)* ','?
+    ;
+
+maybe_star_pattern
+    : star_pattern
+    | pattern
+    ;
+
+star_pattern
+    : '*' pattern_capture_target
     | '*' wildcard_pattern
     ;
-mapping_pattern: '{' '}'
+
+mapping_pattern
+    : '{' '}'
     | '{' double_star_pattern ','? '}'
     | '{' items_pattern ',' double_star_pattern ','? '}'
     | '{' items_pattern ','? '}'
     ;
-items_pattern: key_value_pattern (',' key_value_pattern)* ;
-key_value_pattern: (literal_expr | attr) ':' pattern ;
-double_star_pattern: '**' pattern_capture_target ;
-class_pattern: name_or_attr '(' ')'
+
+items_pattern
+    : key_value_pattern (',' key_value_pattern)*
+    ;
+
+key_value_pattern
+    : (literal_expr | attr) ':' pattern
+    ;
+
+double_star_pattern
+    : '**' pattern_capture_target
+    ;
+
+class_pattern
+    : name_or_attr '(' ')'
     | name_or_attr '(' positional_patterns ','? ')'
     | name_or_attr '(' keyword_patterns ','? ')'
     | name_or_attr '(' positional_patterns ',' keyword_patterns ','? ')'
     ;
-positional_patterns: pattern (',' pattern)* ;
-keyword_patterns: keyword_pattern (',' keyword_pattern)* ;
-keyword_pattern: name '=' pattern ;
-
-test: or_test ('if' or_test 'else' test)? | lambdef;
-test_nocond: or_test | lambdef_nocond;
-lambdef: 'lambda' varargslist? ':' test;
-lambdef_nocond: 'lambda' varargslist? ':' test_nocond;
-or_test: and_test ('or' and_test)*;
-and_test: not_test ('and' not_test)*;
-not_test: 'not' not_test | comparison;
-comparison: expr (comp_op expr)*;
+
+positional_patterns
+    : pattern (',' pattern)*
+    ;
+
+keyword_patterns
+    : keyword_pattern (',' keyword_pattern)*
+    ;
+
+keyword_pattern
+    : name '=' pattern
+    ;
+
+test
+    : or_test ('if' or_test 'else' test)?
+    | lambdef
+    ;
+
+test_nocond
+    : or_test
+    | lambdef_nocond
+    ;
+
+lambdef
+    : 'lambda' varargslist? ':' test
+    ;
+
+lambdef_nocond
+    : 'lambda' varargslist? ':' test_nocond
+    ;
+
+or_test
+    : and_test ('or' and_test)*
+    ;
+
+and_test
+    : not_test ('and' not_test)*
+    ;
+
+not_test
+    : 'not' not_test
+    | comparison
+    ;
+
+comparison
+    : expr (comp_op expr)*
+    ;
+
 // <> isn't actually a valid comparison operator in Python. It's here for the
 // sake of a __future__ import described in PEP 401 (which really works :-)
-comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not';
-star_expr: '*' expr;
-expr: xor_expr ('|' xor_expr)*;
-xor_expr: and_expr ('^' and_expr)*;
-and_expr: shift_expr ('&' shift_expr)*;
-shift_expr: arith_expr (('<<'|'>>') arith_expr)*;
-arith_expr: term (('+'|'-') term)*;
-term: factor (('*'|'@'|'/'|'%'|'//') factor)*;
-factor: ('+'|'-'|'~') factor | power;
-power: atom_expr ('**' factor)?;
-atom_expr: AWAIT? atom trailer*;
-atom: '(' (yield_expr|testlist_comp)? ')'
-   | '[' testlist_comp? ']'
-   | '{' dictorsetmaker? '}'
-   | name | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False' ;
-name : NAME | '_' | 'match' ;
-testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* ','? );
-trailer: '(' arglist? ')' | '[' subscriptlist ']' | '.' name ;
-subscriptlist: subscript_ (',' subscript_)* ','?;
-subscript_: test | test? ':' test? sliceop?;
-sliceop: ':' test?;
-exprlist: (expr|star_expr) (',' (expr|star_expr))* ','?;
-testlist: test (',' test)* ','?;
-dictorsetmaker: ( ((test ':' test | '**' expr)
-                   (comp_for | (',' (test ':' test | '**' expr))* ','?)) |
-                  ((test | star_expr)
-                   (comp_for | (',' (test | star_expr))* ','?)) );
-
-classdef: 'class' name ('(' arglist? ')')? ':' block;
-
-arglist: argument (',' argument)* ','?;
+comp_op
+    : '<'
+    | '>'
+    | '=='
+    | '>='
+    | '<='
+    | '<>'
+    | '!='
+    | 'in'
+    | 'not' 'in'
+    | 'is'
+    | 'is' 'not'
+    ;
+
+star_expr
+    : '*' expr
+    ;
+
+expr // alternatives are listed from highest to lowest operator precedence
+    : atom_expr
+    | <assoc = right> expr '**' expr // power groups right-to-left: a ** b ** c is a ** (b ** c)
+    | ('+' | '-' | '~')+ expr
+    | expr ('*' | '@' | '/' | '%' | '//') expr
+    | expr ('+' | '-') expr
+    | expr ('<<' | '>>') expr
+    | expr '&' expr
+    | expr '^' expr
+    | expr '|' expr
+    ;
+
+//expr: xor_expr ('|' xor_expr)*;
+//xor_expr: and_expr ('^' and_expr)*;
+//and_expr: shift_expr ('&' shift_expr)*;
+//shift_expr: arith_expr (('<<'|'>>') arith_expr)*;
+//arith_expr: term (('+'|'-') term)*;
+//term: factor (('*'|'@'|'/'|'%'|'//') factor)*;
+//factor: ('+'|'-'|'~') factor | power;
+//power: atom_expr ('**' factor)?;
+atom_expr
+    : AWAIT? atom trailer*
+    ;
+
+atom
+    : '(' (yield_expr | testlist_comp)? ')'
+    | '[' testlist_comp? ']'
+    | '{' dictorsetmaker? '}'
+    | name
+    | NUMBER
+    | STRING+
+    | '...'
+    | 'None'
+    | 'True'
+    | 'False'
+    ;
+
+name
+    : NAME
+    | '_'
+    | 'match'
+    ;
+
+testlist_comp
+    : (test | star_expr) (comp_for | (',' (test | star_expr))* ','?)
+    ;
+
+trailer
+    : '(' arglist? ')'
+    | '[' subscriptlist ']'
+    | '.' name
+    ;
+
+subscriptlist
+    : subscript_ (',' subscript_)* ','?
+    ;
+
+subscript_
+    : test
+    | test? ':' test? sliceop?
+    ;
+
+sliceop
+    : ':' test?
+    ;
+
+exprlist
+    : (expr | star_expr) (',' (expr | star_expr))* ','?
+    ;
+
+testlist
+    : test (',' test)* ','?
+    ;
+
+dictorsetmaker
+    : (
+        ((test ':' test | '**' expr) (comp_for | (',' (test ':' test | '**' expr))* ','?))
+        | ((test | star_expr) (comp_for | (',' (test | star_expr))* ','?))
+    )
+    ;
+
+classdef
+    : 'class' name ('(' arglist? ')')? ':' block
+    ;
+
+arglist
+    : argument (',' argument)* ','?
+    ;
 
 // The reason that keywords are test nodes instead of NAME is that using NAME
 // results in an ambiguity. ast.c makes sure it's a NAME.
@@ -221,19 +658,37 @@ arglist: argument (',' argument)* ','?;
 // Illegal combinations and orderings are blocked in ast.c:
 // multiple (test comp_for) arguments are blocked; keyword unpackings
 // that precede iterable unpackings are blocked; etc.
-argument: ( test comp_for? |
-            test '=' test |
-            '**' test |
-            '*' test );
+argument
+    : (test comp_for? | test '=' test | '**' test | '*' test)
+    ;
+
+comp_iter
+    : comp_for
+    | comp_if
+    ;
 
-comp_iter: comp_for | comp_if;
-comp_for: ASYNC? 'for' exprlist 'in' or_test comp_iter?;
-comp_if: 'if' test_nocond comp_iter?;
+comp_for
+    : ASYNC? 'for' exprlist 'in' or_test comp_iter?
+    ;
+
+comp_if
+    : 'if' test_nocond comp_iter?
+    ;
 
 // not used in grammar, but may appear in "node" passed from Parser to Compiler
-encoding_decl: name;
+encoding_decl
+    : name
+    ;
 
-yield_expr: 'yield' yield_arg?;
-yield_arg: 'from' test | testlist;
+yield_expr
+    : 'yield' yield_arg?
+    ;
+
+yield_arg
+    : 'from' test
+    | testlist
+    ;
 
-strings: STRING+ ;
+strings
+    : STRING+
+    ;
\ No newline at end of file
diff --git a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java
index 0e24adf20..9b842dc8c 100644
--- a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java
+++ b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java
@@ -1,17 +1,18 @@
 package de.jplag.python3.grammar;
 
-import java.util.Deque;
-import java.util.LinkedList;
+import org.antlr.v4.runtime.*;
 
-import org.antlr.v4.runtime.CharStream;
-import org.antlr.v4.runtime.CommonToken;
-import org.antlr.v4.runtime.Lexer;
-import org.antlr.v4.runtime.Token;
+import java.util.ArrayDeque;
+import java.util.Deque;
 
 abstract class Python3LexerBase extends Lexer {
-    private LinkedList<Token> tokens = new LinkedList<>();
-    private Deque<Integer> indents = new LinkedList<>();
+    // A queue where extra tokens are pushed on (see the NEWLINE lexer rule).
+    private java.util.LinkedList<Token> tokens = new java.util.LinkedList<>();
+    // The stack that keeps track of the indentation level.
+    private Deque<Integer> indents = new ArrayDeque<>();
+    // The number of currently open braces, brackets and parentheses.
     private int opened = 0;
+    // The most recently produced token.
     private Token lastToken = null;
 
     protected Python3LexerBase(CharStream input) {
@@ -26,35 +27,38 @@ public void emit(Token t) {
 
     @Override
     public Token nextToken() {
+        // Check if the end-of-file is ahead and there are still some DEDENTS expected.
         if (_input.LA(1) == EOF && !this.indents.isEmpty()) {
+            // Remove any trailing EOF tokens from our buffer.
+            for (int i = tokens.size() - 1; i >= 0; i--) {
+                if (tokens.get(i).getType() == EOF) {
+                    tokens.remove(i);
+                }
+            }
+
+            // First emit an extra line break that serves as the end of the statement.
             this.emit(commonToken(Python3Lexer.NEWLINE, "\n"));
-            this.removeTrailingEofTokens();
 
+            // Now emit as many DEDENT tokens as needed.
             while (!indents.isEmpty()) {
                 this.emit(createDedent());
                 indents.pop();
             }
 
-            this.emit(commonToken(EOF, "<EOF>"));
+            // Put the EOF back on the token stream.
+            this.emit(commonToken(Python3Lexer.EOF, "<EOF>"));
         }
 
         Token next = super.nextToken();
 
         if (next.getChannel() == Token.DEFAULT_CHANNEL) {
+            // Keep track of the last token on the default channel.
             this.lastToken = next;
         }
 
         return tokens.isEmpty() ? next : tokens.poll();
     }
 
-    private void removeTrailingEofTokens() {
-        for (int i = tokens.size() - 1; i >= 0; i--) {
-            if (tokens.get(i).getType() == EOF) {
-                tokens.remove(i);
-            }
-        }
-    }
-
     private Token createDedent() {
         CommonToken dedent = commonToken(Python3Lexer.DEDENT, "");
         dedent.setLine(this.lastToken.getLine());
@@ -67,21 +71,24 @@ private CommonToken commonToken(int type, String text) {
         return new CommonToken(this._tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, start, stop);
     }
 
-    /**
-     * Calculates the indentation of the provided spaces, taking the following rules into account:
-     * <p>
-     * "Tabs are replaced (from left to right) by one to eight spaces such that the total number of characters up to and
-     * including the replacement is a multiple of eight [...]"
-     * <p>
-     * -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation
-     **/
+    // Calculates the indentation of the provided spaces, taking the
+    // following rules into account:
+    //
+    // "Tabs are replaced (from left to right) by one to eight spaces
+    //  such that the total number of characters up to and including
+    //  the replacement is a multiple of eight [...]"
+    //
+    //  -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation
     static int getIndentationCount(String spaces) {
         int count = 0;
         for (char ch : spaces.toCharArray()) {
-            if (ch == '\t') {
-                count += 8 - (count % 8);
-            } else {
-                count++;
+            switch (ch) {
+                case '\t':
+                    count += 8 - (count % 8);
+                    break;
+                default:
+                    // A normal space char.
+                    count++;
             }
         }
 
@@ -92,34 +99,42 @@ boolean atStartOfInput() {
         return super.getCharPositionInLine() == 0 && super.getLine() == 1;
     }
 
-    void openBrace() {
+    void openBrace(){
         this.opened++;
     }
 
-    void closeBrace() {
+    void closeBrace(){
         this.opened--;
     }
 
-    void onNewLine() {
+    void onNewLine(){
         String newLine = getText().replaceAll("[^\r\n\f]+", "");
         String spaces = getText().replaceAll("[\r\n\f]+", "");
 
+        // Strip newlines inside open clauses except if we are near EOF. We keep NEWLINEs near EOF to
+        // satisfy the final newline needed by the single_input rule used by the REPL.
         int next = _input.LA(1);
         int nextnext = _input.LA(2);
         if (opened > 0 || (nextnext != -1 && (next == '\r' || next == '\n' || next == '\f' || next == '#'))) {
+            // If we're inside a list or on a blank line, ignore all indents,
+            // dedents and line breaks.
             skip();
-        } else {
+        }
+        else {
             emit(commonToken(Python3Lexer.NEWLINE, newLine));
             int indent = getIndentationCount(spaces);
             int previous = indents.isEmpty() ? 0 : indents.peek();
-
             if (indent == previous) {
+                // skip indents of the same size as the present indent-size
                 skip();
-            } else if (indent > previous) {
+            }
+            else if (indent > previous) {
                 indents.push(indent);
                 emit(commonToken(Python3Lexer.INDENT, spaces));
-            } else {
-                while (!indents.isEmpty() && indents.peek() > indent) {
+            }
+            else {
+                // Possibly emit more than 1 DEDENT token.
+                while(!indents.isEmpty() && indents.peek() > indent) {
                     this.emit(createDedent());
                     indents.pop();
                 }
@@ -128,11 +143,12 @@ void onNewLine() {
     }
 
     @Override
-    public void reset() {
-        tokens = new LinkedList<>();
-        indents = new LinkedList<>();
+    public void reset()
+    {
+        tokens = new java.util.LinkedList<>();
+        indents = new ArrayDeque<>();
         opened = 0;
         lastToken = null;
         super.reset();
     }
-}
+}
\ No newline at end of file
diff --git a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java
index 44b5926a4..c3623ed74 100644
--- a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java
+++ b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java
@@ -1,18 +1,21 @@
 package de.jplag.python3.grammar;
 
-import org.antlr.v4.runtime.Parser;
-import org.antlr.v4.runtime.TokenStream;
+import org.antlr.v4.runtime.*;
 
-public abstract class Python3ParserBase extends Parser {
-    protected Python3ParserBase(TokenStream input) {
+public abstract class Python3ParserBase extends Parser
+{
+    protected Python3ParserBase(TokenStream input)
+    {
         super(input);
     }
 
-    public boolean cannotBePlusMinus() {
+    public boolean CannotBePlusMinus()
+    {
         return true;
     }
 
-    public boolean cannotBeDotLpEq() {
+    public boolean CannotBeDotLpEq()
+    {
         return true;
     }
-}
+}
\ No newline at end of file

From e8bb6fb3e2adbffb1c6f618047c854caa1754a23 Mon Sep 17 00:00:00 2001
From: Alexander Milster <alexander.milster@gmx.de>
Date: Fri, 3 May 2024 14:05:47 +0200
Subject: [PATCH 2/7] Migrated python language module to new syntax

---
 languages/python-3/pom.xml                    |   5 +
 .../jplag/python3/JplagPython3Listener.java   | 216 ------------------
 .../main/java/de/jplag/python3/Parser.java    |  78 -------
 .../java/de/jplag/python3/PythonLanguage.java |  18 +-
 .../java/de/jplag/python3/PythonListener.java | 107 +++++++++
 .../de/jplag/python3/PythonParserAdapter.java |  33 +++
 6 files changed, 148 insertions(+), 309 deletions(-)
 delete mode 100644 languages/python-3/src/main/java/de/jplag/python3/JplagPython3Listener.java
 delete mode 100644 languages/python-3/src/main/java/de/jplag/python3/Parser.java
 create mode 100644 languages/python-3/src/main/java/de/jplag/python3/PythonListener.java
 create mode 100644 languages/python-3/src/main/java/de/jplag/python3/PythonParserAdapter.java

diff --git a/languages/python-3/pom.xml b/languages/python-3/pom.xml
index 92712c0b1..fb6865e69 100644
--- a/languages/python-3/pom.xml
+++ b/languages/python-3/pom.xml
@@ -13,6 +13,11 @@
             <groupId>org.antlr</groupId>
             <artifactId>antlr4-runtime</artifactId>
         </dependency>
+        <dependency>
+            <groupId>de.jplag</groupId>
+            <artifactId>language-antlr-utils</artifactId>
+            <version>${revision}</version>
+        </dependency>
     </dependencies>
 
     <build>
diff --git a/languages/python-3/src/main/java/de/jplag/python3/JplagPython3Listener.java b/languages/python-3/src/main/java/de/jplag/python3/JplagPython3Listener.java
deleted file mode 100644
index 695d07e40..000000000
--- a/languages/python-3/src/main/java/de/jplag/python3/JplagPython3Listener.java
+++ /dev/null
@@ -1,216 +0,0 @@
-package de.jplag.python3;
-
-import static de.jplag.python3.Python3TokenType.APPLY;
-import static de.jplag.python3.Python3TokenType.ARRAY;
-import static de.jplag.python3.Python3TokenType.ASSERT;
-import static de.jplag.python3.Python3TokenType.ASSIGN;
-import static de.jplag.python3.Python3TokenType.BREAK;
-import static de.jplag.python3.Python3TokenType.CLASS_BEGIN;
-import static de.jplag.python3.Python3TokenType.CLASS_END;
-import static de.jplag.python3.Python3TokenType.CONTINUE;
-import static de.jplag.python3.Python3TokenType.DEC_BEGIN;
-import static de.jplag.python3.Python3TokenType.DEC_END;
-import static de.jplag.python3.Python3TokenType.DEL;
-import static de.jplag.python3.Python3TokenType.EXCEPT_BEGIN;
-import static de.jplag.python3.Python3TokenType.EXCEPT_END;
-import static de.jplag.python3.Python3TokenType.FINALLY;
-import static de.jplag.python3.Python3TokenType.FOR_BEGIN;
-import static de.jplag.python3.Python3TokenType.FOR_END;
-import static de.jplag.python3.Python3TokenType.IF_BEGIN;
-import static de.jplag.python3.Python3TokenType.IF_END;
-import static de.jplag.python3.Python3TokenType.IMPORT;
-import static de.jplag.python3.Python3TokenType.LAMBDA;
-import static de.jplag.python3.Python3TokenType.METHOD_BEGIN;
-import static de.jplag.python3.Python3TokenType.METHOD_END;
-import static de.jplag.python3.Python3TokenType.RAISE;
-import static de.jplag.python3.Python3TokenType.RETURN;
-import static de.jplag.python3.Python3TokenType.TRY_BEGIN;
-import static de.jplag.python3.Python3TokenType.WHILE_BEGIN;
-import static de.jplag.python3.Python3TokenType.WHILE_END;
-import static de.jplag.python3.Python3TokenType.WITH_BEGIN;
-import static de.jplag.python3.Python3TokenType.WITH_END;
-import static de.jplag.python3.Python3TokenType.YIELD;
-
-import org.antlr.v4.runtime.tree.TerminalNode;
-
-import de.jplag.python3.grammar.Python3Parser;
-import de.jplag.python3.grammar.Python3ParserBaseListener;
-
-public class JplagPython3Listener extends Python3ParserBaseListener {
-
-    private final Parser parser;
-
-    public JplagPython3Listener(Parser parser) {
-        this.parser = parser;
-    }
-
-    @Override
-    public void enterAssert_stmt(Python3Parser.Assert_stmtContext ctx) {
-        parser.add(ASSERT, ctx.getStart());
-    }
-
-    @Override
-    public void enterDecorated(Python3Parser.DecoratedContext ctx) {
-        parser.add(DEC_BEGIN, ctx.getStart());
-    }
-
-    @Override
-    public void exitDecorated(Python3Parser.DecoratedContext ctx) {
-        parser.addEnd(DEC_END, ctx.getStop());
-    }
-
-    @Override
-    public void enterRaise_stmt(Python3Parser.Raise_stmtContext ctx) {
-        parser.add(RAISE, ctx.getStart());
-    }
-
-    @Override
-    public void enterExcept_clause(Python3Parser.Except_clauseContext ctx) {
-        parser.add(EXCEPT_BEGIN, ctx.getStart());
-    }
-
-    @Override
-    public void exitExcept_clause(Python3Parser.Except_clauseContext ctx) {
-        parser.addEnd(EXCEPT_END, ctx.getStop());
-    }
-
-    @Override
-    public void enterDictorsetmaker(Python3Parser.DictorsetmakerContext ctx) {
-        parser.add(ARRAY, ctx.getStart());
-    }
-
-    @Override
-    public void enterReturn_stmt(Python3Parser.Return_stmtContext ctx) {
-        parser.add(RETURN, ctx.getStart());
-    }
-
-    @Override
-    public void enterWhile_stmt(Python3Parser.While_stmtContext ctx) {
-        parser.add(WHILE_BEGIN, ctx.getStart());
-    }
-
-    @Override
-    public void exitWhile_stmt(Python3Parser.While_stmtContext ctx) {
-        parser.addEnd(WHILE_END, ctx.getStop());
-    }
-
-    @Override
-    public void enterYield_arg(Python3Parser.Yield_argContext ctx) {
-        parser.add(YIELD, ctx.getStart());
-    }
-
-    @Override
-    public void enterImport_stmt(Python3Parser.Import_stmtContext ctx) {
-        parser.add(IMPORT, ctx.getStart());
-    }
-
-    @Override
-    public void enterLambdef(Python3Parser.LambdefContext ctx) {
-        parser.add(LAMBDA, ctx.getStart());
-    }
-
-    @Override
-    public void enterTry_stmt(Python3Parser.Try_stmtContext ctx) {
-        parser.add(TRY_BEGIN, ctx.getStart());
-    }
-
-    @Override
-    public void enterBreak_stmt(Python3Parser.Break_stmtContext ctx) {
-        parser.add(BREAK, ctx.getStart());
-    }
-
-    @Override
-    public void enterTestlist_comp(Python3Parser.Testlist_compContext ctx) {
-        if (ctx.getText().contains(",")) {
-            parser.add(ARRAY, ctx.getStart());
-        }
-    }
-
-    @Override
-    public void enterIf_stmt(Python3Parser.If_stmtContext ctx) {
-        parser.add(IF_BEGIN, ctx.getStart());
-    }
-
-    @Override
-    public void exitIf_stmt(Python3Parser.If_stmtContext ctx) {
-        parser.addEnd(IF_END, ctx.getStop());
-    }
-
-    @Override
-    public void enterWith_stmt(Python3Parser.With_stmtContext ctx) {
-        parser.add(WITH_BEGIN, ctx.getStart());
-    }
-
-    @Override
-    public void exitWith_stmt(Python3Parser.With_stmtContext ctx) {
-        parser.addEnd(WITH_END, ctx.getStop());
-    }
-
-    @Override
-    public void enterClassdef(Python3Parser.ClassdefContext ctx) {
-        parser.add(CLASS_BEGIN, ctx.getStart());
-    }
-
-    @Override
-    public void exitClassdef(Python3Parser.ClassdefContext ctx) {
-        parser.addEnd(CLASS_END, ctx.getStop());
-    }
-
-    @Override
-    public void enterTrailer(Python3Parser.TrailerContext ctx) {
-        if (ctx.getText().charAt(0) == '(') {
-            parser.add(APPLY, ctx.getStart());
-        } else {
-            parser.add(ARRAY, ctx.getStart());
-        }
-    }
-
-    @Override
-    public void enterFuncdef(Python3Parser.FuncdefContext ctx) {
-        parser.add(METHOD_BEGIN, ctx.getStart());
-    }
-
-    @Override
-    public void exitFuncdef(Python3Parser.FuncdefContext ctx) {
-        parser.addEnd(METHOD_END, ctx.getStop());
-    }
-
-    @Override
-    public void enterAugassign(Python3Parser.AugassignContext ctx) {
-        parser.add(ASSIGN, ctx.getStart());
-    }
-
-    @Override
-    public void enterYield_stmt(Python3Parser.Yield_stmtContext ctx) {
-        parser.add(YIELD, ctx.getStart());
-    }
-
-    @Override
-    public void enterContinue_stmt(Python3Parser.Continue_stmtContext ctx) {
-        parser.add(CONTINUE, ctx.getStart());
-    }
-
-    @Override
-    public void enterFor_stmt(Python3Parser.For_stmtContext ctx) {
-        parser.add(FOR_BEGIN, ctx.getStart());
-    }
-
-    @Override
-    public void exitFor_stmt(Python3Parser.For_stmtContext ctx) {
-        parser.addEnd(FOR_END, ctx.getStop());
-    }
-
-    @Override
-    public void enterDel_stmt(Python3Parser.Del_stmtContext ctx) {
-        parser.add(DEL, ctx.getStart());
-    }
-
-    @Override
-    public void visitTerminal(TerminalNode node) {
-        if (node.getText().equals("=")) {
-            parser.add(ASSIGN, node.getSymbol());
-        } else if (node.getText().equals("finally")) {
-            parser.add(FINALLY, node.getSymbol());
-        }
-    }
-}
diff --git a/languages/python-3/src/main/java/de/jplag/python3/Parser.java b/languages/python-3/src/main/java/de/jplag/python3/Parser.java
deleted file mode 100644
index 2dc352bfe..000000000
--- a/languages/python-3/src/main/java/de/jplag/python3/Parser.java
+++ /dev/null
@@ -1,78 +0,0 @@
-package de.jplag.python3;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Set;
-
-import org.antlr.v4.runtime.CharStreams;
-import org.antlr.v4.runtime.CommonTokenStream;
-import org.antlr.v4.runtime.tree.ParseTree;
-import org.antlr.v4.runtime.tree.ParseTreeWalker;
-
-import de.jplag.AbstractParser;
-import de.jplag.ParsingException;
-import de.jplag.Token;
-import de.jplag.TokenType;
-import de.jplag.python3.grammar.Python3Lexer;
-import de.jplag.python3.grammar.Python3Parser;
-import de.jplag.python3.grammar.Python3Parser.File_inputContext;
-import de.jplag.util.FileUtils;
-
-public class Parser extends AbstractParser {
-
-    private List<Token> tokens;
-    private File currentFile;
-
-    /**
-     * Creates the parser.
-     */
-    public Parser() {
-        super();
-    }
-
-    public List<Token> parse(Set<File> files) throws ParsingException {
-        tokens = new ArrayList<>();
-        for (File file : files) {
-            logger.trace("Parsing file {}", file.getName());
-            parseFile(file);
-            tokens.add(Token.fileEnd(file));
-        }
-        return tokens;
-    }
-
-    private void parseFile(File file) throws ParsingException {
-        try (BufferedReader reader = FileUtils.openFileReader(file)) {
-            currentFile = file;
-
-            // create a lexer that feeds off of input CharStream
-            Python3Lexer lexer = new Python3Lexer(CharStreams.fromReader(reader));
-
-            // create a buffer of tokens pulled from the lexer
-            CommonTokenStream tokens = new CommonTokenStream(lexer);
-
-            // create a parser that feeds off the tokens buffer
-            Python3Parser parser = new Python3Parser(tokens);
-            File_inputContext in = parser.file_input();
-
-            ParseTreeWalker ptw = new ParseTreeWalker();
-            for (int i = 0; i < in.getChildCount(); i++) {
-                ParseTree pt = in.getChild(i);
-                ptw.walk(new JplagPython3Listener(this), pt);
-            }
-
-        } catch (IOException e) {
-            throw new ParsingException(file, e.getMessage(), e);
-        }
-    }
-
-    public void add(TokenType type, org.antlr.v4.runtime.Token token) {
-        tokens.add(new Token(type, currentFile, token.getLine(), token.getCharPositionInLine() + 1, token.getText().length()));
-    }
-
-    public void addEnd(TokenType type, org.antlr.v4.runtime.Token token) {
-        tokens.add(new Token(type, currentFile, token.getLine(), tokens.get(tokens.size() - 1).getColumn() + 1, 0));
-    }
-}
diff --git a/languages/python-3/src/main/java/de/jplag/python3/PythonLanguage.java b/languages/python-3/src/main/java/de/jplag/python3/PythonLanguage.java
index b5a8fd73f..3df658728 100644
--- a/languages/python-3/src/main/java/de/jplag/python3/PythonLanguage.java
+++ b/languages/python-3/src/main/java/de/jplag/python3/PythonLanguage.java
@@ -1,23 +1,16 @@
 package de.jplag.python3;
 
-import java.io.File;
-import java.util.List;
-import java.util.Set;
-
 import org.kohsuke.MetaInfServices;
 
-import de.jplag.ParsingException;
-import de.jplag.Token;
+import de.jplag.antlr.AbstractAntlrLanguage;
 
 @MetaInfServices(de.jplag.Language.class)
-public class PythonLanguage implements de.jplag.Language {
+public class PythonLanguage extends AbstractAntlrLanguage {
 
     private static final String IDENTIFIER = "python3";
 
-    private final Parser parser;
-
     public PythonLanguage() {
-        parser = new Parser();
+        super(new PythonParserAdapter());
     }
 
     @Override
@@ -39,9 +32,4 @@ public String getIdentifier() {
     public int minimumTokenMatch() {
         return 12;
     }
-
-    @Override
-    public List<Token> parse(Set<File> files, boolean normalize) throws ParsingException {
-        return this.parser.parse(files);
-    }
 }
diff --git a/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java b/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java
new file mode 100644
index 000000000..ab224ae3f
--- /dev/null
+++ b/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java
@@ -0,0 +1,107 @@
+package de.jplag.python3;
+
+import static de.jplag.python3.Python3TokenType.APPLY;
+import static de.jplag.python3.Python3TokenType.ARRAY;
+import static de.jplag.python3.Python3TokenType.ASSERT;
+import static de.jplag.python3.Python3TokenType.ASSIGN;
+import static de.jplag.python3.Python3TokenType.BREAK;
+import static de.jplag.python3.Python3TokenType.CLASS_BEGIN;
+import static de.jplag.python3.Python3TokenType.CLASS_END;
+import static de.jplag.python3.Python3TokenType.CONTINUE;
+import static de.jplag.python3.Python3TokenType.DEC_BEGIN;
+import static de.jplag.python3.Python3TokenType.DEC_END;
+import static de.jplag.python3.Python3TokenType.DEL;
+import static de.jplag.python3.Python3TokenType.EXCEPT_BEGIN;
+import static de.jplag.python3.Python3TokenType.EXCEPT_END;
+import static de.jplag.python3.Python3TokenType.FINALLY;
+import static de.jplag.python3.Python3TokenType.FOR_BEGIN;
+import static de.jplag.python3.Python3TokenType.FOR_END;
+import static de.jplag.python3.Python3TokenType.IF_BEGIN;
+import static de.jplag.python3.Python3TokenType.IF_END;
+import static de.jplag.python3.Python3TokenType.IMPORT;
+import static de.jplag.python3.Python3TokenType.LAMBDA;
+import static de.jplag.python3.Python3TokenType.METHOD_BEGIN;
+import static de.jplag.python3.Python3TokenType.METHOD_END;
+import static de.jplag.python3.Python3TokenType.RAISE;
+import static de.jplag.python3.Python3TokenType.RETURN;
+import static de.jplag.python3.Python3TokenType.TRY_BEGIN;
+import static de.jplag.python3.Python3TokenType.WHILE_BEGIN;
+import static de.jplag.python3.Python3TokenType.WHILE_END;
+import static de.jplag.python3.Python3TokenType.WITH_BEGIN;
+import static de.jplag.python3.Python3TokenType.WITH_END;
+import static de.jplag.python3.Python3TokenType.YIELD;
+
+import de.jplag.antlr.AbstractAntlrListener;
+import de.jplag.python3.grammar.Python3Parser;
+import de.jplag.python3.grammar.Python3Parser.Assert_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.AugassignContext;
+import de.jplag.python3.grammar.Python3Parser.Break_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.ClassdefContext;
+import de.jplag.python3.grammar.Python3Parser.Continue_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.DecoratedContext;
+import de.jplag.python3.grammar.Python3Parser.Del_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.DictorsetmakerContext;
+import de.jplag.python3.grammar.Python3Parser.Except_clauseContext;
+import de.jplag.python3.grammar.Python3Parser.For_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.FuncdefContext;
+import de.jplag.python3.grammar.Python3Parser.If_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.Import_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.LambdefContext;
+import de.jplag.python3.grammar.Python3Parser.Raise_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.Return_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.Testlist_compContext;
+import de.jplag.python3.grammar.Python3Parser.TrailerContext;
+import de.jplag.python3.grammar.Python3Parser.Try_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.While_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.With_stmtContext;
+import de.jplag.python3.grammar.Python3Parser.Yield_argContext;
+import de.jplag.python3.grammar.Python3Parser.Yield_stmtContext;
+
+public class PythonListener extends AbstractAntlrListener {
+    public PythonListener() {
+        statements();
+        controlStructures();
+        contexts();
+        values();
+    }
+
+    private void statements() {
+        visit(Assert_stmtContext.class).map(ASSERT);
+        visit(Raise_stmtContext.class).map(RAISE);
+        visit(Return_stmtContext.class).map(RETURN);
+        visit(Yield_argContext.class).map(YIELD);
+        visit(Yield_stmtContext.class).map(YIELD);
+        visit(Import_stmtContext.class).map(IMPORT);
+        visit(Break_stmtContext.class).map(BREAK);
+        visit(Continue_stmtContext.class).map(CONTINUE);
+        visit(Del_stmtContext.class).map(DEL);
+        visit(Python3Parser.FINALLY).map(FINALLY);
+
+        visit(Except_clauseContext.class).map(EXCEPT_BEGIN, EXCEPT_END);
+    }
+
+    private void controlStructures() {
+        visit(While_stmtContext.class).map(WHILE_BEGIN, WHILE_END);
+        visit(Try_stmtContext.class).map(TRY_BEGIN);
+        visit(If_stmtContext.class).map(IF_BEGIN, IF_END);
+        visit(With_stmtContext.class).map(WITH_BEGIN, WITH_END);
+        visit(For_stmtContext.class).map(FOR_BEGIN, FOR_END);
+    }
+
+    private void contexts() {
+        visit(DecoratedContext.class).map(DEC_BEGIN, DEC_END);
+        visit(LambdefContext.class).map(LAMBDA);
+        visit(ClassdefContext.class).map(CLASS_BEGIN, CLASS_END);
+        visit(FuncdefContext.class).map(METHOD_BEGIN, METHOD_END);
+    }
+
+    private void values() {
+        visit(DictorsetmakerContext.class).map(ARRAY);
+        visit(Testlist_compContext.class, context -> context.getText().contains(",")).map(ARRAY);
+        visit(AugassignContext.class).map(ASSIGN);
+        visit(Python3Parser.ASSIGN).map(ASSIGN);
+
+        visit(TrailerContext.class, ctx -> ctx.getText().charAt(0) == '(').map(APPLY);
+        visit(TrailerContext.class, ctx -> ctx.getText().charAt(0) != '(').map(ARRAY);
+    }
+}
diff --git a/languages/python-3/src/main/java/de/jplag/python3/PythonParserAdapter.java b/languages/python-3/src/main/java/de/jplag/python3/PythonParserAdapter.java
new file mode 100644
index 000000000..8d99920f0
--- /dev/null
+++ b/languages/python-3/src/main/java/de/jplag/python3/PythonParserAdapter.java
@@ -0,0 +1,33 @@
+package de.jplag.python3;
+
+import org.antlr.v4.runtime.CharStream;
+import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.Lexer;
+import org.antlr.v4.runtime.ParserRuleContext;
+
+import de.jplag.antlr.AbstractAntlrListener;
+import de.jplag.antlr.AbstractAntlrParserAdapter;
+import de.jplag.python3.grammar.Python3Lexer;
+import de.jplag.python3.grammar.Python3Parser;
+
+public class PythonParserAdapter extends AbstractAntlrParserAdapter<Python3Parser> {
+    @Override
+    protected Lexer createLexer(CharStream input) {
+        return new Python3Lexer(input);
+    }
+
+    @Override
+    protected Python3Parser createParser(CommonTokenStream tokenStream) {
+        return new Python3Parser(tokenStream);
+    }
+
+    @Override
+    protected ParserRuleContext getEntryContext(Python3Parser parser) {
+        return parser.file_input();
+    }
+
+    @Override
+    protected AbstractAntlrListener getListener() {
+        return new PythonListener();
+    }
+}

From c77b372edf375a6cf56f18b9785efd86204a1f64 Mon Sep 17 00:00:00 2001
From: Alexander Milster <alexander.milster@gmx.de>
Date: Fri, 3 May 2024 14:06:34 +0200
Subject: [PATCH 3/7] Migrated python language module to new syntax

---
 .../python3/grammar/Python3LexerBase.java     | 30 ++++++++-----------
 .../python3/grammar/Python3ParserBase.java    | 12 +++-----
 2 files changed, 17 insertions(+), 25 deletions(-)

diff --git a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java
index 9b842dc8c..b5a0e5501 100644
--- a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java
+++ b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java
@@ -1,10 +1,10 @@
 package de.jplag.python3.grammar;
 
-import org.antlr.v4.runtime.*;
-
 import java.util.ArrayDeque;
 import java.util.Deque;
 
+import org.antlr.v4.runtime.*;
+
 abstract class Python3LexerBase extends Lexer {
     // A queue where extra tokens are pushed on (see the NEWLINE lexer rule).
     private java.util.LinkedList<Token> tokens = new java.util.LinkedList<>();
@@ -75,10 +75,10 @@ private CommonToken commonToken(int type, String text) {
     // following rules into account:
     //
     // "Tabs are replaced (from left to right) by one to eight spaces
-    //  such that the total number of characters up to and including
-    //  the replacement is a multiple of eight [...]"
+    // such that the total number of characters up to and including
+    // the replacement is a multiple of eight [...]"
     //
-    //  -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation
+    // -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation
     static int getIndentationCount(String spaces) {
         int count = 0;
         for (char ch : spaces.toCharArray()) {
@@ -99,15 +99,15 @@ boolean atStartOfInput() {
         return super.getCharPositionInLine() == 0 && super.getLine() == 1;
     }
 
-    void openBrace(){
+    void openBrace() {
         this.opened++;
     }
 
-    void closeBrace(){
+    void closeBrace() {
         this.opened--;
     }
 
-    void onNewLine(){
+    void onNewLine() {
         String newLine = getText().replaceAll("[^\r\n\f]+", "");
         String spaces = getText().replaceAll("[\r\n\f]+", "");
 
@@ -119,22 +119,19 @@ void onNewLine(){
             // If we're inside a list or on a blank line, ignore all indents,
             // dedents and line breaks.
             skip();
-        }
-        else {
+        } else {
             emit(commonToken(Python3Lexer.NEWLINE, newLine));
             int indent = getIndentationCount(spaces);
             int previous = indents.isEmpty() ? 0 : indents.peek();
             if (indent == previous) {
                 // skip indents of the same size as the present indent-size
                 skip();
-            }
-            else if (indent > previous) {
+            } else if (indent > previous) {
                 indents.push(indent);
                 emit(commonToken(Python3Lexer.INDENT, spaces));
-            }
-            else {
+            } else {
                 // Possibly emit more than 1 DEDENT token.
-                while(!indents.isEmpty() && indents.peek() > indent) {
+                while (!indents.isEmpty() && indents.peek() > indent) {
                     this.emit(createDedent());
                     indents.pop();
                 }
@@ -143,8 +140,7 @@ else if (indent > previous) {
     }
 
     @Override
-    public void reset()
-    {
+    public void reset() {
         tokens = new java.util.LinkedList<>();
         indents = new ArrayDeque<>();
         opened = 0;
diff --git a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java
index c3623ed74..713af92c1 100644
--- a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java
+++ b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java
@@ -2,20 +2,16 @@
 
 import org.antlr.v4.runtime.*;
 
-public abstract class Python3ParserBase extends Parser
-{
-    protected Python3ParserBase(TokenStream input)
-    {
+public abstract class Python3ParserBase extends Parser {
+    protected Python3ParserBase(TokenStream input) {
         super(input);
     }
 
-    public boolean CannotBePlusMinus()
-    {
+    public boolean CannotBePlusMinus() {
         return true;
     }
 
-    public boolean CannotBeDotLpEq()
-    {
+    public boolean CannotBeDotLpEq() {
         return true;
     }
 }
\ No newline at end of file

From 0a67ccc1a89581b36170c50015718d424921caf3 Mon Sep 17 00:00:00 2001
From: Alexander Milster <alexander.milster@gmx.de>
Date: Fri, 3 May 2024 14:14:36 +0200
Subject: [PATCH 4/7] Added tokens for ASYNC and AWAIT from Python 3.7

---
 .../de/jplag/python3/Python3TokenType.java    |  4 ++-
 .../java/de/jplag/python3/PythonListener.java | 36 +++----------------
 2 files changed, 8 insertions(+), 32 deletions(-)

diff --git a/languages/python-3/src/main/java/de/jplag/python3/Python3TokenType.java b/languages/python-3/src/main/java/de/jplag/python3/Python3TokenType.java
index e4a684c9b..850522470 100644
--- a/languages/python-3/src/main/java/de/jplag/python3/Python3TokenType.java
+++ b/languages/python-3/src/main/java/de/jplag/python3/Python3TokenType.java
@@ -32,7 +32,9 @@ public enum Python3TokenType implements TokenType {
     YIELD("YIELD"),
     DEL("DEL"),
     WITH_BEGIN("WITH}"),
-    WITH_END("}WITH");
+    WITH_END("}WITH"),
+    ASYNC("ASYNC"),
+    AWAIT("AWAIT");
 
     private final String description;
 
diff --git a/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java b/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java
index ab224ae3f..c4c7315ec 100644
--- a/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java
+++ b/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java
@@ -1,36 +1,5 @@
 package de.jplag.python3;
 
-import static de.jplag.python3.Python3TokenType.APPLY;
-import static de.jplag.python3.Python3TokenType.ARRAY;
-import static de.jplag.python3.Python3TokenType.ASSERT;
-import static de.jplag.python3.Python3TokenType.ASSIGN;
-import static de.jplag.python3.Python3TokenType.BREAK;
-import static de.jplag.python3.Python3TokenType.CLASS_BEGIN;
-import static de.jplag.python3.Python3TokenType.CLASS_END;
-import static de.jplag.python3.Python3TokenType.CONTINUE;
-import static de.jplag.python3.Python3TokenType.DEC_BEGIN;
-import static de.jplag.python3.Python3TokenType.DEC_END;
-import static de.jplag.python3.Python3TokenType.DEL;
-import static de.jplag.python3.Python3TokenType.EXCEPT_BEGIN;
-import static de.jplag.python3.Python3TokenType.EXCEPT_END;
-import static de.jplag.python3.Python3TokenType.FINALLY;
-import static de.jplag.python3.Python3TokenType.FOR_BEGIN;
-import static de.jplag.python3.Python3TokenType.FOR_END;
-import static de.jplag.python3.Python3TokenType.IF_BEGIN;
-import static de.jplag.python3.Python3TokenType.IF_END;
-import static de.jplag.python3.Python3TokenType.IMPORT;
-import static de.jplag.python3.Python3TokenType.LAMBDA;
-import static de.jplag.python3.Python3TokenType.METHOD_BEGIN;
-import static de.jplag.python3.Python3TokenType.METHOD_END;
-import static de.jplag.python3.Python3TokenType.RAISE;
-import static de.jplag.python3.Python3TokenType.RETURN;
-import static de.jplag.python3.Python3TokenType.TRY_BEGIN;
-import static de.jplag.python3.Python3TokenType.WHILE_BEGIN;
-import static de.jplag.python3.Python3TokenType.WHILE_END;
-import static de.jplag.python3.Python3TokenType.WITH_BEGIN;
-import static de.jplag.python3.Python3TokenType.WITH_END;
-import static de.jplag.python3.Python3TokenType.YIELD;
-
 import de.jplag.antlr.AbstractAntlrListener;
 import de.jplag.python3.grammar.Python3Parser;
 import de.jplag.python3.grammar.Python3Parser.Assert_stmtContext;
@@ -57,6 +26,8 @@
 import de.jplag.python3.grammar.Python3Parser.Yield_argContext;
 import de.jplag.python3.grammar.Python3Parser.Yield_stmtContext;
 
+import static de.jplag.python3.Python3TokenType.*;
+
 public class PythonListener extends AbstractAntlrListener {
     public PythonListener() {
         statements();
@@ -77,6 +48,9 @@ private void statements() {
         visit(Del_stmtContext.class).map(DEL);
         visit(Python3Parser.FINALLY).map(FINALLY);
 
+        visit(Python3Parser.ASYNC).map(ASYNC);
+        visit(Python3Parser.AWAIT).map(AWAIT);
+
         visit(Except_clauseContext.class).map(EXCEPT_BEGIN, EXCEPT_END);
     }
 

From 6d8d66eae27d92c5fa0de306c0520ae655a7cac8 Mon Sep 17 00:00:00 2001
From: Alexander Milster <alexander.milster@gmx.de>
Date: Fri, 3 May 2024 14:17:44 +0200
Subject: [PATCH 5/7] Applied Spotless formatting to PythonListener imports

---
 .../src/main/java/de/jplag/python3/PythonListener.java        | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java b/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java
index c4c7315ec..aa0dabb18 100644
--- a/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java
+++ b/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java
@@ -1,5 +1,7 @@
 package de.jplag.python3;
 
+import static de.jplag.python3.Python3TokenType.*;
+
 import de.jplag.antlr.AbstractAntlrListener;
 import de.jplag.python3.grammar.Python3Parser;
 import de.jplag.python3.grammar.Python3Parser.Assert_stmtContext;
@@ -26,8 +28,6 @@
 import de.jplag.python3.grammar.Python3Parser.Yield_argContext;
 import de.jplag.python3.grammar.Python3Parser.Yield_stmtContext;
 
-import static de.jplag.python3.Python3TokenType.*;
-
 public class PythonListener extends AbstractAntlrListener {
     public PythonListener() {
         statements();

From c559a3567b16e8e7e66138963d40313288868286 Mon Sep 17 00:00:00 2001
From: Alexander Milster <alexander.milster@gmx.de>
Date: Fri, 3 May 2024 14:44:24 +0200
Subject: [PATCH 6/7] Added async and await to the tests

---
 .../src/test/resources/de/jplag/python3/test_utils.py      | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/languages/python-3/src/test/resources/de/jplag/python3/test_utils.py b/languages/python-3/src/test/resources/de/jplag/python3/test_utils.py
index 34d92252c..bfd3e8bf4 100644
--- a/languages/python-3/src/test/resources/de/jplag/python3/test_utils.py
+++ b/languages/python-3/src/test/resources/de/jplag/python3/test_utils.py
@@ -500,4 +500,9 @@ def force_legacy_ssl_support():
 
 def switchWithBreak():
     while True:
-        break
\ No newline at end of file
+        break
+
+async def x():
+    return ""
+
+x = await x()
\ No newline at end of file

From fbe78a7e6ead43abe16fcc4625e784209cc58d91 Mon Sep 17 00:00:00 2001
From: Alexander Milster <alexander.milster@gmx.de>
Date: Thu, 16 May 2024 13:07:54 +0200
Subject: [PATCH 7/7] Designated the Python module as beta

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 507651819..eeda7526f 100644
--- a/README.md
+++ b/README.md
@@ -32,7 +32,7 @@ All supported languages and their supported versions are listed below.
 | [C](https://isocpp.org)                                |                                                                                     11 | c                 |                               legacy                                |  JavaCC   |
 | [C++](https://isocpp.org)                              |                                                                                     14 | cpp               |                                beta                                 |  ANTLR 4  |
 | [C#](https://docs.microsoft.com/en-us/dotnet/csharp/)  |                                                                                      6 | csharp            |                               mature                                |  ANTLR 4  |
-| [Python](https://www.python.org)                       |                                                                                    3.6 | python3           |                               legacy                                |  ANTLR 4  |
+| [Python](https://www.python.org)                       |                                                                                    3.6 | python3           |                                beta                                 |  ANTLR 4  |
 | [JavaScript](https://www.javascript.com/)              |                                                                                    ES6 | javascript        |                                beta                                 |  ANTLR 4  |
 | [TypeScript](https://www.typescriptlang.org/)          | [~5](https://github.com/antlr/grammars-v4/tree/master/javascript/typescript/README.md) | typescript        |                                beta                                 |  ANTLR 4  |
 | [Go](https://go.dev)                                   |                                                                                   1.17 | golang            |                                beta                                 |  ANTLR 4  |