From 408e52482736c6c9a23177345468866859f72377 Mon Sep 17 00:00:00 2001 From: Alexander Milster Date: Fri, 3 May 2024 13:24:42 +0200 Subject: [PATCH 1/7] Added new antlr grammar --- .../de/jplag/python3/grammar/Python3Lexer.g4 | 447 ++++------ .../de/jplag/python3/grammar/Python3Parser.g4 | 781 ++++++++++++++---- .../python3/grammar/Python3LexerBase.java | 100 ++- .../python3/grammar/Python3ParserBase.java | 17 +- 4 files changed, 864 insertions(+), 481 deletions(-) diff --git a/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Lexer.g4 b/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Lexer.g4 index 9b5fee1dc..8b36564b9 100644 --- a/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Lexer.g4 +++ b/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Lexer.g4 @@ -28,190 +28,159 @@ * https://github.com/bkiers/python3-parser * Developed by : Bart Kiers, bart@big-o.nl */ + +// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false +// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine +// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true + lexer grammar Python3Lexer; // All comments that start with "///" are copy-pasted from // The Python Language Reference -tokens { INDENT, DEDENT } +tokens { + INDENT, + DEDENT +} options { - superClass=Python3LexerBase; + superClass = Python3LexerBase; } +// Insert here @header for C++ lexer. 
+ /* * lexer rules */ -STRING - : STRING_LITERAL - | BYTES_LITERAL - ; - -NUMBER - : INTEGER - | FLOAT_NUMBER - | IMAG_NUMBER - ; - -INTEGER - : DECIMAL_INTEGER - | OCT_INTEGER - | HEX_INTEGER - | BIN_INTEGER - ; - -AND : 'and'; -AS : 'as'; -ASSERT : 'assert'; -ASYNC : 'async'; -AWAIT : 'await'; -BREAK : 'break'; -CASE : 'case' ; -CLASS : 'class'; -CONTINUE : 'continue'; -DEF : 'def'; -DEL : 'del'; -ELIF : 'elif'; -ELSE : 'else'; -EXCEPT : 'except'; -FALSE : 'False'; -FINALLY : 'finally'; -FOR : 'for'; -FROM : 'from'; -GLOBAL : 'global'; -IF : 'if'; -IMPORT : 'import'; -IN : 'in'; -IS : 'is'; -LAMBDA : 'lambda'; -MATCH : 'match' ; -NONE : 'None'; -NONLOCAL : 'nonlocal'; -NOT : 'not'; -OR : 'or'; -PASS : 'pass'; -RAISE : 'raise'; -RETURN : 'return'; -TRUE : 'True'; -TRY : 'try'; -UNDERSCORE : '_' ; -WHILE : 'while'; -WITH : 'with'; -YIELD : 'yield'; - -NEWLINE - : ( {this.atStartOfInput()}? SPACES - | ( '\r'? '\n' | '\r' | '\f' ) SPACES? - ) - {this.onNewLine();} - ; +STRING: STRING_LITERAL | BYTES_LITERAL; + +NUMBER: INTEGER | FLOAT_NUMBER | IMAG_NUMBER; + +INTEGER: DECIMAL_INTEGER | OCT_INTEGER | HEX_INTEGER | BIN_INTEGER; + +AND : 'and'; +AS : 'as'; +ASSERT : 'assert'; +ASYNC : 'async'; +AWAIT : 'await'; +BREAK : 'break'; +CASE : 'case'; +CLASS : 'class'; +CONTINUE : 'continue'; +DEF : 'def'; +DEL : 'del'; +ELIF : 'elif'; +ELSE : 'else'; +EXCEPT : 'except'; +FALSE : 'False'; +FINALLY : 'finally'; +FOR : 'for'; +FROM : 'from'; +GLOBAL : 'global'; +IF : 'if'; +IMPORT : 'import'; +IN : 'in'; +IS : 'is'; +LAMBDA : 'lambda'; +MATCH : 'match'; +NONE : 'None'; +NONLOCAL : 'nonlocal'; +NOT : 'not'; +OR : 'or'; +PASS : 'pass'; +RAISE : 'raise'; +RETURN : 'return'; +TRUE : 'True'; +TRY : 'try'; +UNDERSCORE : '_'; +WHILE : 'while'; +WITH : 'with'; +YIELD : 'yield'; + +NEWLINE: ({this.atStartOfInput()}? SPACES | ( '\r'? '\n' | '\r' | '\f') SPACES?) 
{this.onNewLine();}; /// identifier ::= id_start id_continue* -NAME - : ID_START ID_CONTINUE* - ; +NAME: ID_START ID_CONTINUE*; /// stringliteral ::= [stringprefix](shortstring | longstring) /// stringprefix ::= "r" | "u" | "R" | "U" | "f" | "F" /// | "fr" | "Fr" | "fR" | "FR" | "rf" | "rF" | "Rf" | "RF" -STRING_LITERAL - : ( [rR] | [uU] | [fF] | ( [fF] [rR] ) | ( [rR] [fF] ) )? ( SHORT_STRING | LONG_STRING ) - ; +STRING_LITERAL: ( [rR] | [uU] | [fF] | ( [fF] [rR]) | ( [rR] [fF]))? ( SHORT_STRING | LONG_STRING); /// bytesliteral ::= bytesprefix(shortbytes | longbytes) /// bytesprefix ::= "b" | "B" | "br" | "Br" | "bR" | "BR" | "rb" | "rB" | "Rb" | "RB" -BYTES_LITERAL - : ( [bB] | ( [bB] [rR] ) | ( [rR] [bB] ) ) ( SHORT_BYTES | LONG_BYTES ) - ; +BYTES_LITERAL: ( [bB] | ( [bB] [rR]) | ( [rR] [bB])) ( SHORT_BYTES | LONG_BYTES); /// decimalinteger ::= nonzerodigit digit* | "0"+ -DECIMAL_INTEGER - : NON_ZERO_DIGIT DIGIT* - | '0'+ - ; +DECIMAL_INTEGER: NON_ZERO_DIGIT DIGIT* | '0'+; /// octinteger ::= "0" ("o" | "O") octdigit+ -OCT_INTEGER - : '0' [oO] OCT_DIGIT+ - ; +OCT_INTEGER: '0' [oO] OCT_DIGIT+; /// hexinteger ::= "0" ("x" | "X") hexdigit+ -HEX_INTEGER - : '0' [xX] HEX_DIGIT+ - ; +HEX_INTEGER: '0' [xX] HEX_DIGIT+; /// bininteger ::= "0" ("b" | "B") bindigit+ -BIN_INTEGER - : '0' [bB] BIN_DIGIT+ - ; +BIN_INTEGER: '0' [bB] BIN_DIGIT+; /// floatnumber ::= pointfloat | exponentfloat -FLOAT_NUMBER - : POINT_FLOAT - | EXPONENT_FLOAT - ; +FLOAT_NUMBER: POINT_FLOAT | EXPONENT_FLOAT; /// imagnumber ::= (floatnumber | intpart) ("j" | "J") -IMAG_NUMBER - : ( FLOAT_NUMBER | INT_PART ) [jJ] - ; - -DOT : '.'; -ELLIPSIS : '...'; -STAR : '*'; -OPEN_PAREN : '(' {this.openBrace();}; -CLOSE_PAREN : ')' {this.closeBrace();}; -COMMA : ','; -COLON : ':'; -SEMI_COLON : ';'; -POWER : '**'; -ASSIGN : '='; -OPEN_BRACK : '[' {this.openBrace();}; -CLOSE_BRACK : ']' {this.closeBrace();}; -OR_OP : '|'; -XOR : '^'; -AND_OP : '&'; -LEFT_SHIFT : '<<'; -RIGHT_SHIFT : '>>'; -ADD : '+'; -MINUS : '-'; 
-DIV : '/'; -MOD : '%'; -IDIV : '//'; -NOT_OP : '~'; -OPEN_BRACE : '{' {this.openBrace();}; -CLOSE_BRACE : '}' {this.closeBrace();}; -LESS_THAN : '<'; -GREATER_THAN : '>'; -EQUALS : '=='; -GT_EQ : '>='; -LT_EQ : '<='; -NOT_EQ_1 : '<>'; -NOT_EQ_2 : '!='; -AT : '@'; -ARROW : '->'; -ADD_ASSIGN : '+='; -SUB_ASSIGN : '-='; -MULT_ASSIGN : '*='; -AT_ASSIGN : '@='; -DIV_ASSIGN : '/='; -MOD_ASSIGN : '%='; -AND_ASSIGN : '&='; -OR_ASSIGN : '|='; -XOR_ASSIGN : '^='; -LEFT_SHIFT_ASSIGN : '<<='; +IMAG_NUMBER: ( FLOAT_NUMBER | INT_PART) [jJ]; + +DOT : '.'; +ELLIPSIS : '...'; +STAR : '*'; +OPEN_PAREN : '(' {this.openBrace();}; +CLOSE_PAREN : ')' {this.closeBrace();}; +COMMA : ','; +COLON : ':'; +SEMI_COLON : ';'; +POWER : '**'; +ASSIGN : '='; +OPEN_BRACK : '[' {this.openBrace();}; +CLOSE_BRACK : ']' {this.closeBrace();}; +OR_OP : '|'; +XOR : '^'; +AND_OP : '&'; +LEFT_SHIFT : '<<'; +RIGHT_SHIFT : '>>'; +ADD : '+'; +MINUS : '-'; +DIV : '/'; +MOD : '%'; +IDIV : '//'; +NOT_OP : '~'; +OPEN_BRACE : '{' {this.openBrace();}; +CLOSE_BRACE : '}' {this.closeBrace();}; +LESS_THAN : '<'; +GREATER_THAN : '>'; +EQUALS : '=='; +GT_EQ : '>='; +LT_EQ : '<='; +NOT_EQ_1 : '<>'; +NOT_EQ_2 : '!='; +AT : '@'; +ARROW : '->'; +ADD_ASSIGN : '+='; +SUB_ASSIGN : '-='; +MULT_ASSIGN : '*='; +AT_ASSIGN : '@='; +DIV_ASSIGN : '/='; +MOD_ASSIGN : '%='; +AND_ASSIGN : '&='; +OR_ASSIGN : '|='; +XOR_ASSIGN : '^='; +LEFT_SHIFT_ASSIGN : '<<='; RIGHT_SHIFT_ASSIGN : '>>='; -POWER_ASSIGN : '**='; -IDIV_ASSIGN : '//='; +POWER_ASSIGN : '**='; +IDIV_ASSIGN : '//='; -SKIP_ - : ( SPACES | COMMENT | LINE_JOINING ) -> skip - ; +SKIP_: ( SPACES | COMMENT | LINE_JOINING) -> skip; -UNKNOWN_CHAR - : . 
- ; +UNKNOWN_CHAR: .; /* * fragments @@ -220,143 +189,93 @@ UNKNOWN_CHAR /// shortstring ::= "'" shortstringitem* "'" | '"' shortstringitem* '"' /// shortstringitem ::= shortstringchar | stringescapeseq /// shortstringchar ::= <any source character except "\" or newline or the quote> -fragment SHORT_STRING - : '\'' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f'] )* '\'' - | '"' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f"] )* '"' - ; +fragment SHORT_STRING: + '\'' (STRING_ESCAPE_SEQ | ~[\\\r\n\f'])* '\'' + | '"' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f"])* '"' +; /// longstring ::= "'''" longstringitem* "'''" | '"""' longstringitem* '"""' -fragment LONG_STRING - : '\'\'\'' LONG_STRING_ITEM*? '\'\'\'' - | '"""' LONG_STRING_ITEM*? '"""' - ; +fragment LONG_STRING: '\'\'\'' LONG_STRING_ITEM*? '\'\'\'' | '"""' LONG_STRING_ITEM*? '"""'; /// longstringitem ::= longstringchar | stringescapeseq -fragment LONG_STRING_ITEM - : LONG_STRING_CHAR - | STRING_ESCAPE_SEQ - ; +fragment LONG_STRING_ITEM: LONG_STRING_CHAR | STRING_ESCAPE_SEQ; /// longstringchar ::= <any source character except "\"> -fragment LONG_STRING_CHAR - : ~'\\' - ; +fragment LONG_STRING_CHAR: ~'\\'; /// stringescapeseq ::= "\" <any source character> -fragment STRING_ESCAPE_SEQ - : '\\' . - | '\\' NEWLINE - ; +fragment STRING_ESCAPE_SEQ: '\\' . | '\\' NEWLINE; /// nonzerodigit ::= "1"..."9" -fragment NON_ZERO_DIGIT - : [1-9] - ; +fragment NON_ZERO_DIGIT: [1-9]; /// digit ::= "0"..."9" -fragment DIGIT - : [0-9] - ; +fragment DIGIT: [0-9]; /// octdigit ::= "0"..."7" -fragment OCT_DIGIT - : [0-7] - ; +fragment OCT_DIGIT: [0-7]; /// hexdigit ::= digit | "a"..."f" | "A"..."F" -fragment HEX_DIGIT - : [0-9a-fA-F] - ; +fragment HEX_DIGIT: [0-9a-fA-F]; /// bindigit ::= "0" | "1" -fragment BIN_DIGIT - : [01] - ; +fragment BIN_DIGIT: [01]; /// pointfloat ::= [intpart] fraction | intpart "." -fragment POINT_FLOAT - : INT_PART? FRACTION - | INT_PART '.' - ; +fragment POINT_FLOAT: INT_PART? 
FRACTION | INT_PART '.'; /// exponentfloat ::= (intpart | pointfloat) exponent -fragment EXPONENT_FLOAT - : ( INT_PART | POINT_FLOAT ) EXPONENT - ; +fragment EXPONENT_FLOAT: ( INT_PART | POINT_FLOAT) EXPONENT; /// intpart ::= digit+ -fragment INT_PART - : DIGIT+ - ; +fragment INT_PART: DIGIT+; /// fraction ::= "." digit+ -fragment FRACTION - : '.' DIGIT+ - ; +fragment FRACTION: '.' DIGIT+; /// exponent ::= ("e" | "E") ["+" | "-"] digit+ -fragment EXPONENT - : [eE] [+-]? DIGIT+ - ; +fragment EXPONENT: [eE] [+-]? DIGIT+; /// shortbytes ::= "'" shortbytesitem* "'" | '"' shortbytesitem* '"' /// shortbytesitem ::= shortbyteschar | bytesescapeseq -fragment SHORT_BYTES - : '\'' ( SHORT_BYTES_CHAR_NO_SINGLE_QUOTE | BYTES_ESCAPE_SEQ )* '\'' - | '"' ( SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE | BYTES_ESCAPE_SEQ )* '"' - ; +fragment SHORT_BYTES: + '\'' (SHORT_BYTES_CHAR_NO_SINGLE_QUOTE | BYTES_ESCAPE_SEQ)* '\'' + | '"' ( SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE | BYTES_ESCAPE_SEQ)* '"' +; /// longbytes ::= "'''" longbytesitem* "'''" | '"""' longbytesitem* '"""' -fragment LONG_BYTES - : '\'\'\'' LONG_BYTES_ITEM*? '\'\'\'' - | '"""' LONG_BYTES_ITEM*? '"""' - ; +fragment LONG_BYTES: '\'\'\'' LONG_BYTES_ITEM*? '\'\'\'' | '"""' LONG_BYTES_ITEM*? 
'"""'; /// longbytesitem ::= longbyteschar | bytesescapeseq -fragment LONG_BYTES_ITEM - : LONG_BYTES_CHAR - | BYTES_ESCAPE_SEQ - ; +fragment LONG_BYTES_ITEM: LONG_BYTES_CHAR | BYTES_ESCAPE_SEQ; /// shortbyteschar ::= -fragment SHORT_BYTES_CHAR_NO_SINGLE_QUOTE - : [\u0000-\u0009] - | [\u000B-\u000C] - | [\u000E-\u0026] - | [\u0028-\u005B] - | [\u005D-\u007F] - ; - -fragment SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE - : [\u0000-\u0009] - | [\u000B-\u000C] - | [\u000E-\u0021] - | [\u0023-\u005B] - | [\u005D-\u007F] - ; +fragment SHORT_BYTES_CHAR_NO_SINGLE_QUOTE: + [\u0000-\u0009] + | [\u000B-\u000C] + | [\u000E-\u0026] + | [\u0028-\u005B] + | [\u005D-\u007F] +; + +fragment SHORT_BYTES_CHAR_NO_DOUBLE_QUOTE: + [\u0000-\u0009] + | [\u000B-\u000C] + | [\u000E-\u0021] + | [\u0023-\u005B] + | [\u005D-\u007F] +; /// longbyteschar ::= -fragment LONG_BYTES_CHAR - : [\u0000-\u005B] - | [\u005D-\u007F] - ; +fragment LONG_BYTES_CHAR: [\u0000-\u005B] | [\u005D-\u007F]; /// bytesescapeseq ::= "\" -fragment BYTES_ESCAPE_SEQ - : '\\' [\u0000-\u007F] - ; - -fragment SPACES - : [ \t]+ - ; +fragment BYTES_ESCAPE_SEQ: '\\' [\u0000-\u007F]; -fragment COMMENT - : '#' ~[\r\n\f]* - ; +fragment SPACES: [ \t]+; -fragment LINE_JOINING - : '\\' SPACES? ( '\r'? '\n' | '\r' | '\f') - ; +fragment COMMENT: '#' ~[\r\n\f]*; +fragment LINE_JOINING: '\\' SPACES? ( '\r'? '\n' | '\r' | '\f'); // TODO: ANTLR seems lack of some Unicode property support... 
//$ curl https://www.unicode.org/Public/13.0.0/ucd/PropList.txt | grep Other_ID_ @@ -369,36 +288,26 @@ fragment LINE_JOINING //1369..1371 ; Other_ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE //19DA ; Other_ID_Continue # No NEW TAI LUE THAM DIGIT ONE -fragment UNICODE_OIDS - : '\u1885'..'\u1886' - | '\u2118' - | '\u212e' - | '\u309b'..'\u309c' - ; +fragment UNICODE_OIDS: '\u1885' ..'\u1886' | '\u2118' | '\u212e' | '\u309b' ..'\u309c'; -fragment UNICODE_OIDC - : '\u00b7' - | '\u0387' - | '\u1369'..'\u1371' - | '\u19da' - ; +fragment UNICODE_OIDC: '\u00b7' | '\u0387' | '\u1369' ..'\u1371' | '\u19da'; /// id_start ::= -fragment ID_START - : '_' - | [\p{L}] - | [\p{Nl}] - //| [\p{Other_ID_Start}] - | UNICODE_OIDS - ; +fragment ID_START: + '_' + | [\p{L}] + | [\p{Nl}] + //| [\p{Other_ID_Start}] + | UNICODE_OIDS +; /// id_continue ::= -fragment ID_CONTINUE - : ID_START - | [\p{Mn}] - | [\p{Mc}] - | [\p{Nd}] - | [\p{Pc}] - //| [\p{Other_ID_Continue}] - | UNICODE_OIDC - ; +fragment ID_CONTINUE: + ID_START + | [\p{Mn}] + | [\p{Mc}] + | [\p{Nd}] + | [\p{Pc}] + //| [\p{Other_ID_Continue}] + | UNICODE_OIDC +; \ No newline at end of file diff --git a/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Parser.g4 b/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Parser.g4 index 8b0143de6..4c5a27cf2 100644 --- a/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Parser.g4 +++ b/languages/python-3/src/main/antlr4/de/jplag/python3/grammar/Python3Parser.g4 @@ -31,186 +31,623 @@ // Scraping from https://docs.python.org/3/reference/grammar.html +// $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false +// $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging + parser grammar Python3Parser; options { superClass = Python3ParserBase; - tokenVocab=Python3Lexer; + 
tokenVocab = Python3Lexer; } +// Insert here @header for C++ parser. + // All comments that start with "///" are copy-pasted from // The Python Language Reference -single_input: NEWLINE | simple_stmts | compound_stmt NEWLINE; -file_input: (NEWLINE | stmt)* EOF; -eval_input: testlist NEWLINE* EOF; - -decorator: '@' dotted_name ( '(' arglist? ')' )? NEWLINE; -decorators: decorator+; -decorated: decorators (classdef | funcdef | async_funcdef); - -async_funcdef: ASYNC funcdef; -funcdef: 'def' name parameters ('->' test)? ':' block; - -parameters: '(' typedargslist? ')'; -typedargslist: (tfpdef ('=' test)? (',' tfpdef ('=' test)?)* (',' ( - '*' tfpdef? (',' tfpdef ('=' test)?)* (',' ('**' tfpdef ','? )? )? - | '**' tfpdef ','? )? )? - | '*' tfpdef? (',' tfpdef ('=' test)?)* (',' ('**' tfpdef ','? )? )? - | '**' tfpdef ','?); -tfpdef: name (':' test)?; -varargslist: (vfpdef ('=' test)? (',' vfpdef ('=' test)?)* (',' ( - '*' vfpdef? (',' vfpdef ('=' test)?)* (',' ('**' vfpdef ','? )? )? - | '**' vfpdef (',')?)?)? - | '*' vfpdef? (',' vfpdef ('=' test)?)* (',' ('**' vfpdef ','? )? )? - | '**' vfpdef ','? -); -vfpdef: name; - -stmt: simple_stmts | compound_stmt; -simple_stmts: simple_stmt (';' simple_stmt)* ';'? NEWLINE; -simple_stmt: (expr_stmt | del_stmt | pass_stmt | flow_stmt | - import_stmt | global_stmt | nonlocal_stmt | assert_stmt); -expr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) | - ('=' (yield_expr|testlist_star_expr))*); -annassign: ':' test ('=' test)?; -testlist_star_expr: (test|star_expr) (',' (test|star_expr))* ','?; -augassign: ('+=' | '-=' | '*=' | '@=' | '/=' | '%=' | '&=' | '|=' | '^=' | - '<<=' | '>>=' | '**=' | '//='); +single_input + : NEWLINE + | simple_stmts + | compound_stmt NEWLINE + ; + +file_input + : (NEWLINE | stmt)* EOF + ; + +eval_input + : testlist NEWLINE* EOF + ; + +decorator + : '@' dotted_name ('(' arglist? ')')? 
NEWLINE + ; + +decorators + : decorator+ + ; + +decorated + : decorators (classdef | funcdef | async_funcdef) + ; + +async_funcdef + : ASYNC funcdef + ; + +funcdef + : 'def' name parameters ('->' test)? ':' block + ; + +parameters + : '(' typedargslist? ')' + ; + +typedargslist + : ( + tfpdef ('=' test)? (',' tfpdef ('=' test)?)* ( + ',' ( + '*' tfpdef? (',' tfpdef ('=' test)?)* (',' ('**' tfpdef ','?)?)? + | '**' tfpdef ','? + )? + )? + | '*' tfpdef? (',' tfpdef ('=' test)?)* (',' ('**' tfpdef ','?)?)? + | '**' tfpdef ','? + ) + ; + +tfpdef + : name (':' test)? + ; + +varargslist + : ( + vfpdef ('=' test)? (',' vfpdef ('=' test)?)* ( + ',' ( + '*' vfpdef? (',' vfpdef ('=' test)?)* (',' ('**' vfpdef ','?)?)? + | '**' vfpdef (',')? + )? + )? + | '*' vfpdef? (',' vfpdef ('=' test)?)* (',' ('**' vfpdef ','?)?)? + | '**' vfpdef ','? + ) + ; + +vfpdef + : name + ; + +stmt + : simple_stmts + | compound_stmt + ; + +simple_stmts + : simple_stmt (';' simple_stmt)* ';'? NEWLINE + ; + +simple_stmt + : ( + expr_stmt + | del_stmt + | pass_stmt + | flow_stmt + | import_stmt + | global_stmt + | nonlocal_stmt + | assert_stmt + ) + ; + +expr_stmt + : testlist_star_expr ( + annassign + | augassign (yield_expr | testlist) + | ('=' (yield_expr | testlist_star_expr))* + ) + ; + +annassign + : ':' test ('=' test)? + ; + +testlist_star_expr + : (test | star_expr) (',' (test | star_expr))* ','? 
+ ; + +augassign + : ( + '+=' + | '-=' + | '*=' + | '@=' + | '/=' + | '%=' + | '&=' + | '|=' + | '^=' + | '<<=' + | '>>=' + | '**=' + | '//=' + ) + ; + // For normal and annotated assignments, additional restrictions enforced by the interpreter -del_stmt: 'del' exprlist; -pass_stmt: 'pass'; -flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt; -break_stmt: 'break'; -continue_stmt: 'continue'; -return_stmt: 'return' testlist?; -yield_stmt: yield_expr; -raise_stmt: 'raise' (test ('from' test)?)?; -import_stmt: import_name | import_from; -import_name: 'import' dotted_as_names; +del_stmt + : 'del' exprlist + ; + +pass_stmt + : 'pass' + ; + +flow_stmt + : break_stmt + | continue_stmt + | return_stmt + | raise_stmt + | yield_stmt + ; + +break_stmt + : 'break' + ; + +continue_stmt + : 'continue' + ; + +return_stmt + : 'return' testlist? + ; + +yield_stmt + : yield_expr + ; + +raise_stmt + : 'raise' (test ('from' test)?)? + ; + +import_stmt + : import_name + | import_from + ; + +import_name + : 'import' dotted_as_names + ; + // note below: the ('.' | '...') is necessary because '...' is tokenized as ELLIPSIS -import_from: ('from' (('.' | '...')* dotted_name | ('.' | '...')+) - 'import' ('*' | '(' import_as_names ')' | import_as_names)); -import_as_name: name ('as' name)?; -dotted_as_name: dotted_name ('as' name)?; -import_as_names: import_as_name (',' import_as_name)* ','?; -dotted_as_names: dotted_as_name (',' dotted_as_name)*; -dotted_name: name ('.' 
name)*; -global_stmt: 'global' name (',' name)*; -nonlocal_stmt: 'nonlocal' name (',' name)*; -assert_stmt: 'assert' test (',' test)?; - -compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt | match_stmt; -async_stmt: ASYNC (funcdef | with_stmt | for_stmt); -if_stmt: 'if' test ':' block ('elif' test ':' block)* ('else' ':' block)?; -while_stmt: 'while' test ':' block ('else' ':' block)?; -for_stmt: 'for' exprlist 'in' testlist ':' block ('else' ':' block)?; -try_stmt: ('try' ':' block - ((except_clause ':' block)+ - ('else' ':' block)? - ('finally' ':' block)? | - 'finally' ':' block)); -with_stmt: 'with' with_item (',' with_item)* ':' block; -with_item: test ('as' expr)?; +import_from + : ( + 'from' (('.' | '...')* dotted_name | ('.' | '...')+) 'import' ( + '*' + | '(' import_as_names ')' + | import_as_names + ) + ) + ; + +import_as_name + : name ('as' name)? + ; + +dotted_as_name + : dotted_name ('as' name)? + ; + +import_as_names + : import_as_name (',' import_as_name)* ','? + ; + +dotted_as_names + : dotted_as_name (',' dotted_as_name)* + ; + +dotted_name + : name ('.' name)* + ; + +global_stmt + : 'global' name (',' name)* + ; + +nonlocal_stmt + : 'nonlocal' name (',' name)* + ; + +assert_stmt + : 'assert' test (',' test)? + ; + +compound_stmt + : if_stmt + | while_stmt + | for_stmt + | try_stmt + | with_stmt + | funcdef + | classdef + | decorated + | async_stmt + | match_stmt + ; + +async_stmt + : ASYNC (funcdef | with_stmt | for_stmt) + ; + +if_stmt + : 'if' test ':' block ('elif' test ':' block)* ('else' ':' block)? + ; + +while_stmt + : 'while' test ':' block ('else' ':' block)? + ; + +for_stmt + : 'for' exprlist 'in' testlist ':' block ('else' ':' block)? + ; + +try_stmt + : ( + 'try' ':' block ( + (except_clause ':' block)+ ('else' ':' block)? ('finally' ':' block)? 
+ | 'finally' ':' block + ) + ) + ; + +with_stmt + : 'with' with_item (',' with_item)* ':' block + ; + +with_item + : test ('as' expr)? + ; + // NB compile.c makes sure that the default except clause is last -except_clause: 'except' (test ('as' name)?)?; -block: simple_stmts | NEWLINE INDENT stmt+ DEDENT; -match_stmt: 'match' subject_expr ':' NEWLINE INDENT case_block+ DEDENT ; -subject_expr: star_named_expression ',' star_named_expressions? | test ; -star_named_expressions: ',' star_named_expression+ ','? ; -star_named_expression: '*' expr | test ; -case_block: 'case' patterns guard? ':' block ; -guard: 'if' test ; -patterns: open_sequence_pattern | pattern ; -pattern: as_pattern | or_pattern ; -as_pattern: or_pattern 'as' pattern_capture_target ; -or_pattern: closed_pattern ('|' closed_pattern)* ; -closed_pattern: literal_pattern | capture_pattern | wildcard_pattern | value_pattern | group_pattern | sequence_pattern | mapping_pattern | class_pattern ; -literal_pattern: signed_number { this.cannotBePlusMinus() }? | complex_number | strings | 'None' | 'True' | 'False' ; -literal_expr: signed_number { this.cannotBePlusMinus() }? | complex_number | strings | 'None' | 'True' | 'False' ; -complex_number: signed_real_number '+' imaginary_number +except_clause + : 'except' (test ('as' name)?)? + ; + +block + : simple_stmts + | NEWLINE INDENT stmt+ DEDENT + ; + +match_stmt + : 'match' subject_expr ':' NEWLINE INDENT case_block+ DEDENT + ; + +subject_expr + : star_named_expression ',' star_named_expressions? + | test + ; + +star_named_expressions + : ',' star_named_expression+ ','? + ; + +star_named_expression + : '*' expr + | test + ; + +case_block + : 'case' patterns guard? 
':' block + ; + +guard + : 'if' test + ; + +patterns + : open_sequence_pattern + | pattern + ; + +pattern + : as_pattern + | or_pattern + ; + +as_pattern + : or_pattern 'as' pattern_capture_target + ; + +or_pattern + : closed_pattern ('|' closed_pattern)* + ; + +closed_pattern + : literal_pattern + | capture_pattern + | wildcard_pattern + | value_pattern + | group_pattern + | sequence_pattern + | mapping_pattern + | class_pattern + ; + +literal_pattern + : signed_number { this.CannotBePlusMinus() }? + | complex_number + | strings + | 'None' + | 'True' + | 'False' + ; + +literal_expr + : signed_number { this.CannotBePlusMinus() }? + | complex_number + | strings + | 'None' + | 'True' + | 'False' + ; + +complex_number + : signed_real_number '+' imaginary_number | signed_real_number '-' imaginary_number ; -signed_number: NUMBER | '-' NUMBER ; -signed_real_number: real_number | '-' real_number ; -real_number: NUMBER ; -imaginary_number: NUMBER ; -capture_pattern: pattern_capture_target ; -pattern_capture_target: /* cannot be '_' */ name { this.cannotBeDotLpEq() }? ; -wildcard_pattern: '_' ; -value_pattern: attr { this.cannotBeDotLpEq() }? ; -attr: name ('.' name)+ ; -name_or_attr: attr | name ; -group_pattern: '(' pattern ')' ; -sequence_pattern: - '[' maybe_sequence_pattern? ']' + +signed_number + : NUMBER + | '-' NUMBER + ; + +signed_real_number + : real_number + | '-' real_number + ; + +real_number + : NUMBER + ; + +imaginary_number + : NUMBER + ; + +capture_pattern + : pattern_capture_target + ; + +pattern_capture_target + : /* cannot be '_' */ name { this.CannotBeDotLpEq() }? + ; + +wildcard_pattern + : '_' + ; + +value_pattern + : attr { this.CannotBeDotLpEq() }? + ; + +attr + : name ('.' name)+ + ; + +name_or_attr + : attr + | name + ; + +group_pattern + : '(' pattern ')' + ; + +sequence_pattern + : '[' maybe_sequence_pattern? ']' | '(' open_sequence_pattern? ')' ; -open_sequence_pattern: maybe_star_pattern ',' maybe_sequence_pattern? 
; -maybe_sequence_pattern: maybe_star_pattern (',' maybe_star_pattern)* ','? ; -maybe_star_pattern: star_pattern | pattern ; -star_pattern: - '*' pattern_capture_target + +open_sequence_pattern + : maybe_star_pattern ',' maybe_sequence_pattern? + ; + +maybe_sequence_pattern + : maybe_star_pattern (',' maybe_star_pattern)* ','? + ; + +maybe_star_pattern + : star_pattern + | pattern + ; + +star_pattern + : '*' pattern_capture_target | '*' wildcard_pattern ; -mapping_pattern: '{' '}' + +mapping_pattern + : '{' '}' | '{' double_star_pattern ','? '}' | '{' items_pattern ',' double_star_pattern ','? '}' | '{' items_pattern ','? '}' ; -items_pattern: key_value_pattern (',' key_value_pattern)* ; -key_value_pattern: (literal_expr | attr) ':' pattern ; -double_star_pattern: '**' pattern_capture_target ; -class_pattern: name_or_attr '(' ')' + +items_pattern + : key_value_pattern (',' key_value_pattern)* + ; + +key_value_pattern + : (literal_expr | attr) ':' pattern + ; + +double_star_pattern + : '**' pattern_capture_target + ; + +class_pattern + : name_or_attr '(' ')' | name_or_attr '(' positional_patterns ','? ')' | name_or_attr '(' keyword_patterns ','? ')' | name_or_attr '(' positional_patterns ',' keyword_patterns ','? ')' ; -positional_patterns: pattern (',' pattern)* ; -keyword_patterns: keyword_pattern (',' keyword_pattern)* ; -keyword_pattern: name '=' pattern ; - -test: or_test ('if' or_test 'else' test)? | lambdef; -test_nocond: or_test | lambdef_nocond; -lambdef: 'lambda' varargslist? ':' test; -lambdef_nocond: 'lambda' varargslist? ':' test_nocond; -or_test: and_test ('or' and_test)*; -and_test: not_test ('and' not_test)*; -not_test: 'not' not_test | comparison; -comparison: expr (comp_op expr)*; + +positional_patterns + : pattern (',' pattern)* + ; + +keyword_patterns + : keyword_pattern (',' keyword_pattern)* + ; + +keyword_pattern + : name '=' pattern + ; + +test + : or_test ('if' or_test 'else' test)? 
+ | lambdef + ; + +test_nocond + : or_test + | lambdef_nocond + ; + +lambdef + : 'lambda' varargslist? ':' test + ; + +lambdef_nocond + : 'lambda' varargslist? ':' test_nocond + ; + +or_test + : and_test ('or' and_test)* + ; + +and_test + : not_test ('and' not_test)* + ; + +not_test + : 'not' not_test + | comparison + ; + +comparison + : expr (comp_op expr)* + ; + // <> isn't actually a valid comparison operator in Python. It's here for the // sake of a __future__ import described in PEP 401 (which really works :-) -comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'; -star_expr: '*' expr; -expr: xor_expr ('|' xor_expr)*; -xor_expr: and_expr ('^' and_expr)*; -and_expr: shift_expr ('&' shift_expr)*; -shift_expr: arith_expr (('<<'|'>>') arith_expr)*; -arith_expr: term (('+'|'-') term)*; -term: factor (('*'|'@'|'/'|'%'|'//') factor)*; -factor: ('+'|'-'|'~') factor | power; -power: atom_expr ('**' factor)?; -atom_expr: AWAIT? atom trailer*; -atom: '(' (yield_expr|testlist_comp)? ')' - | '[' testlist_comp? ']' - | '{' dictorsetmaker? '}' - | name | NUMBER | STRING+ | '...' | 'None' | 'True' | 'False' ; -name : NAME | '_' | 'match' ; -testlist_comp: (test|star_expr) ( comp_for | (',' (test|star_expr))* ','? ); -trailer: '(' arglist? ')' | '[' subscriptlist ']' | '.' name ; -subscriptlist: subscript_ (',' subscript_)* ','?; -subscript_: test | test? ':' test? sliceop?; -sliceop: ':' test?; -exprlist: (expr|star_expr) (',' (expr|star_expr))* ','?; -testlist: test (',' test)* ','?; -dictorsetmaker: ( ((test ':' test | '**' expr) - (comp_for | (',' (test ':' test | '**' expr))* ','?)) | - ((test | star_expr) - (comp_for | (',' (test | star_expr))* ','?)) ); - -classdef: 'class' name ('(' arglist? ')')? 
':' block; - -arglist: argument (',' argument)* ','?; +comp_op + : '<' + | '>' + | '==' + | '>=' + | '<=' + | '<>' + | '!=' + | 'in' + | 'not' 'in' + | 'is' + | 'is' 'not' + ; + +star_expr + : '*' expr + ; + +expr + : atom_expr + | expr '**' expr + | ('+' | '-' | '~')+ expr + | expr ('*' | '@' | '/' | '%' | '//') expr + | expr ('+' | '-') expr + | expr ('<<' | '>>') expr + | expr '&' expr + | expr '^' expr + | expr '|' expr + ; + +//expr: xor_expr ('|' xor_expr)*; +//xor_expr: and_expr ('^' and_expr)*; +//and_expr: shift_expr ('&' shift_expr)*; +//shift_expr: arith_expr (('<<'|'>>') arith_expr)*; +//arith_expr: term (('+'|'-') term)*; +//term: factor (('*'|'@'|'/'|'%'|'//') factor)*; +//factor: ('+'|'-'|'~') factor | power; +//power: atom_expr ('**' factor)?; +atom_expr + : AWAIT? atom trailer* + ; + +atom + : '(' (yield_expr | testlist_comp)? ')' + | '[' testlist_comp? ']' + | '{' dictorsetmaker? '}' + | name + | NUMBER + | STRING+ + | '...' + | 'None' + | 'True' + | 'False' + ; + +name + : NAME + | '_' + | 'match' + ; + +testlist_comp + : (test | star_expr) (comp_for | (',' (test | star_expr))* ','?) + ; + +trailer + : '(' arglist? ')' + | '[' subscriptlist ']' + | '.' name + ; + +subscriptlist + : subscript_ (',' subscript_)* ','? + ; + +subscript_ + : test + | test? ':' test? sliceop? + ; + +sliceop + : ':' test? + ; + +exprlist + : (expr | star_expr) (',' (expr | star_expr))* ','? + ; + +testlist + : test (',' test)* ','? + ; + +dictorsetmaker + : ( + ((test ':' test | '**' expr) (comp_for | (',' (test ':' test | '**' expr))* ','?)) + | ((test | star_expr) (comp_for | (',' (test | star_expr))* ','?)) + ) + ; + +classdef + : 'class' name ('(' arglist? ')')? ':' block + ; + +arglist + : argument (',' argument)* ','? + ; // The reason that keywords are test nodes instead of NAME is that using NAME // results in an ambiguity. ast.c makes sure it's a NAME. 
@@ -221,19 +658,37 @@ arglist: argument (',' argument)* ','?; // Illegal combinations and orderings are blocked in ast.c: // multiple (test comp_for) arguments are blocked; keyword unpackings // that precede iterable unpackings are blocked; etc. -argument: ( test comp_for? | - test '=' test | - '**' test | - '*' test ); +argument + : (test comp_for? | test '=' test | '**' test | '*' test) + ; + +comp_iter + : comp_for + | comp_if + ; -comp_iter: comp_for | comp_if; -comp_for: ASYNC? 'for' exprlist 'in' or_test comp_iter?; -comp_if: 'if' test_nocond comp_iter?; +comp_for + : ASYNC? 'for' exprlist 'in' or_test comp_iter? + ; + +comp_if + : 'if' test_nocond comp_iter? + ; // not used in grammar, but may appear in "node" passed from Parser to Compiler -encoding_decl: name; +encoding_decl + : name + ; -yield_expr: 'yield' yield_arg?; -yield_arg: 'from' test | testlist; +yield_expr + : 'yield' yield_arg? + ; + +yield_arg + : 'from' test + | testlist + ; -strings: STRING+ ; +strings + : STRING+ + ; \ No newline at end of file diff --git a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java index 0e24adf20..9b842dc8c 100644 --- a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java +++ b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java @@ -1,17 +1,18 @@ package de.jplag.python3.grammar; -import java.util.Deque; -import java.util.LinkedList; +import org.antlr.v4.runtime.*; -import org.antlr.v4.runtime.CharStream; -import org.antlr.v4.runtime.CommonToken; -import org.antlr.v4.runtime.Lexer; -import org.antlr.v4.runtime.Token; +import java.util.ArrayDeque; +import java.util.Deque; abstract class Python3LexerBase extends Lexer { - private LinkedList tokens = new LinkedList<>(); - private Deque indents = new LinkedList<>(); + // A queue where extra tokens are pushed on (see the NEWLINE lexer rule). 
+ private java.util.LinkedList tokens = new java.util.LinkedList<>(); + // The stack that keeps track of the indentation level. + private Deque indents = new ArrayDeque<>(); + // The amount of opened braces, brackets and parenthesis. private int opened = 0; + // The most recently produced token. private Token lastToken = null; protected Python3LexerBase(CharStream input) { @@ -26,35 +27,38 @@ public void emit(Token t) { @Override public Token nextToken() { + // Check if the end-of-file is ahead and there are still some DEDENTS expected. if (_input.LA(1) == EOF && !this.indents.isEmpty()) { + // Remove any trailing EOF tokens from our buffer. + for (int i = tokens.size() - 1; i >= 0; i--) { + if (tokens.get(i).getType() == EOF) { + tokens.remove(i); + } + } + + // First emit an extra line break that serves as the end of the statement. this.emit(commonToken(Python3Lexer.NEWLINE, "\n")); - this.removeTrailingEofTokens(); + // Now emit as much DEDENT tokens as needed. while (!indents.isEmpty()) { this.emit(createDedent()); indents.pop(); } - this.emit(commonToken(EOF, "")); + // Put the EOF back on the token stream. + this.emit(commonToken(Python3Lexer.EOF, "")); } Token next = super.nextToken(); if (next.getChannel() == Token.DEFAULT_CHANNEL) { + // Keep track of the last token on the default channel. this.lastToken = next; } return tokens.isEmpty() ? next : tokens.poll(); } - private void removeTrailingEofTokens() { - for (int i = tokens.size() - 1; i >= 0; i--) { - if (tokens.get(i).getType() == EOF) { - tokens.remove(i); - } - } - } - private Token createDedent() { CommonToken dedent = commonToken(Python3Lexer.DEDENT, ""); dedent.setLine(this.lastToken.getLine()); @@ -67,21 +71,24 @@ private CommonToken commonToken(int type, String text) { return new CommonToken(this._tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, start, stop); } - /** - * Calculates the indentation of the provided spaces, taking the following rules into account: - *

- * "Tabs are replaced (from left to right) by one to eight spaces such that the total number of characters up to and - * including the replacement is a multiple of eight [...]" - *

- * -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation - **/ + // Calculates the indentation of the provided spaces, taking the + // following rules into account: + // + // "Tabs are replaced (from left to right) by one to eight spaces + // such that the total number of characters up to and including + // the replacement is a multiple of eight [...]" + // + // -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation static int getIndentationCount(String spaces) { int count = 0; for (char ch : spaces.toCharArray()) { - if (ch == '\t') { - count += 8 - (count % 8); - } else { - count++; + switch (ch) { + case '\t': + count += 8 - (count % 8); + break; + default: + // A normal space char. + count++; } } @@ -92,34 +99,42 @@ boolean atStartOfInput() { return super.getCharPositionInLine() == 0 && super.getLine() == 1; } - void openBrace() { + void openBrace(){ this.opened++; } - void closeBrace() { + void closeBrace(){ this.opened--; } - void onNewLine() { + void onNewLine(){ String newLine = getText().replaceAll("[^\r\n\f]+", ""); String spaces = getText().replaceAll("[\r\n\f]+", ""); + // Strip newlines inside open clauses except if we are near EOF. We keep NEWLINEs near EOF to + // satisfy the final newline needed by the single_put rule used by the REPL. int next = _input.LA(1); int nextnext = _input.LA(2); if (opened > 0 || (nextnext != -1 && (next == '\r' || next == '\n' || next == '\f' || next == '#'))) { + // If we're inside a list or on a blank line, ignore all indents, + // dedents and line breaks. skip(); - } else { + } + else { emit(commonToken(Python3Lexer.NEWLINE, newLine)); int indent = getIndentationCount(spaces); int previous = indents.isEmpty() ? 
0 : indents.peek(); - if (indent == previous) { + // skip indents of the same size as the present indent-size skip(); - } else if (indent > previous) { + } + else if (indent > previous) { indents.push(indent); emit(commonToken(Python3Lexer.INDENT, spaces)); - } else { - while (!indents.isEmpty() && indents.peek() > indent) { + } + else { + // Possibly emit more than 1 DEDENT token. + while(!indents.isEmpty() && indents.peek() > indent) { this.emit(createDedent()); indents.pop(); } @@ -128,11 +143,12 @@ void onNewLine() { } @Override - public void reset() { - tokens = new LinkedList<>(); - indents = new LinkedList<>(); + public void reset() + { + tokens = new java.util.LinkedList<>(); + indents = new ArrayDeque<>(); opened = 0; lastToken = null; super.reset(); } -} +} \ No newline at end of file diff --git a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java index 44b5926a4..c3623ed74 100644 --- a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java +++ b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java @@ -1,18 +1,21 @@ package de.jplag.python3.grammar; -import org.antlr.v4.runtime.Parser; -import org.antlr.v4.runtime.TokenStream; +import org.antlr.v4.runtime.*; -public abstract class Python3ParserBase extends Parser { - protected Python3ParserBase(TokenStream input) { +public abstract class Python3ParserBase extends Parser +{ + protected Python3ParserBase(TokenStream input) + { super(input); } - public boolean cannotBePlusMinus() { + public boolean CannotBePlusMinus() + { return true; } - public boolean cannotBeDotLpEq() { + public boolean CannotBeDotLpEq() + { return true; } -} +} \ No newline at end of file From e8bb6fb3e2adbffb1c6f618047c854caa1754a23 Mon Sep 17 00:00:00 2001 From: Alexander Milster Date: Fri, 3 May 2024 14:05:47 +0200 Subject: [PATCH 2/7] Migrated python language module to 
new syntax --- languages/python-3/pom.xml | 5 + .../jplag/python3/JplagPython3Listener.java | 216 ------------------ .../main/java/de/jplag/python3/Parser.java | 78 ------- .../java/de/jplag/python3/PythonLanguage.java | 18 +- .../java/de/jplag/python3/PythonListener.java | 107 +++++++++ .../de/jplag/python3/PythonParserAdapter.java | 33 +++ 6 files changed, 148 insertions(+), 309 deletions(-) delete mode 100644 languages/python-3/src/main/java/de/jplag/python3/JplagPython3Listener.java delete mode 100644 languages/python-3/src/main/java/de/jplag/python3/Parser.java create mode 100644 languages/python-3/src/main/java/de/jplag/python3/PythonListener.java create mode 100644 languages/python-3/src/main/java/de/jplag/python3/PythonParserAdapter.java diff --git a/languages/python-3/pom.xml b/languages/python-3/pom.xml index 92712c0b1..fb6865e69 100644 --- a/languages/python-3/pom.xml +++ b/languages/python-3/pom.xml @@ -13,6 +13,11 @@ org.antlr antlr4-runtime + + de.jplag + language-antlr-utils + ${revision} + diff --git a/languages/python-3/src/main/java/de/jplag/python3/JplagPython3Listener.java b/languages/python-3/src/main/java/de/jplag/python3/JplagPython3Listener.java deleted file mode 100644 index 695d07e40..000000000 --- a/languages/python-3/src/main/java/de/jplag/python3/JplagPython3Listener.java +++ /dev/null @@ -1,216 +0,0 @@ -package de.jplag.python3; - -import static de.jplag.python3.Python3TokenType.APPLY; -import static de.jplag.python3.Python3TokenType.ARRAY; -import static de.jplag.python3.Python3TokenType.ASSERT; -import static de.jplag.python3.Python3TokenType.ASSIGN; -import static de.jplag.python3.Python3TokenType.BREAK; -import static de.jplag.python3.Python3TokenType.CLASS_BEGIN; -import static de.jplag.python3.Python3TokenType.CLASS_END; -import static de.jplag.python3.Python3TokenType.CONTINUE; -import static de.jplag.python3.Python3TokenType.DEC_BEGIN; -import static de.jplag.python3.Python3TokenType.DEC_END; -import static 
de.jplag.python3.Python3TokenType.DEL; -import static de.jplag.python3.Python3TokenType.EXCEPT_BEGIN; -import static de.jplag.python3.Python3TokenType.EXCEPT_END; -import static de.jplag.python3.Python3TokenType.FINALLY; -import static de.jplag.python3.Python3TokenType.FOR_BEGIN; -import static de.jplag.python3.Python3TokenType.FOR_END; -import static de.jplag.python3.Python3TokenType.IF_BEGIN; -import static de.jplag.python3.Python3TokenType.IF_END; -import static de.jplag.python3.Python3TokenType.IMPORT; -import static de.jplag.python3.Python3TokenType.LAMBDA; -import static de.jplag.python3.Python3TokenType.METHOD_BEGIN; -import static de.jplag.python3.Python3TokenType.METHOD_END; -import static de.jplag.python3.Python3TokenType.RAISE; -import static de.jplag.python3.Python3TokenType.RETURN; -import static de.jplag.python3.Python3TokenType.TRY_BEGIN; -import static de.jplag.python3.Python3TokenType.WHILE_BEGIN; -import static de.jplag.python3.Python3TokenType.WHILE_END; -import static de.jplag.python3.Python3TokenType.WITH_BEGIN; -import static de.jplag.python3.Python3TokenType.WITH_END; -import static de.jplag.python3.Python3TokenType.YIELD; - -import org.antlr.v4.runtime.tree.TerminalNode; - -import de.jplag.python3.grammar.Python3Parser; -import de.jplag.python3.grammar.Python3ParserBaseListener; - -public class JplagPython3Listener extends Python3ParserBaseListener { - - private final Parser parser; - - public JplagPython3Listener(Parser parser) { - this.parser = parser; - } - - @Override - public void enterAssert_stmt(Python3Parser.Assert_stmtContext ctx) { - parser.add(ASSERT, ctx.getStart()); - } - - @Override - public void enterDecorated(Python3Parser.DecoratedContext ctx) { - parser.add(DEC_BEGIN, ctx.getStart()); - } - - @Override - public void exitDecorated(Python3Parser.DecoratedContext ctx) { - parser.addEnd(DEC_END, ctx.getStop()); - } - - @Override - public void enterRaise_stmt(Python3Parser.Raise_stmtContext ctx) { - parser.add(RAISE, 
ctx.getStart()); - } - - @Override - public void enterExcept_clause(Python3Parser.Except_clauseContext ctx) { - parser.add(EXCEPT_BEGIN, ctx.getStart()); - } - - @Override - public void exitExcept_clause(Python3Parser.Except_clauseContext ctx) { - parser.addEnd(EXCEPT_END, ctx.getStop()); - } - - @Override - public void enterDictorsetmaker(Python3Parser.DictorsetmakerContext ctx) { - parser.add(ARRAY, ctx.getStart()); - } - - @Override - public void enterReturn_stmt(Python3Parser.Return_stmtContext ctx) { - parser.add(RETURN, ctx.getStart()); - } - - @Override - public void enterWhile_stmt(Python3Parser.While_stmtContext ctx) { - parser.add(WHILE_BEGIN, ctx.getStart()); - } - - @Override - public void exitWhile_stmt(Python3Parser.While_stmtContext ctx) { - parser.addEnd(WHILE_END, ctx.getStop()); - } - - @Override - public void enterYield_arg(Python3Parser.Yield_argContext ctx) { - parser.add(YIELD, ctx.getStart()); - } - - @Override - public void enterImport_stmt(Python3Parser.Import_stmtContext ctx) { - parser.add(IMPORT, ctx.getStart()); - } - - @Override - public void enterLambdef(Python3Parser.LambdefContext ctx) { - parser.add(LAMBDA, ctx.getStart()); - } - - @Override - public void enterTry_stmt(Python3Parser.Try_stmtContext ctx) { - parser.add(TRY_BEGIN, ctx.getStart()); - } - - @Override - public void enterBreak_stmt(Python3Parser.Break_stmtContext ctx) { - parser.add(BREAK, ctx.getStart()); - } - - @Override - public void enterTestlist_comp(Python3Parser.Testlist_compContext ctx) { - if (ctx.getText().contains(",")) { - parser.add(ARRAY, ctx.getStart()); - } - } - - @Override - public void enterIf_stmt(Python3Parser.If_stmtContext ctx) { - parser.add(IF_BEGIN, ctx.getStart()); - } - - @Override - public void exitIf_stmt(Python3Parser.If_stmtContext ctx) { - parser.addEnd(IF_END, ctx.getStop()); - } - - @Override - public void enterWith_stmt(Python3Parser.With_stmtContext ctx) { - parser.add(WITH_BEGIN, ctx.getStart()); - } - - @Override - public void 
exitWith_stmt(Python3Parser.With_stmtContext ctx) { - parser.addEnd(WITH_END, ctx.getStop()); - } - - @Override - public void enterClassdef(Python3Parser.ClassdefContext ctx) { - parser.add(CLASS_BEGIN, ctx.getStart()); - } - - @Override - public void exitClassdef(Python3Parser.ClassdefContext ctx) { - parser.addEnd(CLASS_END, ctx.getStop()); - } - - @Override - public void enterTrailer(Python3Parser.TrailerContext ctx) { - if (ctx.getText().charAt(0) == '(') { - parser.add(APPLY, ctx.getStart()); - } else { - parser.add(ARRAY, ctx.getStart()); - } - } - - @Override - public void enterFuncdef(Python3Parser.FuncdefContext ctx) { - parser.add(METHOD_BEGIN, ctx.getStart()); - } - - @Override - public void exitFuncdef(Python3Parser.FuncdefContext ctx) { - parser.addEnd(METHOD_END, ctx.getStop()); - } - - @Override - public void enterAugassign(Python3Parser.AugassignContext ctx) { - parser.add(ASSIGN, ctx.getStart()); - } - - @Override - public void enterYield_stmt(Python3Parser.Yield_stmtContext ctx) { - parser.add(YIELD, ctx.getStart()); - } - - @Override - public void enterContinue_stmt(Python3Parser.Continue_stmtContext ctx) { - parser.add(CONTINUE, ctx.getStart()); - } - - @Override - public void enterFor_stmt(Python3Parser.For_stmtContext ctx) { - parser.add(FOR_BEGIN, ctx.getStart()); - } - - @Override - public void exitFor_stmt(Python3Parser.For_stmtContext ctx) { - parser.addEnd(FOR_END, ctx.getStop()); - } - - @Override - public void enterDel_stmt(Python3Parser.Del_stmtContext ctx) { - parser.add(DEL, ctx.getStart()); - } - - @Override - public void visitTerminal(TerminalNode node) { - if (node.getText().equals("=")) { - parser.add(ASSIGN, node.getSymbol()); - } else if (node.getText().equals("finally")) { - parser.add(FINALLY, node.getSymbol()); - } - } -} diff --git a/languages/python-3/src/main/java/de/jplag/python3/Parser.java b/languages/python-3/src/main/java/de/jplag/python3/Parser.java deleted file mode 100644 index 2dc352bfe..000000000 --- 
a/languages/python-3/src/main/java/de/jplag/python3/Parser.java +++ /dev/null @@ -1,78 +0,0 @@ -package de.jplag.python3; - -import java.io.BufferedReader; -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; - -import org.antlr.v4.runtime.CharStreams; -import org.antlr.v4.runtime.CommonTokenStream; -import org.antlr.v4.runtime.tree.ParseTree; -import org.antlr.v4.runtime.tree.ParseTreeWalker; - -import de.jplag.AbstractParser; -import de.jplag.ParsingException; -import de.jplag.Token; -import de.jplag.TokenType; -import de.jplag.python3.grammar.Python3Lexer; -import de.jplag.python3.grammar.Python3Parser; -import de.jplag.python3.grammar.Python3Parser.File_inputContext; -import de.jplag.util.FileUtils; - -public class Parser extends AbstractParser { - - private List tokens; - private File currentFile; - - /** - * Creates the parser. - */ - public Parser() { - super(); - } - - public List parse(Set files) throws ParsingException { - tokens = new ArrayList<>(); - for (File file : files) { - logger.trace("Parsing file {}", file.getName()); - parseFile(file); - tokens.add(Token.fileEnd(file)); - } - return tokens; - } - - private void parseFile(File file) throws ParsingException { - try (BufferedReader reader = FileUtils.openFileReader(file)) { - currentFile = file; - - // create a lexer that feeds off of input CharStream - Python3Lexer lexer = new Python3Lexer(CharStreams.fromReader(reader)); - - // create a buffer of tokens pulled from the lexer - CommonTokenStream tokens = new CommonTokenStream(lexer); - - // create a parser that feeds off the tokens buffer - Python3Parser parser = new Python3Parser(tokens); - File_inputContext in = parser.file_input(); - - ParseTreeWalker ptw = new ParseTreeWalker(); - for (int i = 0; i < in.getChildCount(); i++) { - ParseTree pt = in.getChild(i); - ptw.walk(new JplagPython3Listener(this), pt); - } - - } catch (IOException e) { - throw new 
ParsingException(file, e.getMessage(), e); - } - } - - public void add(TokenType type, org.antlr.v4.runtime.Token token) { - tokens.add(new Token(type, currentFile, token.getLine(), token.getCharPositionInLine() + 1, token.getText().length())); - } - - public void addEnd(TokenType type, org.antlr.v4.runtime.Token token) { - tokens.add(new Token(type, currentFile, token.getLine(), tokens.get(tokens.size() - 1).getColumn() + 1, 0)); - } -} diff --git a/languages/python-3/src/main/java/de/jplag/python3/PythonLanguage.java b/languages/python-3/src/main/java/de/jplag/python3/PythonLanguage.java index b5a8fd73f..3df658728 100644 --- a/languages/python-3/src/main/java/de/jplag/python3/PythonLanguage.java +++ b/languages/python-3/src/main/java/de/jplag/python3/PythonLanguage.java @@ -1,23 +1,16 @@ package de.jplag.python3; -import java.io.File; -import java.util.List; -import java.util.Set; - import org.kohsuke.MetaInfServices; -import de.jplag.ParsingException; -import de.jplag.Token; +import de.jplag.antlr.AbstractAntlrLanguage; @MetaInfServices(de.jplag.Language.class) -public class PythonLanguage implements de.jplag.Language { +public class PythonLanguage extends AbstractAntlrLanguage { private static final String IDENTIFIER = "python3"; - private final Parser parser; - public PythonLanguage() { - parser = new Parser(); + super(new PythonParserAdapter()); } @Override @@ -39,9 +32,4 @@ public String getIdentifier() { public int minimumTokenMatch() { return 12; } - - @Override - public List parse(Set files, boolean normalize) throws ParsingException { - return this.parser.parse(files); - } } diff --git a/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java b/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java new file mode 100644 index 000000000..ab224ae3f --- /dev/null +++ b/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java @@ -0,0 +1,107 @@ +package de.jplag.python3; + +import static 
de.jplag.python3.Python3TokenType.APPLY; +import static de.jplag.python3.Python3TokenType.ARRAY; +import static de.jplag.python3.Python3TokenType.ASSERT; +import static de.jplag.python3.Python3TokenType.ASSIGN; +import static de.jplag.python3.Python3TokenType.BREAK; +import static de.jplag.python3.Python3TokenType.CLASS_BEGIN; +import static de.jplag.python3.Python3TokenType.CLASS_END; +import static de.jplag.python3.Python3TokenType.CONTINUE; +import static de.jplag.python3.Python3TokenType.DEC_BEGIN; +import static de.jplag.python3.Python3TokenType.DEC_END; +import static de.jplag.python3.Python3TokenType.DEL; +import static de.jplag.python3.Python3TokenType.EXCEPT_BEGIN; +import static de.jplag.python3.Python3TokenType.EXCEPT_END; +import static de.jplag.python3.Python3TokenType.FINALLY; +import static de.jplag.python3.Python3TokenType.FOR_BEGIN; +import static de.jplag.python3.Python3TokenType.FOR_END; +import static de.jplag.python3.Python3TokenType.IF_BEGIN; +import static de.jplag.python3.Python3TokenType.IF_END; +import static de.jplag.python3.Python3TokenType.IMPORT; +import static de.jplag.python3.Python3TokenType.LAMBDA; +import static de.jplag.python3.Python3TokenType.METHOD_BEGIN; +import static de.jplag.python3.Python3TokenType.METHOD_END; +import static de.jplag.python3.Python3TokenType.RAISE; +import static de.jplag.python3.Python3TokenType.RETURN; +import static de.jplag.python3.Python3TokenType.TRY_BEGIN; +import static de.jplag.python3.Python3TokenType.WHILE_BEGIN; +import static de.jplag.python3.Python3TokenType.WHILE_END; +import static de.jplag.python3.Python3TokenType.WITH_BEGIN; +import static de.jplag.python3.Python3TokenType.WITH_END; +import static de.jplag.python3.Python3TokenType.YIELD; + +import de.jplag.antlr.AbstractAntlrListener; +import de.jplag.python3.grammar.Python3Parser; +import de.jplag.python3.grammar.Python3Parser.Assert_stmtContext; +import de.jplag.python3.grammar.Python3Parser.AugassignContext; +import 
de.jplag.python3.grammar.Python3Parser.Break_stmtContext; +import de.jplag.python3.grammar.Python3Parser.ClassdefContext; +import de.jplag.python3.grammar.Python3Parser.Continue_stmtContext; +import de.jplag.python3.grammar.Python3Parser.DecoratedContext; +import de.jplag.python3.grammar.Python3Parser.Del_stmtContext; +import de.jplag.python3.grammar.Python3Parser.DictorsetmakerContext; +import de.jplag.python3.grammar.Python3Parser.Except_clauseContext; +import de.jplag.python3.grammar.Python3Parser.For_stmtContext; +import de.jplag.python3.grammar.Python3Parser.FuncdefContext; +import de.jplag.python3.grammar.Python3Parser.If_stmtContext; +import de.jplag.python3.grammar.Python3Parser.Import_stmtContext; +import de.jplag.python3.grammar.Python3Parser.LambdefContext; +import de.jplag.python3.grammar.Python3Parser.Raise_stmtContext; +import de.jplag.python3.grammar.Python3Parser.Return_stmtContext; +import de.jplag.python3.grammar.Python3Parser.Testlist_compContext; +import de.jplag.python3.grammar.Python3Parser.TrailerContext; +import de.jplag.python3.grammar.Python3Parser.Try_stmtContext; +import de.jplag.python3.grammar.Python3Parser.While_stmtContext; +import de.jplag.python3.grammar.Python3Parser.With_stmtContext; +import de.jplag.python3.grammar.Python3Parser.Yield_argContext; +import de.jplag.python3.grammar.Python3Parser.Yield_stmtContext; + +public class PythonListener extends AbstractAntlrListener { + public PythonListener() { + statements(); + controlStructures(); + contexts(); + values(); + } + + private void statements() { + visit(Assert_stmtContext.class).map(ASSERT); + visit(Raise_stmtContext.class).map(RAISE); + visit(Return_stmtContext.class).map(RETURN); + visit(Yield_argContext.class).map(YIELD); + visit(Yield_stmtContext.class).map(YIELD); + visit(Import_stmtContext.class).map(IMPORT); + visit(Break_stmtContext.class).map(BREAK); + visit(Continue_stmtContext.class).map(CONTINUE); + visit(Del_stmtContext.class).map(DEL); + 
visit(Python3Parser.FINALLY).map(FINALLY); + + visit(Except_clauseContext.class).map(EXCEPT_BEGIN, EXCEPT_END); + } + + private void controlStructures() { + visit(While_stmtContext.class).map(WHILE_BEGIN, WHILE_END); + visit(Try_stmtContext.class).map(TRY_BEGIN); + visit(If_stmtContext.class).map(IF_BEGIN, IF_END); + visit(With_stmtContext.class).map(WITH_BEGIN, WITH_END); + visit(For_stmtContext.class).map(FOR_BEGIN, FOR_END); + } + + private void contexts() { + visit(DecoratedContext.class).map(DEC_BEGIN, DEC_END); + visit(LambdefContext.class).map(LAMBDA); + visit(ClassdefContext.class).map(CLASS_BEGIN, CLASS_END); + visit(FuncdefContext.class).map(METHOD_BEGIN, METHOD_END); + } + + private void values() { + visit(DictorsetmakerContext.class).map(ARRAY); + visit(Testlist_compContext.class, context -> context.getText().contains(",")).map(ARRAY); + visit(AugassignContext.class).map(ASSIGN); + visit(Python3Parser.ASSIGN).map(ASSIGN); + + visit(TrailerContext.class, ctx -> ctx.getText().charAt(0) == '(').map(APPLY); + visit(TrailerContext.class, ctx -> ctx.getText().charAt(0) != '(').map(ARRAY); + } +} diff --git a/languages/python-3/src/main/java/de/jplag/python3/PythonParserAdapter.java b/languages/python-3/src/main/java/de/jplag/python3/PythonParserAdapter.java new file mode 100644 index 000000000..8d99920f0 --- /dev/null +++ b/languages/python-3/src/main/java/de/jplag/python3/PythonParserAdapter.java @@ -0,0 +1,33 @@ +package de.jplag.python3; + +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.Lexer; +import org.antlr.v4.runtime.ParserRuleContext; + +import de.jplag.antlr.AbstractAntlrListener; +import de.jplag.antlr.AbstractAntlrParserAdapter; +import de.jplag.python3.grammar.Python3Lexer; +import de.jplag.python3.grammar.Python3Parser; + +public class PythonParserAdapter extends AbstractAntlrParserAdapter { + @Override + protected Lexer createLexer(CharStream input) { + return new 
Python3Lexer(input); + } + + @Override + protected Python3Parser createParser(CommonTokenStream tokenStream) { + return new Python3Parser(tokenStream); + } + + @Override + protected ParserRuleContext getEntryContext(Python3Parser parser) { + return parser.file_input(); + } + + @Override + protected AbstractAntlrListener getListener() { + return new PythonListener(); + } +} From c77b372edf375a6cf56f18b9785efd86204a1f64 Mon Sep 17 00:00:00 2001 From: Alexander Milster Date: Fri, 3 May 2024 14:06:34 +0200 Subject: [PATCH 3/7] Migrated python language module to new syntax --- .../python3/grammar/Python3LexerBase.java | 30 ++++++++----------- .../python3/grammar/Python3ParserBase.java | 12 +++----- 2 files changed, 17 insertions(+), 25 deletions(-) diff --git a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java index 9b842dc8c..b5a0e5501 100644 --- a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java +++ b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3LexerBase.java @@ -1,10 +1,10 @@ package de.jplag.python3.grammar; -import org.antlr.v4.runtime.*; - import java.util.ArrayDeque; import java.util.Deque; +import org.antlr.v4.runtime.*; + abstract class Python3LexerBase extends Lexer { // A queue where extra tokens are pushed on (see the NEWLINE lexer rule). 
private java.util.LinkedList tokens = new java.util.LinkedList<>(); @@ -75,10 +75,10 @@ private CommonToken commonToken(int type, String text) { // following rules into account: // // "Tabs are replaced (from left to right) by one to eight spaces - // such that the total number of characters up to and including - // the replacement is a multiple of eight [...]" + // such that the total number of characters up to and including + // the replacement is a multiple of eight [...]" // - // -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation + // -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation static int getIndentationCount(String spaces) { int count = 0; for (char ch : spaces.toCharArray()) { @@ -99,15 +99,15 @@ boolean atStartOfInput() { return super.getCharPositionInLine() == 0 && super.getLine() == 1; } - void openBrace(){ + void openBrace() { this.opened++; } - void closeBrace(){ + void closeBrace() { this.opened--; } - void onNewLine(){ + void onNewLine() { String newLine = getText().replaceAll("[^\r\n\f]+", ""); String spaces = getText().replaceAll("[\r\n\f]+", ""); @@ -119,22 +119,19 @@ void onNewLine(){ // If we're inside a list or on a blank line, ignore all indents, // dedents and line breaks. skip(); - } - else { + } else { emit(commonToken(Python3Lexer.NEWLINE, newLine)); int indent = getIndentationCount(spaces); int previous = indents.isEmpty() ? 0 : indents.peek(); if (indent == previous) { // skip indents of the same size as the present indent-size skip(); - } - else if (indent > previous) { + } else if (indent > previous) { indents.push(indent); emit(commonToken(Python3Lexer.INDENT, spaces)); - } - else { + } else { // Possibly emit more than 1 DEDENT token. 
- while(!indents.isEmpty() && indents.peek() > indent) { + while (!indents.isEmpty() && indents.peek() > indent) { this.emit(createDedent()); indents.pop(); } @@ -143,8 +140,7 @@ else if (indent > previous) { } @Override - public void reset() - { + public void reset() { tokens = new java.util.LinkedList<>(); indents = new ArrayDeque<>(); opened = 0; diff --git a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java index c3623ed74..713af92c1 100644 --- a/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java +++ b/languages/python-3/src/main/java/de/jplag/python3/grammar/Python3ParserBase.java @@ -2,20 +2,16 @@ import org.antlr.v4.runtime.*; -public abstract class Python3ParserBase extends Parser -{ - protected Python3ParserBase(TokenStream input) - { +public abstract class Python3ParserBase extends Parser { + protected Python3ParserBase(TokenStream input) { super(input); } - public boolean CannotBePlusMinus() - { + public boolean CannotBePlusMinus() { return true; } - public boolean CannotBeDotLpEq() - { + public boolean CannotBeDotLpEq() { return true; } } \ No newline at end of file From 0a67ccc1a89581b36170c50015718d424921caf3 Mon Sep 17 00:00:00 2001 From: Alexander Milster Date: Fri, 3 May 2024 14:14:36 +0200 Subject: [PATCH 4/7] Added tokens for ASYNC and AWAIT from python 3.7 --- .../de/jplag/python3/Python3TokenType.java | 4 ++- .../java/de/jplag/python3/PythonListener.java | 36 +++---------------- 2 files changed, 8 insertions(+), 32 deletions(-) diff --git a/languages/python-3/src/main/java/de/jplag/python3/Python3TokenType.java b/languages/python-3/src/main/java/de/jplag/python3/Python3TokenType.java index e4a684c9b..850522470 100644 --- a/languages/python-3/src/main/java/de/jplag/python3/Python3TokenType.java +++ b/languages/python-3/src/main/java/de/jplag/python3/Python3TokenType.java @@ -32,7 +32,9 @@ public enum 
Python3TokenType implements TokenType { YIELD("YIELD"), DEL("DEL"), WITH_BEGIN("WITH}"), - WITH_END("}WITH"); + WITH_END("}WITH"), + ASYNC("ASYNC"), + AWAIT("AWAIT"); private final String description; diff --git a/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java b/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java index ab224ae3f..c4c7315ec 100644 --- a/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java +++ b/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java @@ -1,36 +1,5 @@ package de.jplag.python3; -import static de.jplag.python3.Python3TokenType.APPLY; -import static de.jplag.python3.Python3TokenType.ARRAY; -import static de.jplag.python3.Python3TokenType.ASSERT; -import static de.jplag.python3.Python3TokenType.ASSIGN; -import static de.jplag.python3.Python3TokenType.BREAK; -import static de.jplag.python3.Python3TokenType.CLASS_BEGIN; -import static de.jplag.python3.Python3TokenType.CLASS_END; -import static de.jplag.python3.Python3TokenType.CONTINUE; -import static de.jplag.python3.Python3TokenType.DEC_BEGIN; -import static de.jplag.python3.Python3TokenType.DEC_END; -import static de.jplag.python3.Python3TokenType.DEL; -import static de.jplag.python3.Python3TokenType.EXCEPT_BEGIN; -import static de.jplag.python3.Python3TokenType.EXCEPT_END; -import static de.jplag.python3.Python3TokenType.FINALLY; -import static de.jplag.python3.Python3TokenType.FOR_BEGIN; -import static de.jplag.python3.Python3TokenType.FOR_END; -import static de.jplag.python3.Python3TokenType.IF_BEGIN; -import static de.jplag.python3.Python3TokenType.IF_END; -import static de.jplag.python3.Python3TokenType.IMPORT; -import static de.jplag.python3.Python3TokenType.LAMBDA; -import static de.jplag.python3.Python3TokenType.METHOD_BEGIN; -import static de.jplag.python3.Python3TokenType.METHOD_END; -import static de.jplag.python3.Python3TokenType.RAISE; -import static de.jplag.python3.Python3TokenType.RETURN; -import 
static de.jplag.python3.Python3TokenType.TRY_BEGIN; -import static de.jplag.python3.Python3TokenType.WHILE_BEGIN; -import static de.jplag.python3.Python3TokenType.WHILE_END; -import static de.jplag.python3.Python3TokenType.WITH_BEGIN; -import static de.jplag.python3.Python3TokenType.WITH_END; -import static de.jplag.python3.Python3TokenType.YIELD; - import de.jplag.antlr.AbstractAntlrListener; import de.jplag.python3.grammar.Python3Parser; import de.jplag.python3.grammar.Python3Parser.Assert_stmtContext; @@ -57,6 +26,8 @@ import de.jplag.python3.grammar.Python3Parser.Yield_argContext; import de.jplag.python3.grammar.Python3Parser.Yield_stmtContext; +import static de.jplag.python3.Python3TokenType.*; + public class PythonListener extends AbstractAntlrListener { public PythonListener() { statements(); @@ -77,6 +48,9 @@ private void statements() { visit(Del_stmtContext.class).map(DEL); visit(Python3Parser.FINALLY).map(FINALLY); + visit(Python3Parser.ASYNC).map(ASYNC); + visit(Python3Parser.AWAIT).map(AWAIT); + visit(Except_clauseContext.class).map(EXCEPT_BEGIN, EXCEPT_END); } From 6d8d66eae27d92c5fa0de306c0520ae655a7cac8 Mon Sep 17 00:00:00 2001 From: Alexander Milster Date: Fri, 3 May 2024 14:17:44 +0200 Subject: [PATCH 5/7] Spotless --- .../src/main/java/de/jplag/python3/PythonListener.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java b/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java index c4c7315ec..aa0dabb18 100644 --- a/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java +++ b/languages/python-3/src/main/java/de/jplag/python3/PythonListener.java @@ -1,5 +1,7 @@ package de.jplag.python3; +import static de.jplag.python3.Python3TokenType.*; + import de.jplag.antlr.AbstractAntlrListener; import de.jplag.python3.grammar.Python3Parser; import de.jplag.python3.grammar.Python3Parser.Assert_stmtContext; @@ -26,8 +28,6 @@ import 
de.jplag.python3.grammar.Python3Parser.Yield_argContext; import de.jplag.python3.grammar.Python3Parser.Yield_stmtContext; -import static de.jplag.python3.Python3TokenType.*; - public class PythonListener extends AbstractAntlrListener { public PythonListener() { statements(); From c559a3567b16e8e7e66138963d40313288868286 Mon Sep 17 00:00:00 2001 From: Alexander Milster Date: Fri, 3 May 2024 14:44:24 +0200 Subject: [PATCH 6/7] Added async and await to the tests --- .../src/test/resources/de/jplag/python3/test_utils.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/languages/python-3/src/test/resources/de/jplag/python3/test_utils.py b/languages/python-3/src/test/resources/de/jplag/python3/test_utils.py index 34d92252c..bfd3e8bf4 100644 --- a/languages/python-3/src/test/resources/de/jplag/python3/test_utils.py +++ b/languages/python-3/src/test/resources/de/jplag/python3/test_utils.py @@ -500,4 +500,9 @@ def force_legacy_ssl_support(): def switchWithBreak(): while True: - break \ No newline at end of file + break + +async def x(): + return "" + +x = await x() \ No newline at end of file From fbe78a7e6ead43abe16fcc4625e784209cc58d91 Mon Sep 17 00:00:00 2001 From: Alexander Milster Date: Thu, 16 May 2024 13:07:54 +0200 Subject: [PATCH 7/7] Designated the python module as beta --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 507651819..eeda7526f 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ All supported languages and their supported versions are listed below. 
| [C](https://isocpp.org) | 11 | c | legacy | JavaCC | | [C++](https://isocpp.org) | 14 | cpp | beta | ANTLR 4 | | [C#](https://docs.microsoft.com/en-us/dotnet/csharp/) | 6 | csharp | mature | ANTLR 4 | -| [Python](https://www.python.org) | 3.6 | python3 | legacy | ANTLR 4 | +| [Python](https://www.python.org) | 3.6 | python3 | beta | ANTLR 4 | | [JavaScript](https://www.javascript.com/) | ES6 | javascript | beta | ANTLR 4 | | [TypeScript](https://www.typescriptlang.org/) | [~5](https://github.com/antlr/grammars-v4/tree/master/javascript/typescript/README.md) | typescript | beta | ANTLR 4 | | [Go](https://go.dev) | 1.17 | golang | beta | ANTLR 4 |