Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add antlr grammar for testcase file #2

Open
wants to merge 19 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,11 @@ repos:
rev: 6.1.0
hooks:
- id: flake8
# Local hook: runs a single pytest test rather than a linter over the staged files.
- repo: local
hooks:
- id: check-substrait-extensions
name: Check Substrait extensions
entry: pytest tests/test_extensions.py::test_read_substrait_extensions
# NOTE(review): with `language: python`, pre-commit runs the hook from a virtualenv it manages itself;
# confirm pytest and the project's test dependencies are available there (e.g. via additional_dependencies).
language: python
# The entry already names the test to run, so staged file names must not be appended to the command.
pass_filenames: false

109 changes: 109 additions & 0 deletions grammar/FuncTestCaseLexer.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
lexer grammar FuncTestCaseLexer;

import SubstraitLexer;

// Marker that introduces the format-version header line.
SUBSTRAIT_SCALAR_TEST: '### SUBSTRAIT_SCALAR_TEST:';

// 'v' followed by digits and an optional '.digits' part, e.g. v1 or v1.0.
// DIGIT is not defined in this file; presumably it comes from the imported SubstraitLexer.
FORMAT_VERSION: 'v' DIGIT+ ('.' DIGIT+)?;

// Marker that introduces the include list.
SUBSTRAIT_INCLUDE: '### SUBSTRAIT_INCLUDE:';

// A whole line starting with '# ' (hash + space), including its line terminator.
DESCRIPTION_LINE: '# ' ~[\r\n]* '\r'? '\n';

// Special result markers accepted in place of a value.
ERROR_RESULT: '<!ERROR>';
UNDEFINED_RESULT: '<!UNDEFINED>';


// Keywords used inside the bracketed function-option list of a test case:
// option names (OVERFLOW, ROUNDING) followed by the accepted option values.
OVERFLOW: 'overflow';   // was misspelled 'overlfow', so the real keyword never matched
ROUNDING: 'rounding';
ERROR: 'ERROR';
SATURATE: 'SATURATE';
SILENT: 'SILENT';
TIE_TO_EVEN: 'TIE_TO_EVEN';
NAN: 'NAN';


// Numeric literal tokens. The three numeric rules overlap: ANTLR picks the longest
// match and, for matches of equal length, the rule that is defined first.

// Optionally signed integer. INTEGER is not defined in this file; presumably it is
// provided by the imported SubstraitLexer.
INTEGER_LITERAL
: [+-]? INTEGER
;

// Optionally signed digits with an optional '.digits' fraction (no exponent).
DECIMAL_LITERAL
: [+-]? [0-9]+ ('.' [0-9]+)?
;

// Optionally signed float with optional fraction and exponent, plus the special
// spellings inf (optionally signed), nan, NaN and snan.
// Input such as 1.5 is also matched by DECIMAL_LITERAL, which is listed first and
// therefore wins the tie; this rule only wins when it matches more text
// (e.g. 1. or 1e5) or for the special spellings.
FLOAT_LITERAL
: [+-]? [0-9]+ ('.' [0-9]*)? ( [eE] [+-]? [0-9]+ )?
| [+-]? 'inf'
| 'nan' | 'NaN'
| 'snan'
;

// Lower-case true / false only.
BOOLEAN_LITERAL
: 'true' | 'false'
;

// Fixed-width digit groups shared by the date/time literals below.
fragment FourDigits: [0-9] [0-9] [0-9] [0-9];
fragment TwoDigits: [0-9] [0-9];

// 'YYYY-MM-DDTHH:MM:SS[.fraction](+|-)HH:MM' — timestamp with a UTC offset, in single quotes.
TIMESTAMP_TZ_LITERAL
    : '\''
      FourDigits '-' TwoDigits '-' TwoDigits
      'T' TwoDigits ':' TwoDigits ':' TwoDigits ('.' [0-9]+)?
      [+-] TwoDigits ':' TwoDigits
      '\''
    ;

// 'YYYY-MM-DDTHH:MM:SS[.fraction]' — timestamp without an offset, in single quotes.
TIMESTAMP_LITERAL
    : '\''
      FourDigits '-' TwoDigits '-' TwoDigits
      'T' TwoDigits ':' TwoDigits ':' TwoDigits ('.' [0-9]+)?
      '\''
    ;

// 'HH:MM:SS[.fraction]' in single quotes.
TIME_LITERAL
    : '\'' TwoDigits ':' TwoDigits ':' TwoDigits ('.' [0-9]+)? '\''
    ;

// 'YYYY-MM-DD' in single quotes.
DATE_LITERAL
    : '\'' FourDigits '-' TwoDigits '-' TwoDigits '\''
    ;

// Single-letter designators used inside interval literals.
// NOTE(review): these are ordinary token rules, not fragments, so a bare P, T, Y, M, D,
// H, S or F in the input is emitted as one of these tokens — confirm this does not
// clash with identifiers defined by the imported lexer.
PERIOD_PREFIX: 'P';
TIME_PREFIX: 'T';
YEAR_SUFFIX: 'Y';
M_SUFFIX: 'M'; // used for both months and minutes
DAY_SUFFIX: 'D';
HOUR_SUFFIX: 'H';
SECOND_SUFFIX: 'S';
FRACTIONAL_SECOND_SUFFIX: 'F';

// Quoted year/month interval: 'P<n>Y', 'P<n>Y<n>M' or 'P<n>M'.
// Each <n> is an INTEGER_LITERAL, which permits a leading sign.
INTERVAL_YEAR_LITERAL
: '\'' PERIOD_PREFIX INTEGER_LITERAL YEAR_SUFFIX (INTEGER_LITERAL M_SUFFIX)? '\''
| '\'' PERIOD_PREFIX INTEGER_LITERAL M_SUFFIX '\''
;

// Quoted day/time interval: 'P<n>D', 'P<n>DT<time>' or 'PT<time>'.
INTERVAL_DAY_LITERAL
: '\'' PERIOD_PREFIX INTEGER_LITERAL DAY_SUFFIX (TIME_PREFIX TIME_INTERVAL)? '\''
| '\'' PERIOD_PREFIX TIME_PREFIX TIME_INTERVAL '\''
;

// Time portion of a day interval: units appear in the order H, M, S, F; the first
// unit of an alternative is required and every later unit is optional.
fragment TIME_INTERVAL
: INTEGER_LITERAL HOUR_SUFFIX (INTEGER_LITERAL M_SUFFIX)? (INTEGER_LITERAL SECOND_SUFFIX)?
(INTEGER_LITERAL FRACTIONAL_SECOND_SUFFIX)?
| INTEGER_LITERAL M_SUFFIX (INTEGER_LITERAL SECOND_SUFFIX)? (INTEGER_LITERAL FRACTIONAL_SECOND_SUFFIX)?
| INTEGER_LITERAL SECOND_SUFFIX (INTEGER_LITERAL FRACTIONAL_SECOND_SUFFIX)?
| INTEGER_LITERAL FRACTIONAL_SECOND_SUFFIX
;

// Lower-case null keyword.
NULL_LITERAL: 'null';

// Single-quoted string. Inside the quotes a backslash escapes any one character,
// two consecutive quotes ('') are accepted, and any other character except a quote
// or backslash is taken as-is.
// The quoted date/time/interval rules are defined earlier in this grammar, so quoted
// text of exactly those shapes is reported with their token types, not as STRING_LITERAL.
STRING_LITERAL
: '\'' ('\\' . | '\'\'' | ~['\\])* '\''
;
222 changes: 222 additions & 0 deletions grammar/FuncTestCaseParser.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
parser grammar FuncTestCaseParser;

options {
    // A grammar has one token vocabulary; the option was previously assigned twice.
    // FuncTestCaseLexer imports SubstraitLexer, so its vocabulary already includes
    // the Substrait tokens in addition to the test-case tokens.
    tokenVocab=FuncTestCaseLexer;
}

// Entry rule: a header followed by one or more test groups, up to end of input.
doc: header testGroup+ EOF;

// The header is the version line followed by the include line.
header: version include;

// Version marker followed by the format version token.
version: SUBSTRAIT_SCALAR_TEST FORMAT_VERSION;

// Include marker followed by one or more comma-separated string literals.
include: SUBSTRAIT_INCLUDE STRING_LITERAL (COMMA STRING_LITERAL)*;

// A single description line introduces each test group.
testGroupDescription: DESCRIPTION_LINE;

// name(arguments) [options] = result — the bracketed option list is optional.
testCase
    : functionName=IDENTIFIER OPAREN arguments CPAREN
      (OBRACKET func_options CBRACKET)?
      EQ result
    ;

// A description followed by one or more test cases.
testGroup: testGroupDescription testCase+;

// One or more comma-separated arguments.
arguments: argument (COMMA argument)*;

// A test result is either a typed value or one of the error markers.
result: argument | substraitError;

// A typed literal; one alternative per supported type.
argument
    : nullArg
    | i8Arg
    | i16Arg
    | i32Arg
    | i64Arg
    | fp32Arg
    | fp64Arg
    | booleanArg
    | stringArg
    | decimalArg
    | dateArg
    | timeArg
    | timestampArg
    | timestampTzArg
    | intervalYearArg
    | intervalDayArg
    ;

// Any of the three numeric token kinds; used by the floating-point and decimal arguments.
numericLiteral: DECIMAL_LITERAL | INTEGER_LITERAL | FLOAT_LITERAL;

// Every argument has the shape <literal> DOUBLE_COLON <type>.

// null may be annotated with any datatype.
nullArg: NULL_LITERAL DOUBLE_COLON datatype;

// Integer arguments accept only INTEGER_LITERAL.
i8Arg: INTEGER_LITERAL DOUBLE_COLON I8;
i16Arg: INTEGER_LITERAL DOUBLE_COLON I16;
i32Arg: INTEGER_LITERAL DOUBLE_COLON I32;
i64Arg: INTEGER_LITERAL DOUBLE_COLON I64;

// Floating-point and decimal arguments accept any numeric literal kind.
fp32Arg: numericLiteral DOUBLE_COLON FP32;
fp64Arg: numericLiteral DOUBLE_COLON FP64;
decimalArg: numericLiteral DOUBLE_COLON decimalType;

booleanArg: BOOLEAN_LITERAL DOUBLE_COLON Bool;
stringArg: STRING_LITERAL DOUBLE_COLON Str;

// Date/time arguments pair each quoted literal token with its type keyword.
dateArg: DATE_LITERAL DOUBLE_COLON Date;
timeArg: TIME_LITERAL DOUBLE_COLON Time;
timestampArg: TIMESTAMP_LITERAL DOUBLE_COLON Ts;
timestampTzArg: TIMESTAMP_TZ_LITERAL DOUBLE_COLON TsTZ;

intervalYearArg: INTERVAL_YEAR_LITERAL DOUBLE_COLON IYear;
intervalDayArg: INTERVAL_DAY_LITERAL DOUBLE_COLON IDay;

// Parser-level decomposition of interval values into labelled components.
// NOTE(review): no rule visible in this grammar references intervalYearLiteral,
// intervalDayLiteral or timeInterval. The lexer already emits whole quoted intervals
// as INTERVAL_YEAR_LITERAL / INTERVAL_DAY_LITERAL, which is what intervalYearArg and
// intervalDayArg consume — confirm whether these rules are still needed.

// P<years>Y[<months>M] or P<months>M.
intervalYearLiteral
: PERIOD_PREFIX (years=INTEGER_LITERAL YEAR_SUFFIX) (months=INTEGER_LITERAL M_SUFFIX)?
| PERIOD_PREFIX (months=INTEGER_LITERAL M_SUFFIX)
;

// P<days>D[T<time>] or PT<time>.
intervalDayLiteral
: PERIOD_PREFIX (days=INTEGER_LITERAL DAY_SUFFIX) (TIME_PREFIX timeInterval)?
| PERIOD_PREFIX TIME_PREFIX timeInterval
;

// Hours, minutes, seconds and fractional seconds in that order; the first unit of
// each alternative is required and the units after it are optional.
timeInterval
: hours=INTEGER_LITERAL HOUR_SUFFIX (minutes=INTEGER_LITERAL M_SUFFIX)? (seconds=INTEGER_LITERAL SECOND_SUFFIX)?
(fractionalSeconds=INTEGER_LITERAL FRACTIONAL_SECOND_SUFFIX)?
| minutes=INTEGER_LITERAL M_SUFFIX (seconds=INTEGER_LITERAL SECOND_SUFFIX)? (fractionalSeconds=INTEGER_LITERAL FRACTIONAL_SECOND_SUFFIX)?
| seconds=INTEGER_LITERAL SECOND_SUFFIX (fractionalSeconds=INTEGER_LITERAL FRACTIONAL_SECOND_SUFFIX)?
| fractionalSeconds=INTEGER_LITERAL FRACTIONAL_SECOND_SUFFIX
;

// A datatype is either a plain scalar type or a parameterized type.
datatype: scalarType | parameterizedType;

// Scalar type keywords; each alternative carries a label so it gets its own
// context in the generated parser.
scalarType
    : Bool                   #Boolean
    | I8                     #i8
    | I16                    #i16
    | I32                    #i32
    | I64                    #i64
    | FP32                   #fp32
    | FP64                   #fp64
    | Str                    #string
    | Binary                 #binary
    | Ts                     #timestamp
    | TsTZ                   #timestampTz
    | Date                   #date
    | Time                   #time
    | IDay                   #intervalDay
    | IYear                  #intervalYear
    | UUID                   #uuid
    | UserDefined IDENTIFIER #userDefined
    ;

// Parameterized types. Each rule is the type keyword, an optional QMARK (captured
// as isnull), and its parameters between angle brackets.

// FChar with a single length parameter.
fixedCharType
: FChar isnull=QMARK? O_ANGLE_BRACKET len=numericParameter C_ANGLE_BRACKET #fixedChar
;

// VChar with a single length parameter.
varCharType
: VChar isnull=QMARK? O_ANGLE_BRACKET len=numericParameter C_ANGLE_BRACKET #varChar
;

// FBin with a single length parameter.
fixedBinaryType
: FBin isnull=QMARK? O_ANGLE_BRACKET len=numericParameter C_ANGLE_BRACKET #fixedBinary
;

// Dec with precision and scale; unlike the other rules, the whole parameter list is optional.
decimalType
: Dec isnull=QMARK? (O_ANGLE_BRACKET precision=numericParameter COMMA scale=numericParameter C_ANGLE_BRACKET)? #decimal
;

// PTs with a single precision parameter.
precisionTimestampType
: PTs isnull=QMARK? O_ANGLE_BRACKET precision=numericParameter C_ANGLE_BRACKET #precisionTimestamp
;

// PTsTZ with a single precision parameter.
precisionTimestampTZType
: PTsTZ isnull=QMARK? O_ANGLE_BRACKET precision=numericParameter C_ANGLE_BRACKET #precisionTimestampTZ
;

// Union of the parameterized types supported so far.
parameterizedType
: fixedCharType
| varCharType
| fixedBinaryType
| decimalType
| precisionTimestampType
| precisionTimestampTZType
// TODO implement the rest of the parameterized types
// | Struct isnull='?'? Lt expr (Comma expr)* Gt #struct
// | NStruct isnull='?'? Lt Identifier expr (Comma Identifier expr)* Gt #nStruct
// | List isnull='?'? Lt expr Gt #list
// | Map isnull='?'? Lt key=expr Comma value=expr Gt #map
;

// A type parameter value.
// NOTE(review): the lexer's INTEGER_LITERAL allows a leading sign, so signed values
// are accepted here syntactically — confirm whether that is intended.
numericParameter
: INTEGER_LITERAL #integerLiteral
;

// Result markers used when a test case expects an error or an undefined result.
substraitError: ERROR_RESULT | UNDEFINED_RESULT;

// A single name:value pair inside the bracketed option list.
func_option: option_name COLON option_value;

// Known option names, or any identifier.
option_name: OVERFLOW | ROUNDING | IDENTIFIER;

// Accepted option values (keywords only).
option_value: ERROR | SATURATE | SILENT | TIE_TO_EVEN | NAN;

// One or more comma-separated options.
func_options: func_option (COMMA func_option)*;
9 changes: 9 additions & 0 deletions grammar/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Generates the Python ANTLR parser for the function test-case grammar.
ANTLR_JAR=antlr-4.13.2-complete.jar
GRAMMARS=SubstraitLexer.g4 FuncTestCaseLexer.g4 FuncTestCaseParser.g4
OUTPUT_DIR=../tests/coverage/antlr_parser

# Neither target produces a file named after itself; declaring them phony keeps
# make from skipping them if a file called `generate` or `clean` ever exists.
.PHONY: generate clean

# Run ANTLR on all grammars, emitting Python 3 sources (with visitor classes) into OUTPUT_DIR.
generate:
	java -jar $(ANTLR_JAR) -visitor -Dlanguage=Python3 -o $(OUTPUT_DIR) $(GRAMMARS)

# Remove the generated Python sources from OUTPUT_DIR.
clean:
	rm -rf $(OUTPUT_DIR)/*.py
Loading
Loading