diff --git a/grammar/SubstraitLexer.g4 b/grammar/SubstraitLexer.g4
new file mode 100644
index 000000000..0b6d515e9
--- /dev/null
+++ b/grammar/SubstraitLexer.g4
@@ -0,0 +1,133 @@
+lexer grammar SubstraitLexer;
+
+options {
+    caseInsensitive = true;
+}
+
+// Whitespace and comment handling (all routed to the HIDDEN channel).
+LineComment : '//' ~[\r\n]* -> channel(HIDDEN) ;
+// The inner group repeats so a comment body of any length is accepted: each
+// iteration consumes one non-'*' character, or a run of '*' followed by a
+// character that is neither '*' nor '/'.
+BlockComment : ( '/*' ( ~'*' | '*'+ ~[*/] )* '*'* '*/' ) -> channel(HIDDEN) ;
+// NOTE: '\r' also starts Newline (last rule). This rule is listed first, so a
+// '\r' that is not followed by '\n' is lexed as Whitespace.
+Whitespace : [ \t\r]+ -> channel(HIDDEN) ;
+
+// NOTE: unused by the rules in this file; Int and Identifier use Digit instead.
+fragment DIGIT: [0-9];
+
+// Syntactic keywords.
+If : 'IF';
+Then : 'THEN';
+Else : 'ELSE';
+
+// TYPES
+Boolean : 'BOOLEAN';
+I8 : 'I8';
+I16 : 'I16';
+I32 : 'I32';
+I64 : 'I64';
+FP32 : 'FP32';
+FP64 : 'FP64';
+String : 'STRING';
+Binary : 'BINARY';
+Timestamp: 'TIMESTAMP';
+Timestamp_TZ: 'TIMESTAMP_TZ';
+Date : 'DATE';
+Time : 'TIME';
+Interval_Year: 'INTERVAL_YEAR';
+Interval_Day: 'INTERVAL_DAY';
+UUID : 'UUID';
+Decimal : 'DECIMAL';
+Precision_Timestamp: 'PRECISION_TIMESTAMP';
+Precision_Timestamp_TZ: 'PRECISION_TIMESTAMP_TZ';
+FixedChar: 'FIXEDCHAR';
+VarChar : 'VARCHAR';
+FixedBinary: 'FIXEDBINARY';
+Struct : 'STRUCT';
+NStruct : 'NSTRUCT';
+List : 'LIST';
+Map : 'MAP';
+UserDefined: 'U!';
+
+// short names for types
+Bool: 'BOOL';
+Str: 'STR';
+VBin: 'VBIN';
+Ts: 'TS';
+TsTZ: 'TSTZ';
+IYear: 'IYEAR';
+IDay: 'IDAY';
+Dec: 'DEC';
+PTs: 'PTS';
+PTsTZ: 'PTSTZ';
+FChar: 'FCHAR';
+VChar: 'VCHAR';
+FBin: 'FBIN';
+
+Any: 'ANY';
+// 'ANY' immediately followed by exactly one digit.
+AnyVar: Any [0-9];
+
+DoubleColon: '::';
+
+// MATH
+Plus : '+';
+Minus : '-';
+Asterisk : '*';
+ForwardSlash : '/';
+Percent : '%';
+
+// COMPARE
+Eq : '=';
+Ne : '!=';
+Gte : '>=';
+Lte : '<=';
+Gt : '>';
+Lt : '<';
+Bang : '!';
+
+// ORGANIZE
+// NOTE: these two match the same text as Lt/Gt, which are declared earlier.
+OAngleBracket: Lt;
+CAngleBracket: Gt;
+OParen: '(';
+CParen: ')';
+OBracket: '[';
+CBracket: ']';
+Comma: ',';
+Colon: ':';
+QMark: '?';
+Hash: '#';
+Dot: '.';
+
+// OPERATIONS
+And : 'AND';
+Or : 'OR';
+Assign : ':=';
+
+// A non-negative integer without leading zeros.
+fragment Int
+    : '1'..'9' Digit*
+    | '0'
+    ;
+
+fragment Digit
+    : '0'..'9'
+    ;
+
+// An integer literal with an optional leading '-'.
+Number
+    : '-'? Int
+    ;
+
+Identifier
+    : ('A'..'Z' | '_' | '$') ('A'..'Z' | '_' | '$' | Digit)*
+    ;
+
+Newline
+    : ( '\r' '\n'?
+      | '\n'
+      )
+    ;
diff --git a/grammar/SubstraitType.g4 b/grammar/SubstraitType.g4
new file mode 100644
index 000000000..84472c9f0
--- /dev/null
+++ b/grammar/SubstraitType.g4
@@ -0,0 +1,79 @@
+grammar SubstraitType;
+
+options {
+    caseInsensitive = true;
+}
+
+import SubstraitLexer;
+
+// Entry point: a single expression spanning the whole input.
+startRule: expr EOF;
+
+// Entry point: a single type definition spanning the whole input.
+typeStatement: typeDef EOF;
+
+// Types written without a '<' ... '>' parameter list.
+scalarType
+    : Boolean #boolean
+    | I8 #i8
+    | I16 #i16
+    | I32 #i32
+    | I64 #i64
+    | FP32 #fp32
+    | FP64 #fp64
+    | String #string
+    | Binary #binary
+    | Timestamp #timestamp
+    | Timestamp_TZ #timestampTz
+    | Date #date
+    | Time #time
+    | Interval_Year #intervalYear
+    | UUID #uuid
+    ;
+
+// Types with a '<' ... '>' parameter list (optional for U!); isnull captures an optional '?'.
+parameterizedType
+    : FixedChar isnull=QMark? Lt length=numericParameter Gt #fixedChar
+    | VarChar isnull=QMark? Lt length=numericParameter Gt #varChar
+    | FixedBinary isnull=QMark? Lt length=numericParameter Gt #fixedBinary
+    | Decimal isnull=QMark? Lt precision=numericParameter Comma scale=numericParameter Gt #decimal
+    | Interval_Day isnull=QMark? Lt precision=numericParameter Gt #precisionIntervalDay
+    | Precision_Timestamp isnull=QMark? Lt precision=numericParameter Gt #precisionTimestamp
+    | Precision_Timestamp_TZ isnull=QMark? Lt precision=numericParameter Gt #precisionTimestampTZ
+    | Struct isnull=QMark? Lt expr (Comma expr)* Gt #struct
+    | NStruct isnull=QMark? Lt Identifier expr (Comma Identifier expr)* Gt #nStruct
+    | List isnull=QMark? Lt expr Gt #list
+    | Map isnull=QMark? Lt key=expr Comma value=expr Gt #map
+    | UserDefined Identifier isnull=QMark? (Lt expr (Comma expr)* Gt)? #userDefined
+    ;
+
+numericParameter
+    : Number #numericLiteral
+    | Identifier #numericParameterName
+    | expr #numericExpression
+    ;
+
+anyType
+    : Any isnull=QMark?
+    | AnyVar isnull=QMark?
+    ;
+
+typeDef
+    : scalarType isnull=QMark?
+    | parameterizedType
+    | anyType
+    ;
+
+expr
+    : OParen expr CParen #ParenExpression
+    | Identifier Eq expr Newline+ (Identifier Eq expr Newline+)* finalType=typeDef Newline* #MultilineDefinition
+    | typeDef #TypeLiteral
+    | Number #LiteralNumber
+    | Identifier isnull=QMark? #ParameterName
+    | Identifier OParen (expr (Comma expr)*)? CParen #FunctionCall
+    | left=expr op=(And | Or | Plus | Minus | Lt | Gt | Eq | Ne |
+      Lte | Gte | Asterisk | ForwardSlash) right=expr #BinaryExpr
+    | If ifExpr=expr Then thenExpr=expr Else elseExpr=expr #IfExpr
+    | (Bang) expr #NotExpr
+    | ifExpr=expr QMark thenExpr=expr Colon elseExpr=expr #Ternary
+    ;