Skip to content

Commit

Permalink
feat: add antlr grammar for types (#730)
Browse files Browse the repository at this point in the history
  • Loading branch information
scgkiran authored Nov 3, 2024
1 parent da0ac58 commit 820085f
Show file tree
Hide file tree
Showing 2 changed files with 201 additions and 0 deletions.
126 changes: 126 additions & 0 deletions grammar/SubstraitLexer.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
// Token definitions shared with the SubstraitType grammar, which imports this lexer.
lexer grammar SubstraitLexer;

options {
// Literals and character sets in this grammar are written in upper case; this
// option makes the generated lexer match them regardless of input case.
caseInsensitive = true;
}

// Whitespace and comment handling
// All three rules send their text to the HIDDEN channel, so the parser never sees it.

// Everything from '//' up to (not including) the end of the line.
LineComment : '//' ~[\r\n]* -> channel(HIDDEN) ;

// The body is a repetition of either a non-'*' character or a run of '*' followed
// by a character that is neither '*' nor '/'. The trailing '*'* absorbs stars that
// sit directly in front of the closing "*/". The repetition on the group is
// required: without it only a single body element could appear between the
// delimiters, so ordinary multi-character comments would not lex.
BlockComment : '/*' ( ~'*' | '*'+ ~[*/] )* '*'* '*/' -> channel(HIDDEN) ;

// Spaces, tabs and carriage returns. '\n' is not in this set; the Newline rule
// emits it as a visible token.
Whitespace : [ \t\r]+ -> channel(HIDDEN) ;

// One decimal digit.
// NOTE(review): no rule in this file references DIGIT; the numeric rules use the
// separately declared `Digit` fragment. Candidate for removal — confirm that no
// importing grammar relies on it.
fragment DIGIT: [0-9];

// Syntactic keywords.
// Declared ahead of Identifier so that, when both rules match the same text,
// the keyword token is the one produced.
If : 'IF';
Then : 'THEN';
Else : 'ELSE';

// TYPES
// One token per type keyword. Where one keyword is a prefix of another
// (e.g. TIMESTAMP / TIMESTAMP_TZ), the lexer's longest-match rule picks the
// longer token when the input contains it.
Boolean : 'BOOLEAN';
I8 : 'I8';
I16 : 'I16';
I32 : 'I32';
I64 : 'I64';
FP32 : 'FP32';
FP64 : 'FP64';
String : 'STRING';
Binary : 'BINARY';
Timestamp: 'TIMESTAMP';
Timestamp_TZ: 'TIMESTAMP_TZ';
Date : 'DATE';
Time : 'TIME';
Interval_Year: 'INTERVAL_YEAR';
Interval_Day: 'INTERVAL_DAY';
UUID : 'UUID';
Decimal : 'DECIMAL';
Precision_Timestamp: 'PRECISION_TIMESTAMP';
Precision_Timestamp_TZ: 'PRECISION_TIMESTAMP_TZ';
FixedChar: 'FIXEDCHAR';
VarChar : 'VARCHAR';
FixedBinary: 'FIXEDBINARY';
Struct : 'STRUCT';
NStruct : 'NSTRUCT';
List : 'LIST';
Map : 'MAP';
// Prefix token for user-defined types; the parser expects an Identifier right after it.
UserDefined: 'U!';

// short names for types
// NOTE(review): presumably abbreviations of the long type keywords declared
// earlier in this file — confirm the intended mapping. None of these tokens is
// referenced by the parser rules visible in SubstraitType.g4, so text such as
// "bool" lexes to one of these tokens rather than to Identifier.
Bool: 'BOOL';
Str: 'STR';
VBin: 'VBIN';
Ts: 'TS';
TsTZ: 'TSTZ';
IYear: 'IYEAR';
IDay: 'IDAY';
Dec: 'DEC';
PTs: 'PTS';
PTsTZ: 'PTSTZ';
FChar: 'FCHAR';
VChar: 'VCHAR';
FBin: 'FBIN';

// Wildcard type keyword.
Any: 'ANY';
// 'ANY' followed directly by exactly one digit (e.g. ANY1).
AnyVar: Any [0-9];

// NOTE(review): not referenced by the parser rules visible in SubstraitType.g4.
DoubleColon: '::';

// MATH
Plus : '+';
Minus : '-';
Asterisk : '*';
ForwardSlash : '/';
// NOTE(review): Percent is tokenised here but is absent from the operator set
// of the `expr` rule in SubstraitType.g4 — confirm whether that is intended.
Percent : '%';

// COMPARE
// Two-character operators and their one-character prefixes coexist because the
// lexer prefers the longest match ('>=' over '>', '!=' over '!').
Eq : '=';
Ne : '!=';
Gte : '>=';
Lte : '<=';
Gt : '>';
Lt : '<';
Bang : '!';

// ORGANIZE
// NOTE(review): OAngleBracket and CAngleBracket match exactly the same text as
// Lt and Gt, which are declared earlier. ANTLR resolves equal-length matches in
// favour of the rule declared first, so these two tokens look unreachable; the
// parser rules in SubstraitType.g4 use Lt/Gt directly. Confirm before relying on them.
OAngleBracket: Lt;
CAngleBracket: Gt;
OParen: '(';
CParen: ')';
OBracket: '[';
CBracket: ']';
Comma: ',';
Colon: ':';
QMark: '?';
Hash: '#';
Dot: '.';


// OPERATIONS
// Like the other keywords, And/Or are declared ahead of Identifier so the
// keyword token is produced when both rules match the same text.
And : 'AND';
Or : 'OR';
// NOTE(review): Assign is not referenced by the parser rules visible in SubstraitType.g4.
Assign : ':=';



// Unsigned integer without leading zeros: either a lone '0', or a non-zero
// digit followed by any number of further digits.
fragment Int : '0' | [1-9] Digit* ;

// One decimal digit.
fragment Digit : [0-9] ;

// Integer literal token; the leading '-' is optional and part of the token.
Number : '-'? Int ;

// Name token: the first character is a letter, '_' or '$'; any following
// characters may additionally be digits.
Identifier : ( '_' | '$' | 'A'..'Z' ) ( '_' | '$' | 'A'..'Z' | Digit )* ;

// Line terminator on the default channel: CR LF, a lone CR, or a lone LF.
Newline : '\r\n' | '\r' | '\n' ;
75 changes: 75 additions & 0 deletions grammar/SubstraitType.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Parser rules for Substrait type expressions. Token definitions are pulled in
// from SubstraitLexer via the import below.
grammar SubstraitType;

options {
caseInsensitive = true;
}

import SubstraitLexer;

// Entry point for a full expression; EOF forces the whole input to be consumed.
startRule
    : expr EOF
    ;

// Entry point for a single type definition; EOF forces the whole input to be consumed.
typeStatement
    : typeDef EOF
    ;

// Types that take no parameters. Every alternative carries its own label, so
// each type gets a distinct context in the generated parse tree.
scalarType
    : Boolean          #boolean
    | I8               #i8
    | I16              #i16
    | I32              #i32
    | I64              #i64
    | FP32             #fp32
    | FP64             #fp64
    | String           #string
    | Binary           #binary
    | Timestamp        #timestamp
    | Timestamp_TZ     #timestampTz
    | Date             #date
    | Time             #time
    | Interval_Year    #intervalYear
    | UUID             #uuid
    ;

// Types that carry parameters between '<' and '>'. An optional '?' (captured as
// `isnull`) follows the type keyword — or, for user-defined types, the type name.
parameterizedType
    // Single numeric length parameter.
    : FixedChar isnull=QMark? Lt length=numericParameter Gt
        #fixedChar
    | VarChar isnull=QMark? Lt length=numericParameter Gt
        #varChar
    | FixedBinary isnull=QMark? Lt length=numericParameter Gt
        #fixedBinary
    // Precision and scale.
    | Decimal isnull=QMark? Lt precision=numericParameter Comma scale=numericParameter Gt
        #decimal
    // Single numeric precision parameter.
    | Interval_Day isnull=QMark? Lt precision=numericParameter Gt
        #precisionIntervalDay
    | Precision_Timestamp isnull=QMark? Lt precision=numericParameter Gt
        #precisionTimestamp
    | Precision_Timestamp_TZ isnull=QMark? Lt precision=numericParameter Gt
        #precisionTimestampTZ
    // Compound types built from nested expressions.
    | Struct isnull=QMark? Lt expr (Comma expr)* Gt
        #struct
    | NStruct isnull=QMark? Lt Identifier expr (Comma Identifier expr)* Gt
        #nStruct
    | List isnull=QMark? Lt expr Gt
        #list
    | Map isnull=QMark? Lt key=expr Comma value=expr Gt
        #map
    // User-defined type: prefix token, name, then an optional parameter list.
    | UserDefined Identifier isnull=QMark? (Lt expr (Comma expr)* Gt)?
        #userDefined
    ;

// A numeric argument of a parameterized type: a literal, a named parameter, or
// a general expression. The alternatives are tried in the order listed.
numericParameter
    : Number        #numericLiteral
    | Identifier    #numericParameterName
    | expr          #numericExpression
    ;

// Wildcard type: either the plain Any keyword or its single-digit-suffixed
// AnyVar form, optionally followed by '?' (captured as `isnull`).
anyType
    : ( Any | AnyVar ) isnull=QMark?
    ;

// Any type: a scalar type with an optional '?' suffix, a parameterized type,
// or a wildcard type. The latter two handle their own '?' internally.
typeDef
    : scalarType isnull=QMark?
    | parameterizedType
    | anyType
    ;

// Expression rule. It is directly left-recursive (BinaryExpr and Ternary start
// with `expr`), so the order of the alternatives below is significant and
// should not be rearranged casually.
// NOTE(review): ANTLR derives operator precedence in a left-recursive rule from
// alternative order. All binary operators live in ONE alternative here, so they
// appear to share a single precedence level (e.g. '+' and '*' are not
// distinguished) — confirm this is intended.
expr
// Parenthesised sub-expression.
: OParen expr CParen #ParenExpression
// One or more newline-terminated `name = expr` assignments followed by a final type.
| Identifier Eq expr Newline+ (Identifier Eq expr Newline+)* finalType=typeDef Newline* #MultilineDefinition
| typeDef #TypeLiteral
| Number #LiteralNumber
// Bare parameter name with an optional '?' suffix.
| Identifier isnull=QMark? #ParameterName
// Call with zero or more comma-separated arguments.
| Identifier OParen (expr (Comma expr)*)? CParen #FunctionCall
// NOTE(review): the lexer's Percent token is not part of this operator set.
| left=expr op=(And | Or | Plus | Minus | Lt | Gt | Eq | Ne |
Lte | Gte | Asterisk | ForwardSlash) right=expr #BinaryExpr
| If ifExpr=expr Then thenExpr=expr Else elseExpr=expr #IfExpr
// Logical negation with a leading '!'.
| (Bang) expr #NotExpr
// C-style conditional: cond ? a : b.
| ifExpr=expr QMark thenExpr=expr Colon elseExpr=expr #Ternary
;

0 comments on commit 820085f

Please sign in to comment.