Skip to content

Commit

Permalink
feat: add antlr grammar for types
Browse files Browse the repository at this point in the history
  • Loading branch information
scgkiran committed Oct 31, 2024
1 parent da0ac58 commit 0cffb36
Show file tree
Hide file tree
Showing 2 changed files with 228 additions and 0 deletions.
157 changes: 157 additions & 0 deletions grammar/SubstraitLexer.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
lexer grammar SubstraitLexer;

// Whitespace and comment handling.
// All three rules send their matches to the HIDDEN channel, so the parser
// never receives them.

// Line comment: '//' followed by everything up to (not including) the line end.
LineComment : '//' ~[\r\n]* -> channel(HIDDEN) ;

// Block comment: '/*' ... '*/'.
// The repeated group consumes the comment body one unit at a time: either a
// single non-'*' character, or a run of '*' followed by a character that is
// neither '*' nor '/'. The trailing '*'* absorbs any stars that sit directly
// in front of the closing '*/'. The group must be repeated ('*' suffix);
// without it only comments with at most one body unit can be matched.
BlockComment : ( '/*' ( ~'*' | '*'+ ~[*/] )* '*'* '*/' ) -> channel(HIDDEN) ;

// Spaces, tabs and carriage returns. '\n' is deliberately not listed here; it
// is matched by the Newline rule further down in this grammar.
Whitespace : [ \t\r]+ -> channel(HIDDEN) ;

// Substrait keywords are case-insensitive, whereas ANTLR string literals are
// matched case-sensitively. To keep the keyword rules readable, one fragment
// per letter is defined below; each accepts the lower- and upper-case form.

fragment A : 'a' | 'A';
fragment B : 'b' | 'B';
fragment C : 'c' | 'C';
fragment D : 'd' | 'D';
fragment E : 'e' | 'E';
fragment F : 'f' | 'F';
fragment G : 'g' | 'G';
fragment H : 'h' | 'H';
fragment I : 'i' | 'I';
fragment J : 'j' | 'J';
fragment K : 'k' | 'K';
fragment L : 'l' | 'L';
fragment M : 'm' | 'M';
fragment N : 'n' | 'N';
fragment O : 'o' | 'O';
fragment P : 'p' | 'P';
fragment Q : 'q' | 'Q';
fragment R : 'r' | 'R';
fragment S : 's' | 'S';
fragment T : 't' | 'T';
fragment U : 'u' | 'U';
fragment V : 'v' | 'V';
fragment W : 'w' | 'W';
fragment X : 'x' | 'X';
fragment Y : 'y' | 'Y';
fragment Z : 'z' | 'Z';

// A single decimal digit.
fragment DIGIT: '0'..'9';

// An unsigned decimal integer with no leading zeros: either a lone '0', or a
// non-zero digit followed by any number of digits.
fragment INTEGER
    : '0'
    | '1'..'9' DIGIT*
    ;

// Syntactic keywords.
// Each keyword is spelled with the per-letter fragments, so any mix of upper
// and lower case is accepted (e.g. "if", "IF", "iF").
// These rules are declared before Identifier; when a keyword and Identifier
// match the same text, ANTLR picks the rule declared first, i.e. the keyword.
If : I F;
Then : T H E N;
Else : E L S E;

// TYPES
// Full type names, matched case-insensitively through the letter fragments.
// Where one name is a prefix of another (e.g. "timestamp" / "timestamp_tz",
// "time" / "timestamp") the lexer's longest-match rule selects the token.

// Simple types.
Boolean : B O O L E A N;
I8 : I '8';
I16 : I '16';
I32 : I '32';
I64 : I '64';
FP32 : F P '32';
FP64 : F P '64';
String : S T R I N G;
Binary : B I N A R Y;
Timestamp: T I M E S T A M P;
Timestamp_TZ: T I M E S T A M P '_' T Z;
Date : D A T E;
Time : T I M E;
Interval_Year: I N T E R V A L '_' Y E A R;
Interval_Day: I N T E R V A L '_' D A Y;
UUID : U U I D;
// Types that take parameters in the parser (see parameterizedType in
// SubstraitType.g4).
Decimal : D E C I M A L;
Precision_Timestamp: P R E C I S I O N '_' T I M E S T A M P;
Precision_Timestamp_TZ: P R E C I S I O N '_' T I M E S T A M P '_' T Z;
FixedChar: F I X E D C H A R;
VarChar : V A R C H A R;
FixedBinary: F I X E D B I N A R Y;
Struct : S T R U C T;
NStruct : N S T R U C T;
List : L I S T;
Map : M A P;
// Prefix that introduces a user-defined type: the letter 'u' (either case)
// immediately followed by '!'. The parser expects an Identifier after it.
UserDefined: U '!';

// short names for types
// Abbreviated, case-insensitive spellings of the type names.
// NOTE(review): none of these tokens is referenced by the parser rules of
// SubstraitType.g4 as shown alongside this lexer. Because they are declared
// before Identifier, input such as "str" or "dec" is tokenized as Str / Dec
// rather than Identifier. Confirm whether the parser is meant to accept them.
Bool: B O O L;
Str: S T R;
VBin: V B I N;
Ts: T S;
TsTZ: T S T Z;
IYear: I Y E A R;
IDay: I D A Y;
Dec: D E C;
PTs: P T S;
PTsTZ: P T S T Z;
FChar: F C H A R;
VChar: V C H A R;
FBin: F B I N;

// Wildcard type keywords (case-insensitive).
// Any    - the bare word "any".
// AnyVar - "any" followed by exactly one decimal digit (any0 .. any9). With
//          more than one trailing digit (e.g. "any10") the longer Identifier
//          match wins instead.
Any: A N Y;
AnyVar: A N Y [0-9];

// Two-character '::' token. It is not referenced by the parser rules shown
// alongside this lexer.
DoubleColon: '::';

// MATH
// Arithmetic operator symbols.
// Note: the Number rule in this grammar accepts an optional leading '-', so a
// '-' directly followed by a digit is lexed as part of a Number (longest
// match) rather than as Minus.
Plus : '+';
Minus : '-';
Asterisk : '*';
ForwardSlash : '/';
Percent : '%';

// COMPARE
// Comparison operator symbols. Two-character operators ('!=', '>=', '<=')
// win over their one-character prefixes ('!', '>', '<') by longest match.
Eq : '=';
Ne : '!=';
Gte : '>=';
Lte : '<=';
Gt : '>';
Lt : '<';
Bang : '!';

// ORGANIZE
// Bracketing and punctuation tokens.
// NOTE(review): OAngleBracket and CAngleBracket match exactly the same text
// as Lt and Gt, which are declared earlier. On an equal-length match ANTLR
// emits the earlier rule, so these two token types are never produced; the
// parser rules use Lt / Gt for angle brackets.
OAngleBracket: Lt;
CAngleBracket: Gt;
OParen: '(';
CParen: ')';
OBracket: '[';
CBracket: ']';
Comma: ',';
Colon: ':';
// '?' marks nullability after a type and is also the ternary operator symbol
// in the expr parser rule.
QMark: '?';
Hash: '#';
Dot: '.';


// OPERATIONS
// Logical operator keywords (case-insensitive) and the ':=' symbol.
// ':=' is preferred over a lone Colon by longest match.
And : A N D;
Or : O R;
Assign : ':=';



// A single decimal digit.
fragment Digit
    : [0-9]
    ;

// An unsigned integer without leading zeros: a lone '0', or a non-zero digit
// followed by any number of digits.
fragment Int
    : '0'
    | [1-9] Digit*
    ;

// Integer literal with an optional leading minus sign.
Number
    : Int
    | '-' Int
    ;

// Identifier: starts with an ASCII letter, '_' or '$', and continues with any
// number of ASCII letters, digits, '_' or '$'.
Identifier
    : [a-zA-Z_$] [a-zA-Z_$0-9]*
    ;

// Line terminator: "\r\n", a lone "\n", or a lone "\r".
// Unlike comments and whitespace this token is NOT sent to a hidden channel.
Newline
    : '\r'? '\n'
    | '\r'
    ;
71 changes: 71 additions & 0 deletions grammar/SubstraitType.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
grammar SubstraitType;

import SubstraitLexer;

// Entry point for a full expression: exactly one expr followed by end of
// input, so trailing unparsed tokens are reported as errors.
startRule: expr EOF;

// Entry point for a bare type: exactly one typeDef followed by end of input.
typeStatement: typeDef EOF;

// A type that takes no parameters. Each alternative is a single keyword token
// and carries its own label, so a distinct context is generated per type.
// The optional nullability marker ('?') is not part of this rule; it is
// attached by typeDef.
scalarType
: Boolean #boolean
| I8 #i8
| I16 #i16
| I32 #i32
| I64 #i64
| FP32 #fp32
| FP64 #fp64
| String #string
| Binary #binary
| Timestamp #timestamp
| Timestamp_TZ #timestampTz
| Date #date
| Time #time
| Interval_Year #intervalYear
| UUID #uuid
;

// A type that takes parameters between '<' and '>'.
// General shape: keyword, optional '?' (captured as isnull), then the
// parameter list in angle brackets (the Lt / Gt tokens).
parameterizedType
// Length-parameterized types.
: FixedChar isnull=QMark? Lt length=numericParameter Gt #fixedChar
| VarChar isnull=QMark? Lt length=numericParameter Gt #varChar
| FixedBinary isnull=QMark? Lt length=numericParameter Gt #fixedBinary
// Decimal takes two parameters: precision, then scale.
| Decimal isnull=QMark? Lt precision=numericParameter Comma scale=numericParameter Gt #decimal
// Precision-parameterized types.
| Interval_Day isnull=QMark? Lt precision=numericParameter Gt #precisionIntervalDay
| Precision_Timestamp isnull=QMark? Lt precision=numericParameter Gt #precisionTimestamp
| Precision_Timestamp_TZ isnull=QMark? Lt precision=numericParameter Gt #precisionTimestampTZ
// Struct: one or more comma-separated field expressions.
| Struct isnull=QMark? Lt expr (Comma expr)* Gt #struct
// Named struct: one or more comma-separated "name expr" pairs.
| NStruct isnull=QMark? Lt Identifier expr (Comma Identifier expr)* Gt #nStruct
// List of a single element expression; Map of a key and a value expression.
| List isnull=QMark? Lt expr Gt #list
| Map isnull=QMark? Lt key=expr Comma value=expr Gt #map
// User-defined type: the UserDefined prefix token, the type name, an optional
// '?', and an optional angle-bracketed parameter list. Note that here the '?'
// follows the name rather than the keyword.
| UserDefined Identifier isnull=QMark? (Lt expr (Comma expr)* Gt)? #userDefined
;

// A parameter of a parameterized type: an integer literal, a named parameter,
// or an arbitrary expression.
// NOTE(review): expr itself accepts a lone Number (#LiteralNumber) and a lone
// Identifier (#TypeParam), so the first two alternatives overlap with the
// third. ANTLR resolves such ambiguities in favour of the alternative listed
// first, which makes the order of these alternatives significant.
numericParameter
: Number #numericLiteral
| Identifier #numericParameterName
| expr #numericExpression
;

// Wildcard type: the Any or AnyVar keyword token, optionally followed by a
// '?' nullability marker (captured as isnull).
anyType
    : (Any | AnyVar) isnull=QMark?
    ;

// Any concrete type.
// The nullability marker for scalar types is attached here; parameterizedType
// and anyType capture their own '?' inside their rules.
typeDef
: scalarType isnull=QMark?
| parameterizedType
| anyType
;

// Expression grammar. The rule is left-recursive; for the left-recursive
// alternatives ANTLR derives operator precedence from the order in which they
// are listed (earlier = binds tighter).
expr
// Parenthesized sub-expression.
: OParen expr CParen #ParenExpression
// One or more "name = expr" assignments, each terminated by at least one
// Newline, followed by the resulting type and optional trailing newlines.
| Identifier Eq expr Newline+ (Identifier Eq expr Newline+)* finalType=typeDef Newline* #MultilineDefinition
// A type used directly as an expression.
| typeDef #TypeLiteral
// Integer literal.
| number=Number #LiteralNumber
// Named parameter, optionally marked nullable with '?'.
| identifier=Identifier isnull=QMark? #TypeParam
// Function call with zero or more comma-separated arguments.
| Identifier OParen (expr (Comma expr)*)? CParen #FunctionCall
// Binary operation.
// NOTE(review): every operator sits in this single alternative, so they all
// share one precedence level and associate left to right; "a + b * c" groups
// as "(a + b) * c". The lexer's Percent token is not accepted here. Confirm
// both are intended.
| left=expr op=(And | Or | Plus | Minus | Lt | Gt | Eq | Ne |
Lte | Gte | Asterisk | ForwardSlash) right=expr #BinaryExpr
// Keyword conditional: if <expr> then <expr> else <expr>.
| If ifExpr=expr Then thenExpr=expr Else elseExpr=expr #IfExpr
// Logical negation with a leading '!'.
| (Bang) expr #NotExpr
// Symbolic conditional: <expr> ? <expr> : <expr>.
// NOTE(review): '?' is also the optional nullability suffix in #TypeParam and
// in typeDef, so "x ? ..." has two possible readings — verify how the
// generated parser resolves this.
| ifExpr=expr QMark thenExpr=expr Colon elseExpr=expr #Ternary
;

0 comments on commit 0cffb36

Please sign in to comment.