diff --git a/splash/kernel/lua_grammar/__init__.py b/splash/kernel/lua_grammar/__init__.py
new file mode 100644
index 000000000..8c48bea14
--- /dev/null
+++ b/splash/kernel/lua_grammar/__init__.py
@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+from . import forward, backward
diff --git a/splash/kernel/lua_grammar/backward.py b/splash/kernel/lua_grammar/backward.py
new file mode 100644
index 000000000..bd9fafd09
--- /dev/null
+++ b/splash/kernel/lua_grammar/backward.py
@@ -0,0 +1,233 @@
+# -*- coding: utf-8 -*-
+"""
+A complete parser for Lua 5.2 grammar. It is based on grammar as described in
+http://www.lua.org/manual/5.2/manual.html#9.
+
+Left recursion is eliminated to make the grammar compatible
+with top-down parser which funcparserlib uses.
+
+This grammar works on **reversed** sequence of tokens, so it is more suitable
+for autocompletion.
+
+The parser is not concerned with operator precedence.
+
+.. warning::
+
+    This parser is experimental and untested!
+    Parsing of statements is definitely broken.
+
+"""
+from __future__ import absolute_import
+from funcparserlib import parser as p
+
+from .common import T, node, Name, String, Number, fieldsep, unop, binop
+
+
+# Rules that are referenced before their bodies are written (the grammar is
+# mutually recursive). Each one is filled in later with ``.define(...)``.
+exp = p.forward_decl()
+block = p.forward_decl()
+explist = p.forward_decl()
+var = p.forward_decl()
+
+
+# NOTE: every rule in this module lists its tokens in the opposite order to
+# the grammar comment above it, because the parser consumes a reversed token
+# sequence (the last source token comes first).
+
+# field ::= ‘[’ exp ‘]’ ‘=’ exp | Name ‘=’ exp | exp
+# Alternatives are tried in the order written, so the bare ``exp`` fallback
+# has to stay last.
+field = (
+    (exp + T('=') + T(']') + exp + T('[')) |
+    (exp + T('=') + Name) |
+    exp
+) >> node('field')
+
+
+# fieldlist ::= field {fieldsep field} [fieldsep]
+# Reversed: the optional trailing separator is seen first, and the first
+# field of the source is the one without a separator after it.
+fieldlist = (
+    p.maybe(fieldsep) + p.many(field + fieldsep) + field
+) >> node("fieldlist")
+
+
+# tableconstructor ::= ‘{’ [fieldlist] ‘}’
+tableconstructor = (
+    T('}') + p.maybe(fieldlist) + T('{')
+) >> node('tableconstructor')
+
+
+# XXX: moved here in order to avoid unnecessary forward declarations
+# namelist ::= Name {‘,’ Name}
+namelist = (p.many(Name + T(',')) + Name) >> node('namelist')
+
+
+# parlist ::= namelist [‘,’ ‘...’] | ‘...’
+# Reversed: an optional ``... ,`` prefix precedes the name list.
+parlist = (
+    (p.maybe(T('...') + T(',')) + namelist) |
+    T('...')
+) >> node('parlist')
+
+
+# funcbody ::= ‘(’ [parlist] ‘)’ block end
+funcbody = (T('end') + block + T(')') + p.maybe(parlist) + T('(')) >> node('funcbody')
+
+
+# functiondef ::= function funcbody
+functiondef = (funcbody + T('function')) >> node('functiondef')
+
+
+# args ::= ‘(’ [explist] ‘)’ | tableconstructor | String
+args = (
+    (T(')') + p.maybe(explist) + T('(')) |
+    tableconstructor |
+    String
+) >> node('args')
+
+
+#
+# Unlike "forward" grammar, there is no left recursion in
+# functioncall/prefixexp/var.
+#
+
+# NOTE: the rules below consume a reversed token sequence, so each sequence
+# is written in the opposite order to its grammar comment. Alternatives are
+# tried in the order written.
+
+# functioncall ::= prefixexp args | prefixexp ‘:’ Name args
+# The method-call form (``args Name : prefixexp``) is tried first; the plain
+# call is the fallback.
+prefixexp = p.forward_decl()
+functioncall = (args + Name + T(':') + prefixexp | args + prefixexp) >> node("functioncall")
+
+
+# prefixexp ::= var | functioncall | ‘(’ exp ‘)’
+prefixexp.define((
+    functioncall |
+    T(')') + exp + T('(') |
+    var
+) >> node("prefixexp"))
+
+
+# var ::= Name | prefixexp ‘[’ exp ‘]’ | prefixexp ‘.’ Name
+# The bare ``Name`` alternative is last so that the longer ``Name . prefixexp``
+# form gets a chance to match first.
+var.define((
+    Name + T('.') + prefixexp |
+    T(']') + exp + T('[') + prefixexp |
+    Name
+) >> node("var"))
+
+
+# exp ::= nil | false | true | Number | String | ‘...’ | functiondef |
+#         prefixexp | tableconstructor | exp binop exp | unop exp
+#
+# This rule is left-recursive. Left recursion can be eliminated
+# using the following method:
+#
+# (A -> Aa | B) <=> (A -> BA'); (A' -> aA' | e)
+#
+# ============================= Let's do it:
+# A := exp
+# a := (binop + exp) | unop
+# B := _exp_B
+
+_exp_B = (
+    tableconstructor |
+    prefixexp |
+    functiondef |
+    T('nil') |
+    T('false') |
+    T('true') |
+    Number |
+    String |
+    T('...')
+) # >> node("_exp_B")
+
+# A' -> aA' | e
+#
+# For a = binop + exp the trailing A' is already consumed by the nested
+# ``exp`` (exp is B A'). A bare unop, however, must be followed by A'
+# explicitly; otherwise nothing could follow a unary operator. For example
+# ``b and not a`` arrives reversed as ``a not and b`` and parsing would stop
+# right after ``not``. ``binop + exp`` is tried first, so ``-`` is read as a
+# binary minus whenever an operand follows it.
+_exp_A1 = p.forward_decl()
+_exp_A1.define(p.maybe(binop + exp | unop + _exp_A1)) # >> node("_exp_A1")
+
+# A -> BA'
+exp.define(_exp_B + _exp_A1 >> node("exp"))
+
+# ============================== done eliminating left recursion.
+
+
+# NOTE: ``+`` binds tighter than ``>>`` in Python, so in the one-line rules
+# below ``node(...)`` receives the result of the whole sequence. The token
+# order is the reverse of the grammar comments because the input sequence is
+# reversed.
+
+# explist ::= exp {‘,’ exp}
+# The commas are wrapped in ``p.skip`` and therefore do not appear in the
+# resulting node.
+explist.define(p.many(exp + p.skip(T(','))) + exp >> node('explist'))
+
+
+# varlist ::= var {‘,’ var}
+# Unlike explist, the commas are kept in the result here.
+varlist = p.many(var + T(',')) + var >> node('varlist')
+
+
+# funcname ::= Name {‘.’ Name} [‘:’ Name]
+# Reversed: the optional ``Name :`` method part is seen first.
+funcname = p.maybe(Name + T(':')) + p.many(Name + T('.')) + Name >> node('funcname')
+
+
+# label ::= ‘::’ Name ‘::’
+# This rule is symmetric, so it reads the same on a reversed sequence.
+label = T('::') + Name + T('::') >> node('label')
+
+
+# retstat ::= return [explist] [‘;’]
+retstat = p.maybe(T(';')) + p.maybe(explist) + T('return') >> node('retstat')
+
+
+# stat ::= ‘;’ |
+#         varlist ‘=’ explist |
+#         functioncall |
+#         label |
+#         break |
+#         goto Name |
+#         do block end |
+#         while exp do block end |
+#         repeat block until exp |
+#         if exp then block {elseif exp then block} [else block] end |
+#         for Name ‘=’ exp ‘,’ exp [‘,’ exp] do block end |
+#         for namelist in explist do block end |
+#         function funcname funcbody |
+#         local function Name funcbody |
+#         local namelist [‘=’ explist]
+
+# XXX: parsing of statements is broken, don't use it!
+
+# NOTE: each statement lists its tokens in reverse source order (the last
+# token of the statement comes first), because the parser consumes a
+# reversed token sequence. ``+`` binds tighter than ``>>``, so ``node(...)``
+# wraps the whole sequence.
+
+# local namelist [‘=’ explist]
+# Reversed: [explist ‘=’] namelist local. The initialiser is the optional
+# part, and the declared names are a namelist rather than arbitrary
+# expressions.
+local_var = p.maybe(explist + T('=')) + namelist + T('local') >> node("local-var")
+
+# varlist ‘=’ explist
+assignment = explist + T('=') + varlist >> node("assignment")
+
+# local function Name funcbody
+local_function = funcbody + Name + T('function') + T('local') >> node("local-function")
+
+# function funcname funcbody
+function = funcbody + funcname + T('function') >> node("function")
+
+# for namelist in explist do block end
+for_in_loop = T('end') + block + T('do') + explist + T('in') + namelist + T('for') >> node("for-in-loop")
+
+# for Name ‘=’ exp ‘,’ exp [‘,’ exp] do block end
+# The three-expression form is tried first and the two-expression form is
+# the fallback. A leading ``p.maybe(exp + ',')`` would not work: once the
+# optional part has matched it is never retried, so a loop without a step
+# (``for i = 1, 10 do``) could not be parsed.
+for_loop = (
+    T('end') + block + T('do') +
+    (exp + T(',') + exp + T(',') + exp | exp + T(',') + exp) +
+    T('=') + Name + T('for')
+) >> node("for-loop")
+
+# if exp then block {elseif exp then block} [else block] end
+if_then_else = (
+    T('end') +
+    p.maybe(block + T('else')) +
+    p.many(block + T('then') + exp + T('elseif')) +
+    block + T('then') + exp + T('if')
+) >> node('if-then-else')
+
+# repeat block until exp
+repeat_until = exp + T('until') + block + T('repeat') >> node('repeat-until')
+
+# while exp do block end
+while_loop = T('end') + block + T('do') + exp + T('while') >> node('while-loop')
+
+# do block end
+do_block = T('end') + block + T('do') >> node('do-block')
+
+# goto Name
+goto_statement = Name + T('goto') >> node('goto')
+
+# Alternatives are tried in the order written; the first one that matches
+# wins.
+stat = (
+    local_var |
+    assignment |
+    local_function |
+    function |
+    for_in_loop |
+    for_loop |
+    if_then_else |
+    repeat_until |
+    while_loop |
+    do_block |
+    goto_statement |
+    T('break') |
+    label |
+    functioncall |
+    T(';')
+) >> node('stat')
+
+
+# block ::= {stat} [retstat]
+# Reversed: the optional return statement, being last in the source, is
+# seen first.
+block.define(
+    p.maybe(retstat) + p.many(stat) >> node('block')
+)
+
+# chunk
+# ``p.finished`` only matches at the end of the token sequence, so ``chunk``
+# fails when tokens are left over after the block.
+chunk = block + p.finished
diff --git a/splash/kernel/lua_grammar/common.py b/splash/kernel/lua_grammar/common.py
new file mode 100644
index 000000000..f72872c4b
--- /dev/null
+++ b/splash/kernel/lua_grammar/common.py
@@ -0,0 +1,68 @@
+# -*- coding: utf-8 -*-
+"""
+Common utilities and definitions for Lua parsing.
+"""
+from __future__ import absolute_import
+from operator import attrgetter
+from funcparserlib import parser as p
+
+
+# Maps a matched token to its ``value`` attribute.
+token_value = attrgetter("value")
+
+def T(val):
+    """
+    Return a parser that matches one token by its ``value`` and yields
+    that value.
+
+    If ``val`` is a ``set`` or ``frozenset``, a token matches when its value
+    is a member of it; otherwise the token value must be equal to ``val``.
+    """
+    if isinstance(val, (set, frozenset)):
+        return p.some(lambda t: t.value in val) >> token_value
+    else:
+        return p.some(lambda t: t.value == val) >> token_value
+
+
+# Terminals that are selected by token ``type`` instead of token ``value``.
+Name = p.some(lambda t: t.type == 'iden') >> token_value
+String = p.some(lambda t: t.type == 'string') >> token_value
+Number = p.some(lambda t: t.type == 'number') >> token_value
+
+
+class Node(object):
+    """
+    A parse tree node: the ``name`` of a grammar rule plus the ``value``
+    the rule's parser produced.
+    """
+
+    def __init__(self, name, value):
+        self.name = name
+        self.value = value
+
+    def __repr__(self):
+        return "%s:%r" % (self.name, self.value)
+
+    def _repr_pretty_(self, p, cycle):
+        """
+        Pretty-printing hook in the style of ``IPython.lib.pretty``.
+
+        ``p`` is the pretty-printer object (it shadows the module-level
+        ``p`` inside this method); ``cycle`` is true when this node is
+        already being printed further up the stack.
+        """
+        if cycle:
+            # The return value of this hook is ignored; output has to be
+            # written through the printer object.
+            p.text("Node({name!r}, ...)".format(name=self.name))
+            return
+
+        if isinstance(self.value, (list, tuple)):
+            with p.group(2, 'Node({name!r}, ['.format(name=self.name), '])'):
+                p.breakable()
+                for idx, v in enumerate(self.value):
+                    if idx:
+                        # separator goes before every item except the first
+                        p.text(",")
+                        p.breakable()
+                    p.pretty(v)
+        else:
+            p.text('Node({name!r}, '.format(name=self.name))
+            p.pretty(self.value)
+            p.breakable()
+            p.text(')')
+
+
+def node(name):
+    """
+    Return a one-argument function that wraps a parse result in
+    ``Node(name, value)``; meant to be applied to a parser with ``>>``.
+    """
+    return lambda value: Node(name, value)
+
+
+# ======================= Common grammar parts =============================
+
+# unop ::= ‘-’ | not | ‘#’
+unop = T({'-', 'not', '#'})
+
+
+# binop ::= ‘+’ | ‘-’ | ‘*’ | ‘/’ | ‘^’ | ‘%’ | ‘..’ |
+#           ‘<’ | ‘<=’ | ‘>’ | ‘>=’ | ‘==’ | ‘~=’ |
+#           and | or
+# ``set("+-*/^%><")`` supplies the single-character operators.
+binop = T(set("+-*/^%><") | {"..", "==", "~=", ">=", "<=", "and", "or"})
+
+
+# fieldsep ::= ‘,’ | ‘;’
+fieldsep = T({',', ';'})
diff --git a/splash/kernel/lua_grammar/forward.py b/splash/kernel/lua_grammar/forward.py
new file mode 100644
index 000000000..cbabf3f76
--- /dev/null
+++ b/splash/kernel/lua_grammar/forward.py
@@ -0,0 +1,206 @@
+# -*- coding: utf-8 -*-
+"""
+A complete parser for Lua 5.2 grammar. It is based on grammar as described in
+http://www.lua.org/manual/5.2/manual.html#9.
+
+Left recursion is eliminated to make the grammar compatible
+with top-down parser which funcparserlib uses.
+
+This grammar works on straight sequence of tokens, so it is not suitable
+for autocompletion.
+
+The parser is not concerned with operator precedence.
+
+.. warning::
+
+    This parser is experimental and untested!
+
+"""
+from __future__ import absolute_import
+
+from funcparserlib import parser as p
+
+from .common import T, node, Name, String, Number, fieldsep, unop, binop
+
+# ============================ Grammar ====================================
+
+# Rules that are referenced before their bodies are written (the grammar is
+# mutually recursive). Each one is filled in later with ``.define(...)``.
+exp = p.forward_decl()
+block = p.forward_decl()
+explist = p.forward_decl()
+var = p.forward_decl()
+
+# NOTE: ``+`` binds tighter than ``>>`` in Python, so in the one-line rules
+# below ``node(...)`` receives the result of the whole sequence. Alternatives
+# joined with ``|`` are tried in the order written.
+
+# field ::= ‘[’ exp ‘]’ ‘=’ exp | Name ‘=’ exp | exp
+# The bare ``exp`` fallback has to stay last.
+field = (
+    (T('[') + exp + T(']') + T('=') + exp) |
+    (Name + T('=') + exp) |
+    exp
+) >> node('field')
+
+
+# fieldlist ::= field {fieldsep field} [fieldsep]
+fieldlist = field + p.many(fieldsep + field) + p.maybe(fieldsep) >> node("fieldlist")
+
+
+# tableconstructor ::= ‘{’ [fieldlist] ‘}’
+tableconstructor = T('{') + p.maybe(fieldlist) + T('}') >> node('tableconstructor')
+
+
+# XXX: moved here in order to avoid unnecessary forward declarations
+# namelist ::= Name {‘,’ Name}
+namelist = Name + p.many(T(',') + Name) >> node('namelist')
+
+
+# parlist ::= namelist [‘,’ ‘...’] | ‘...’
+parlist = (
+    (namelist + p.maybe(T(',') + T('...'))) |
+    T('...')
+) >> node('parlist')
+
+
+# funcbody ::= ‘(’ [parlist] ‘)’ block end
+# The closing parenthesis of the parameter list must be consumed before the
+# block; without it no function body can be parsed.
+funcbody = T('(') + p.maybe(parlist) + T(')') + block + T('end') >> node('funcbody')
+
+
+# functiondef ::= function funcbody
+functiondef = T('function') + funcbody >> node('functiondef')
+
+
+# args ::= ‘(’ [explist] ‘)’ | tableconstructor | String
+args = (
+    T('(') + p.maybe(explist) + T(')') |
+    tableconstructor |
+    String
+) >> node('args')
+
+
+# functioncall ::= prefixexp args | prefixexp ‘:’ Name args
+# prefixexp ::= var | functioncall | ‘(’ exp ‘)’
+# var ::= Name | prefixexp ‘[’ exp ‘]’ | prefixexp ‘.’ Name
+#
+# left
recursion is eliminated like in
+# https://github.com/antlr/grammars-v4/blob/master/lua/Lua.g4
+#
+
+# One call suffix: an optional ``: Name`` method selector followed by the
+# call arguments (Lua: ``prefixexp args | prefixexp ':' Name args``).
+# The selector is a Name, not a second argument list.
+name_and_args = (
+    p.maybe(T(':') + Name) + args
+) >> node("name_and_args")
+
+# Any number of call suffixes followed by one ``[exp]`` or ``.Name`` lookup.
+# The brackets and the dot are wrapped in ``p.skip`` and therefore are not
+# part of the lookup nodes.
+var_suffix = (
+    p.many(name_and_args) +
+    (
+        (p.skip(T('[')) + exp + p.skip(T(']')) >> node("index_lookup")) |
+        (p.skip(T('.')) + Name) >> node("dot_lookup")
+    )
+) >> node("var_suffix")
+
+var_or_exp = (var | (T('(') + exp + T(')'))) >> node("var_or_exp")
+
+# A function call needs at least one call suffix (``p.oneplus``) ...
+functioncall = var_or_exp + p.oneplus(name_and_args) >> node('functioncall')
+
+# ... while a prefix expression may have none (``p.many``).
+prefixexp = var_or_exp + p.many(name_and_args) >> node('prefixexp')
+
+# A parenthesised expression is a var only when at least one lookup suffix
+# follows it.
+var.define(
+    (Name | (T('(') + exp + T(')') + var_suffix)) + p.many(var_suffix) >> node('var')
+)
+
+# exp ::= nil | false | true | Number | String | ‘...’ | functiondef |
+#         prefixexp | tableconstructor | exp binop exp | unop exp
+#
+# This rule is left-recursive. Left recursion can be eliminated
+# using the following method:
+#
+# (A -> Aa | B) <=> (A -> BA'); (A' -> aA' | e)
+#
+# ============================= Let's do it:
+# A := exp
+# a := binop + exp
+# B := _exp_prefix
+_exp_prefix = (
+    (unop + exp) >> node("_exp_unop") |
+    tableconstructor |
+    prefixexp |
+    functiondef |
+    T('nil') |
+    T('false') |
+    T('true') |
+    Number |
+    String |
+    T('...')
+) # >> node("_exp_prefix")
+
+_exp_suffix = binop + exp # >> node("_exp_suffix")
+
+exp.define(
+    _exp_prefix + p.many(_exp_suffix) >> node('exp')
+)
+# ============================== done eliminating left recursion.
+
+
+# NOTE: ``+`` binds tighter than ``>>`` in Python, so in the one-line rules
+# below ``node(...)`` receives the result of the whole sequence.
+
+# explist ::= exp {‘,’ exp}
+# The commas are wrapped in ``p.skip`` and therefore do not appear in the
+# resulting node.
+explist.define(exp + p.many(p.skip(T(',')) + exp) >> node('explist'))
+
+
+# varlist ::= var {‘,’ var}
+# Unlike explist, the commas are kept in the result here.
+varlist = var + p.many(T(',') + var) >> node('varlist')
+
+
+# funcname ::= Name {‘.’ Name} [‘:’ Name]
+funcname = Name + p.many(T('.') + Name) + p.maybe(T(':') + Name) >> node('funcname')
+
+
+# label ::= ‘::’ Name ‘::’
+label = T('::') + Name + T('::') >> node('label')
+
+
+# retstat ::= return [explist] [‘;’]
+retstat = T('return') + p.maybe(explist) + p.maybe(T(';')) >> node('retstat')
+
+
+# stat ::= ‘;’ |
+#         varlist ‘=’ explist |
+#         functioncall |
+#         label |
+#         break |
+#         goto Name |
+#         do block end |
+#         while exp do block end |
+#         repeat block until exp |
+#         if exp then block {elseif exp then block} [else block] end |
+#         for Name ‘=’ exp ‘,’ exp [‘,’ exp] do block end |
+#         for namelist in explist do block end |
+#         function funcname funcbody |
+#         local function Name funcbody |
+#         local namelist [‘=’ explist]
+#
+# Every alternative is wrapped in its own node (``>>`` binds tighter than
+# ``|``) and the chosen alternative is wrapped once more in a 'stat' node.
+# Alternatives are tried in the order written, so e.g. an assignment is
+# attempted before a bare function call.
+stat = (
+    (T('local') + namelist + p.maybe(T('=') + explist)) >> node("local-var") |
+    (varlist + T('=') + explist) >> node("assignment") |
+    (T('local') + T('function') + Name + funcbody) >> node("local-function") |
+    (T('function') + funcname + funcbody) >> node("function") |
+    (T('for') + namelist + T('in') + explist + T('do') + block + T('end')) >> node("for-in-loop")|
+    (
+        T('for') + Name + T('=') + exp + T(',') + exp + p.maybe(T(',') + exp) +
+        T('do') + block + T('end')
+    ) >> node("for-loop") |
+    (
+        T('if') + exp + T('then') + block +
+        p.many(T('elseif') + exp + T('then') + block) +
+        p.maybe(T('else') + block) +
+        T('end')
+    ) >> node('if-then-else') |
+    (T('repeat') + block + T('until') + exp) >> node('repeat-until') |
+    (T('while') + exp + T('do') + block + T('end')) >> node('while-loop') |
+    (T('do') + block + T('end')) >> node('do-block') |
+    (T('goto') + Name) >> node('goto') |
+    T('break') |
+    label |
+    functioncall |
+    T(';')
+) >> node('stat')
+
+
+# block ::= {stat} [retstat]
+# ``p.many(stat)`` may match zero statements and the return statement is
+# optional, so an empty token sequence is also a valid block.
+block.define((p.many(stat) + p.maybe(retstat)) >> node('block'))
+
+
+# chunk ::= block
+# ``p.finished`` only matches at the end of the token sequence, so ``chunk``
+# fails when tokens are left over after the block.
+chunk = block + p.finished