-
Notifications
You must be signed in to change notification settings - Fork 1
/
lexer_lex.py
121 lines (104 loc) · 1.92 KB
/
lexer_lex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#from ply import lex
from ply.ply import lex
# Reserved words of the lexed language. Each keyword maps to its token type,
# which is simply the keyword in upper case; t_ID consults this table to
# decide whether an identifier is a keyword.
reserved = {
    keyword: keyword.upper()
    for keyword in (
        'and',
        'not',
        'or',
        'use',
        'print',
        'my',
        'until',
        'foreach',
    )
}
# Every token type the lexer may emit: the fixed token names listed here,
# followed by one token type per reserved word (in `reserved` order).
tokens = (
    # literals
    "NUMBER", "STRING",
    # statement terminator
    "SEMI",
    # names
    "ID", "VARNAME", "ARRNAME",
    # comparison
    "LT", "GT",
    # brackets: ( ) [ ] { }
    "OP", "CL", "INDEXOP", "INDEXCL", "BLOCKOP", "BLOCKCL",
    # separators and assignment
    "COMMA", "EQ",
    # arithmetic
    "INCREMENT", "DECREMENT", "PLS", "MIN", "MUL", "DIV",
    # comments
    "COMMENT",
    # keyword token types
    *reserved.values(),
)
# Single-character operator and punctuation tokens, declared as plain regex
# strings named t_<TOKEN>. Every pattern is one (optionally escaped) literal
# character.
# NOTE(review): per the PLY documentation, string rules are tried only after
# all function rules, which is what lets t_DECREMENT ("--") win over t_MIN
# ("-") — confirm against the bundled ply version.

# Arithmetic operators.
t_MIN = r'\-'
t_MUL = r'\*'
t_DIV = r'/'
# Statement terminator.
t_SEMI = r'\;'
# Comparison and assignment operators.
t_LT = r'\<'
t_GT = r'\>'
t_EQ = r'\='
# Parentheses.
t_OP = r'\('
t_CL = r'\)'
# Square brackets (indexing).
t_INDEXOP = r'\['
t_INDEXCL = r'\]'
# Curly braces (blocks).
t_BLOCKOP = r'\{'
t_BLOCKCL = r'\}'
# List separator.
t_COMMA = r'\,'
# t_STRING = r'\".*\"'
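# Function-based token rules: each function's docstring is the token's regex,
# and the function body post-processes the matched token.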
# Double-quoted string literal. The token value becomes the tuple
# (text_without_quotes, "STRING"). There is no escape syntax: the literal ends
# at the next '"'.
def t_STRING(t):
    r'\"[^\"]*\"'
    # The pattern excludes '"' but not '\n', so a literal may span several
    # lines. Those newlines are consumed here and never reach t_newline, so
    # the line counter must be advanced here to keep later tokens' line
    # numbers correct.
    t.lexer.lineno += t.value.count("\n")
    # The match always starts and ends with exactly one quote and contains
    # none in between, so slicing removes precisely the delimiters.
    t.value = (t.value[1:-1], "STRING")
    return t
# Numeric literal: digits with an optional fractional part. Integers and
# decimals alike are converted to float; the token value becomes the tuple
# (number, "NUMBER").
def t_NUMBER(t):
    r'\d+(\.\d+)?'
    numeric_value = float(t.value)
    t.value = (numeric_value, "NUMBER")
    return t
# Newline: produces no token (returns None); it only advances the lexer's
# line counter by the length of the matched text.
def t_newline(t):
    r'\n'
    lexer = t.lexer
    lexer.lineno = lexer.lineno + len(t.value)
# Characters skipped between tokens: tab and space. Newline is deliberately
# not listed; it is matched by t_newline, which updates the line counter.
t_ignore = '\t '
# Error handler: report the first character of the unmatched input on stdout,
# then skip that one character.
def t_error(t):
    offending_char = t.value[0]
    print("Illegal character '%s'" % offending_char)
    t.lexer.skip(1)
# Bare identifier or keyword. If the matched text is a key of `reserved`, the
# token type is that keyword's type; otherwise it is a plain 'ID'. The token
# value stays the raw matched text.
def t_ID(t):
    r'[a-zA-Z_][a-zA-Z_0-9]*'
    word = t.value
    t.type = reserved[word] if word in reserved else 'ID'
    return t
# Scalar variable name: '$' followed by an identifier. The token value
# becomes the tuple (name_including_dollar, "VARNAME").
def t_VARNAME(t):
    r'\$[a-zA-Z_][a-zA-Z_0-9]*'
    variable_name = t.value
    t.value = (variable_name, "VARNAME")
    return t
# "++" increment operator.
# This rule must stay ABOVE t_PLS: function rules are tried in definition
# order, so if the single-'+' rule came first, "++" would be lexed as two PLS
# tokens and INCREMENT could never be produced. The pattern matches exactly
# two plus signs; "+++" therefore lexes as INCREMENT followed by PLS.
def t_INCREMENT(t):
    r'\+\+'
    return t


# Single '+' (addition) operator. Reached only when the input at this position
# is not "++", because t_INCREMENT is tried first.
def t_PLS(t):
    r'\+'
    return t


# Array variable name: '@' followed by an identifier. The token value is the
# raw matched text, including the '@'.
def t_ARRNAME(t):
    r'\@[a-zA-Z_][a-zA-Z_0-9]*'
    return t
# "--" decrement operator. The pattern is an escaped '-' followed by a literal
# '-', i.e. exactly two minus signs; the token is returned unchanged.
# NOTE(review): this relies on function rules being tried before the string
# rule t_MIN (PLY's documented ordering) — confirm for the bundled version.
def t_DECREMENT(t):
    r'(\--)'
    return t
# Line comment: '#' through the end of the line ('.' does not match '\n').
# Returning None means no COMMENT token is handed back to the caller.
def t_COMMENT(t):
    r'\#.*'
    return None
# Script entry point: build the lexer, tokenize the contents of test.pl, and
# print type, value, line number and position for every token.
if __name__ == "__main__":
    lexer = lex.lex()
    with open("test.pl", encoding = 'utf-8') as source_file:
        data = source_file.read()
    lexer.input(data)
    # Keep calling lexer.token() until it returns None, which marks the end
    # of the input.
    for tok in iter(lexer.token, None):
        print(tok.type, tok.value, tok.lineno, tok.lexpos)