-
Notifications
You must be signed in to change notification settings - Fork 803
/
Copy pathRLexer.php
200 lines (180 loc) · 6.74 KB
/
RLexer.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
<?php
namespace Parle;
use JetBrains\PhpStorm\Immutable;
/**
* Multistate lexer class.
*
* Lexemes can be defined on the fly. If a particular lexer instance is meant to be used with Parle\RParser,
* the token IDs need to be taken from there. Otherwise, arbitrary token IDs can be supplied.
* Note that Parle\Parser is not compatible with this lexer.
*
* This is a declaration-only stub: every method body is empty and exists solely to describe the signature.
*
* @see RParser
* @package Parle
*/
class RLexer
{
/* Constants */
/*
* Lexer flag values. Each one is a distinct power of two, so they can presumably be
* OR-ed together and stored in $flags - TODO confirm against the Parle manual.
*/
public const ICASE = 1;
public const DOT_NOT_LF = 2;
public const DOT_NOT_CRLF = 4;
public const SKIP_WS = 8;
public const MATCH_ZERO_LEN = 16;
/* Properties */
/**
* @var bool Start of input flag.
*/
public $bol = false;
/**
* @var int Lexer flags.
*/
public $flags = 0;
/**
* @var int Current lexer state. Read-only (marked with the Immutable attribute).
*/
#[Immutable]
public $state = 0;
/**
* @var int Position of the latest token match. Read-only (marked with the Immutable attribute).
*/
#[Immutable]
public $marker = 0;
/**
* @var int Current input offset. Read-only (marked with the Immutable attribute).
*/
#[Immutable]
public $cursor = 0;
/* Methods */
/**
* Processes the next rule and prepares the resulting token data.
*
* @link https://php.net/manual/en/parle-rlexer.advance.php
* @return void
*/
public function advance(): void {}
/**
* Finalize the lexer rule set
*
* Rules previously added with Parle\RLexer::push() are finalized.
* This method has to be called after all the necessary rules have been pushed.
* The rule set becomes read-only and lexing can begin.
*
* @link https://php.net/manual/en/parle-rlexer.build.php
* @see RLexer::push()
* @return void
*/
public function build(): void {}
/**
* Define token callback
*
* Define a callback to be invoked once the lexer encounters a particular token.
*
* @link https://php.net/manual/en/parle-rlexer.callout.php
* @param int $id Token id.
* @param callable $callback Callable to be invoked. The callable doesn't receive any arguments and its return value is ignored.
* @return void
*/
public function callout(int $id, callable $callback): void {}
/**
* Pass the data for processing
*
* Consume the data for lexing.
*
* @link https://php.net/manual/en/parle-rlexer.consume.php
* @param string $data Data to be lexed.
* @return void
*/
public function consume(string $data): void {}
/**
* Dump the state machine
*
* Dump the current state machine to stdout.
*
* @link https://php.net/manual/en/parle-rlexer.dump.php
* @return void
*/
public function dump(): void {}
/**
* Retrieve the current token.
*
* NOTE(review): the body is empty although a non-void return type is declared; this is only valid
* because the file is a stub that is never executed.
*
* @link https://php.net/manual/en/parle-rlexer.gettoken.php
* @return Token
*/
public function getToken(): Token {}
/**
* Add a lexer rule (signature: regex and token id only)
*
* Push a pattern for lexeme recognition.
* A 'start state' and 'exit state' can be specified by using a suitable signature.
*
* NOTE(review): push() is declared three times in this class to document its overloads.
* PHP itself does not allow a method to be declared more than once, so these declarations
* can only serve as signature documentation for tooling.
*
* @param string $regex Regular expression used for token matching.
* @param int $id
* Token id. If the lexer instance is meant to be used standalone, this can be an arbitrary number.
* If the lexer instance is going to be passed to the parser, it has to be an id returned by Parle\RParser::tokenId().
* @return void
* @see RParser::tokenId()
* @link https://php.net/manual/en/parle-rlexer.push.php
*/
public function push(string $regex, int $id): void {}
/**
* Add a lexer rule (signature: start state, regex, token id and exit state)
*
* Push a pattern for lexeme recognition.
* A 'start state' and 'exit state' can be specified by using a suitable signature.
*
* @param string $state State name. If '*' is used as start state, then the rule is applied to all lexer states.
* @param string $regex Regular expression used for token matching.
* @param int $id
* Token id. If the lexer instance is meant to be used standalone, this can be an arbitrary number.
* If the lexer instance is going to be passed to the parser, it has to be an id returned by Parle\RParser::tokenId().
* @param string $newState
* New state name, after the rule was applied.
* If '.' is specified as the exit state, then the lexer state is unchanged when that rule matches.
* An exit state with '>' before the name means push. Use the signature without id for either continuation or to
* start matching, when a continuation or recursion is required.
* If '<' is specified as exit state, it means pop. In that case, the signature containing the id can be used to
* identify the match. Note that even if an id is specified, the rule only finishes once all the
* previous pushes have been popped.
* @return void
* @see RParser::tokenId()
* @link https://php.net/manual/en/parle-rlexer.push.php
*/
public function push(string $state, string $regex, int $id, string $newState): void {}
/**
* Add a lexer rule (signature: start state, regex and exit state, without token id)
*
* Push a pattern for lexeme recognition.
* A 'start state' and 'exit state' can be specified by using a suitable signature.
*
* @param string $state State name. If '*' is used as start state, then the rule is applied to all lexer states.
* @param string $regex Regular expression used for token matching.
* @param string $newState
* New state name, after the rule was applied.
* If '.' is specified as the exit state, then the lexer state is unchanged when that rule matches.
* An exit state with '>' before the name means push. Use the signature without id for either continuation or to
* start matching, when a continuation or recursion is required.
* If '<' is specified as exit state, it means pop. In that case, the signature containing the id can be used to
* identify the match. Note that even if an id is specified, the rule only finishes once all the
* previous pushes have been popped.
* @return void
* @link https://php.net/manual/en/parle-rlexer.push.php
*/
public function push(string $state, string $regex, string $newState): void {}
/**
* Push a new start state
*
* This lexer type can have more than one state machine.
* This allows you to lex different tokens depending on context, thus allowing simple parsing to take place.
* Once a state has been pushed, it can be used with a suitable Parle\RLexer::push() signature variant.
*
* @see RLexer::push()
* @link https://php.net/manual/en/parle-rlexer.pushstate.php
* @param string $state Name of the state.
* @return int
*/
public function pushState(string $state): int {}
/**
* Reset lexer
*
* Reset lexing, supplying the desired offset.
*
* NOTE(review): the original description called the offset optional, but $pos has no default value
* in this signature - confirm against the extension.
*
* @link https://php.net/manual/en/parle-rlexer.reset.php
* @param int $pos Reset position.
* @return void
*/
public function reset(int $pos): void {}
}