@@ -49,7 +49,7 @@ import (
49
49
// TokenType is a top-level token classification: a word, space, comment, or unknown.
50
50
type TokenType int
51
51
52
- // runeTokenClass is the type of a UTF-8 character classification: A character, quote, space, escape.
52
+ // runeTokenClass is the type of a UTF-8 character classification: a quote, space, or escape.
53
53
type runeTokenClass int
54
54
55
55
// the internal state used by the lexer state machine
@@ -76,7 +76,6 @@ func (a *Token) Equal(b *Token) bool {
76
76
77
77
// Named classes of UTF-8 runes
78
78
const (
79
- charRunes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._-,|"
80
79
spaceRunes = " \t \r \n "
81
80
escapingQuoteRunes = `"`
82
81
nonEscapingQuoteRunes = "'"
@@ -87,7 +86,6 @@ const (
87
86
// Classes of rune token
88
87
const (
89
88
unknownRuneClass runeTokenClass = iota
90
- charRuneClass
91
89
spaceRuneClass
92
90
escapingQuoteRuneClass
93
91
nonEscapingQuoteRuneClass
@@ -127,7 +125,6 @@ func (typeMap tokenClassifier) addRuneClass(runes string, tokenType runeTokenCla
127
125
// newDefaultClassifier creates a new classifier for ASCII characters.
128
126
func newDefaultClassifier () tokenClassifier {
129
127
t := tokenClassifier {}
130
- t .addRuneClass (charRunes , charRuneClass )
131
128
t .addRuneClass (spaceRunes , spaceRuneClass )
132
129
t .addRuneClass (escapingQuoteRunes , escapingQuoteRuneClass )
133
130
t .addRuneClass (nonEscapingQuoteRunes , nonEscapingQuoteRuneClass )
@@ -213,12 +210,6 @@ func (t *Tokenizer) scanStream() (*Token, error) {
213
210
{
214
211
return nil , io .EOF
215
212
}
216
- case charRuneClass :
217
- {
218
- tokenType = WordToken
219
- value = append (value , nextRune )
220
- state = inWordState
221
- }
222
213
case spaceRuneClass :
223
214
{
224
215
}
@@ -244,7 +235,9 @@ func (t *Tokenizer) scanStream() (*Token, error) {
244
235
}
245
236
default :
246
237
{
247
- return nil , fmt .Errorf ("Uknown rune: %v" , nextRune )
238
+ tokenType = WordToken
239
+ value = append (value , nextRune )
240
+ state = inWordState
248
241
}
249
242
}
250
243
}
@@ -258,10 +251,6 @@ func (t *Tokenizer) scanStream() (*Token, error) {
258
251
value : string (value )}
259
252
return token , err
260
253
}
261
- case charRuneClass , commentRuneClass :
262
- {
263
- value = append (value , nextRune )
264
- }
265
254
case spaceRuneClass :
266
255
{
267
256
t .input .UnreadRune ()
@@ -284,7 +273,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
284
273
}
285
274
default :
286
275
{
287
- return nil , fmt . Errorf ( "Uknown rune: %v" , nextRune )
276
+ value = append ( value , nextRune )
288
277
}
289
278
}
290
279
}
@@ -299,15 +288,11 @@ func (t *Tokenizer) scanStream() (*Token, error) {
299
288
value : string (value )}
300
289
return token , err
301
290
}
302
- case charRuneClass , spaceRuneClass , escapingQuoteRuneClass , nonEscapingQuoteRuneClass , escapeRuneClass , commentRuneClass :
291
+ default :
303
292
{
304
293
state = inWordState
305
294
value = append (value , nextRune )
306
295
}
307
- default :
308
- {
309
- return nil , fmt .Errorf ("Uknown rune: %v" , nextRune )
310
- }
311
296
}
312
297
}
313
298
case escapingQuotedState : // the next rune after an escape character, in double quotes
@@ -321,15 +306,11 @@ func (t *Tokenizer) scanStream() (*Token, error) {
321
306
value : string (value )}
322
307
return token , err
323
308
}
324
- case charRuneClass , spaceRuneClass , escapingQuoteRuneClass , nonEscapingQuoteRuneClass , escapeRuneClass , commentRuneClass :
309
+ default :
325
310
{
326
311
state = quotingEscapingState
327
312
value = append (value , nextRune )
328
313
}
329
- default :
330
- {
331
- return nil , fmt .Errorf ("Uknown rune: %v" , nextRune )
332
- }
333
314
}
334
315
}
335
316
case quotingEscapingState : // in escaping double quotes
@@ -343,10 +324,6 @@ func (t *Tokenizer) scanStream() (*Token, error) {
343
324
value : string (value )}
344
325
return token , err
345
326
}
346
- case charRuneClass , spaceRuneClass , nonEscapingQuoteRuneClass , commentRuneClass :
347
- {
348
- value = append (value , nextRune )
349
- }
350
327
case escapingQuoteRuneClass :
351
328
{
352
329
state = inWordState
@@ -357,7 +334,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
357
334
}
358
335
default :
359
336
{
360
- return nil , fmt . Errorf ( "Uknown rune: %v" , nextRune )
337
+ value = append ( value , nextRune )
361
338
}
362
339
}
363
340
}
@@ -372,17 +349,13 @@ func (t *Tokenizer) scanStream() (*Token, error) {
372
349
value : string (value )}
373
350
return token , err
374
351
}
375
- case charRuneClass , spaceRuneClass , escapingQuoteRuneClass , escapeRuneClass , commentRuneClass :
376
- {
377
- value = append (value , nextRune )
378
- }
379
352
case nonEscapingQuoteRuneClass :
380
353
{
381
354
state = inWordState
382
355
}
383
356
default :
384
357
{
385
- return nil , fmt . Errorf ( "Uknown rune: %v" , nextRune )
358
+ value = append ( value , nextRune )
386
359
}
387
360
}
388
361
}
@@ -396,10 +369,6 @@ func (t *Tokenizer) scanStream() (*Token, error) {
396
369
value : string (value )}
397
370
return token , err
398
371
}
399
- case charRuneClass , escapingQuoteRuneClass , escapeRuneClass , commentRuneClass , nonEscapingQuoteRuneClass :
400
- {
401
- value = append (value , nextRune )
402
- }
403
372
case spaceRuneClass :
404
373
{
405
374
if nextRune == '\n' {
@@ -414,7 +383,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
414
383
}
415
384
default :
416
385
{
417
- return nil , fmt . Errorf ( "Uknown rune: %v" , nextRune )
386
+ value = append ( value , nextRune )
418
387
}
419
388
}
420
389
}
0 commit comments