Skip to content
This repository was archived by the owner on Dec 29, 2022. It is now read-only.

Commit 6f45313

Browse files
author
Steven Thurgood
committed
Modify shlex to remove the charRuneClass, so that any rune not otherwise classified is treated as a word character. This makes it behave more like Python's shlex with whitespace_split=True.
1 parent 6f9e655 commit 6f45313

File tree

2 files changed

+16
-46
lines changed

2 files changed

+16
-46
lines changed

shlex.go

+10-41
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ import (
4949
// TokenType is a top-level token classification: A word, space, comment, unknown.
5050
type TokenType int
5151

52-
// runeTokenClass is the type of a UTF-8 character classification: A character, quote, space, escape.
52+
// runeTokenClass is the type of a UTF-8 character classification: A quote, space, escape.
5353
type runeTokenClass int
5454

5555
// the internal state used by the lexer state machine
@@ -76,7 +76,6 @@ func (a *Token) Equal(b *Token) bool {
7676

7777
// Named classes of UTF-8 runes
7878
const (
79-
charRunes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._-,|"
8079
spaceRunes = " \t\r\n"
8180
escapingQuoteRunes = `"`
8281
nonEscapingQuoteRunes = "'"
@@ -87,7 +86,6 @@ const (
8786
// Classes of rune token
8887
const (
8988
unknownRuneClass runeTokenClass = iota
90-
charRuneClass
9189
spaceRuneClass
9290
escapingQuoteRuneClass
9391
nonEscapingQuoteRuneClass
@@ -127,7 +125,6 @@ func (typeMap tokenClassifier) addRuneClass(runes string, tokenType runeTokenCla
127125
// newDefaultClassifier creates a new classifier for ASCII characters.
128126
func newDefaultClassifier() tokenClassifier {
129127
t := tokenClassifier{}
130-
t.addRuneClass(charRunes, charRuneClass)
131128
t.addRuneClass(spaceRunes, spaceRuneClass)
132129
t.addRuneClass(escapingQuoteRunes, escapingQuoteRuneClass)
133130
t.addRuneClass(nonEscapingQuoteRunes, nonEscapingQuoteRuneClass)
@@ -213,12 +210,6 @@ func (t *Tokenizer) scanStream() (*Token, error) {
213210
{
214211
return nil, io.EOF
215212
}
216-
case charRuneClass:
217-
{
218-
tokenType = WordToken
219-
value = append(value, nextRune)
220-
state = inWordState
221-
}
222213
case spaceRuneClass:
223214
{
224215
}
@@ -244,7 +235,9 @@ func (t *Tokenizer) scanStream() (*Token, error) {
244235
}
245236
default:
246237
{
247-
return nil, fmt.Errorf("Uknown rune: %v", nextRune)
238+
tokenType = WordToken
239+
value = append(value, nextRune)
240+
state = inWordState
248241
}
249242
}
250243
}
@@ -258,10 +251,6 @@ func (t *Tokenizer) scanStream() (*Token, error) {
258251
value: string(value)}
259252
return token, err
260253
}
261-
case charRuneClass, commentRuneClass:
262-
{
263-
value = append(value, nextRune)
264-
}
265254
case spaceRuneClass:
266255
{
267256
t.input.UnreadRune()
@@ -284,7 +273,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
284273
}
285274
default:
286275
{
287-
return nil, fmt.Errorf("Uknown rune: %v", nextRune)
276+
value = append(value, nextRune)
288277
}
289278
}
290279
}
@@ -299,15 +288,11 @@ func (t *Tokenizer) scanStream() (*Token, error) {
299288
value: string(value)}
300289
return token, err
301290
}
302-
case charRuneClass, spaceRuneClass, escapingQuoteRuneClass, nonEscapingQuoteRuneClass, escapeRuneClass, commentRuneClass:
291+
default:
303292
{
304293
state = inWordState
305294
value = append(value, nextRune)
306295
}
307-
default:
308-
{
309-
return nil, fmt.Errorf("Uknown rune: %v", nextRune)
310-
}
311296
}
312297
}
313298
case escapingQuotedState: // the next rune after an escape character, in double quotes
@@ -321,15 +306,11 @@ func (t *Tokenizer) scanStream() (*Token, error) {
321306
value: string(value)}
322307
return token, err
323308
}
324-
case charRuneClass, spaceRuneClass, escapingQuoteRuneClass, nonEscapingQuoteRuneClass, escapeRuneClass, commentRuneClass:
309+
default:
325310
{
326311
state = quotingEscapingState
327312
value = append(value, nextRune)
328313
}
329-
default:
330-
{
331-
return nil, fmt.Errorf("Uknown rune: %v", nextRune)
332-
}
333314
}
334315
}
335316
case quotingEscapingState: // in escaping double quotes
@@ -343,10 +324,6 @@ func (t *Tokenizer) scanStream() (*Token, error) {
343324
value: string(value)}
344325
return token, err
345326
}
346-
case charRuneClass, spaceRuneClass, nonEscapingQuoteRuneClass, commentRuneClass:
347-
{
348-
value = append(value, nextRune)
349-
}
350327
case escapingQuoteRuneClass:
351328
{
352329
state = inWordState
@@ -357,7 +334,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
357334
}
358335
default:
359336
{
360-
return nil, fmt.Errorf("Uknown rune: %v", nextRune)
337+
value = append(value, nextRune)
361338
}
362339
}
363340
}
@@ -372,17 +349,13 @@ func (t *Tokenizer) scanStream() (*Token, error) {
372349
value: string(value)}
373350
return token, err
374351
}
375-
case charRuneClass, spaceRuneClass, escapingQuoteRuneClass, escapeRuneClass, commentRuneClass:
376-
{
377-
value = append(value, nextRune)
378-
}
379352
case nonEscapingQuoteRuneClass:
380353
{
381354
state = inWordState
382355
}
383356
default:
384357
{
385-
return nil, fmt.Errorf("Uknown rune: %v", nextRune)
358+
value = append(value, nextRune)
386359
}
387360
}
388361
}
@@ -396,10 +369,6 @@ func (t *Tokenizer) scanStream() (*Token, error) {
396369
value: string(value)}
397370
return token, err
398371
}
399-
case charRuneClass, escapingQuoteRuneClass, escapeRuneClass, commentRuneClass, nonEscapingQuoteRuneClass:
400-
{
401-
value = append(value, nextRune)
402-
}
403372
case spaceRuneClass:
404373
{
405374
if nextRune == '\n' {
@@ -414,7 +383,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
414383
}
415384
default:
416385
{
417-
return nil, fmt.Errorf("Uknown rune: %v", nextRune)
386+
value = append(value, nextRune)
418387
}
419388
}
420389
}

shlex_test.go

+6-5
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,12 @@ import (
2424
var (
2525
// one two "three four" "five \"six\"" seven#eight # nine # ten
2626
// eleven 'twelve\'
27-
testString = "one two \"three four\" \"five \\\"six\\\"\" seven#eight # nine # ten\n eleven 'twelve\\'"
27+
testString = "one two \"three four\" \"five \\\"six\\\"\" seven#eight # nine # ten\n eleven 'twelve\\' thirteen=13 fourteen/14"
2828
)
2929

3030
func TestClassifier(t *testing.T) {
3131
classifier := newDefaultClassifier()
3232
tests := map[rune]runeTokenClass{
33-
'a': charRuneClass,
3433
' ': spaceRuneClass,
3534
'"': escapingQuoteRuneClass,
3635
'\'': nonEscapingQuoteRuneClass,
@@ -53,7 +52,9 @@ func TestTokenizer(t *testing.T) {
5352
&Token{WordToken, "seven#eight"},
5453
&Token{CommentToken, " nine # ten"},
5554
&Token{WordToken, "eleven"},
56-
&Token{WordToken, "twelve\\"}}
55+
&Token{WordToken, "twelve\\"},
56+
&Token{WordToken, "thirteen=13"},
57+
&Token{WordToken, "fourteen/14"}}
5758

5859
tokenizer := NewTokenizer(testInput)
5960
for i, want := range expectedTokens {
@@ -69,7 +70,7 @@ func TestTokenizer(t *testing.T) {
6970

7071
func TestLexer(t *testing.T) {
7172
testInput := strings.NewReader(testString)
72-
expectedStrings := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "eleven", "twelve\\"}
73+
expectedStrings := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
7374

7475
lexer := NewLexer(testInput)
7576
for i, want := range expectedStrings {
@@ -84,7 +85,7 @@ func TestLexer(t *testing.T) {
8485
}
8586

8687
func TestSplit(t *testing.T) {
87-
want := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "eleven", "twelve\\"}
88+
want := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
8889
got, err := Split(testString)
8990
if err != nil {
9091
t.Error(err)

0 commit comments

Comments (0)