From 4f459a4cd1cb3adc3e59278a4598f8cd58c31812 Mon Sep 17 00:00:00 2001 From: Tristan Stenner Date: Wed, 27 Sep 2023 15:47:18 +0200 Subject: [PATCH] Add string trimming (#, %, ##, %%) in variable expansion Review comments and edge cases - the `${}` parser handles escapes, but needs to preserve them for `#`/`%` - but `\}` needs to be de-escaped - reversing strings need to handle escapes, i.e. `a\*c` -> `c\*a` - build the regex with a scanner, not QuoteMeta+StringReplace - add more complicated cases to the tests Separate out + unit test helper functions Add trim test to dockerfile_test Signed-off-by: Tristan Stenner --- frontend/dockerfile/dockerfile_test.go | 14 +-- frontend/dockerfile/docs/reference.md | 38 ++++++- frontend/dockerfile/shell/lex.go | 138 ++++++++++++++++++++++++- frontend/dockerfile/shell/lex_test.go | 84 +++++++++++++++ 4 files changed, 263 insertions(+), 11 deletions(-) diff --git a/frontend/dockerfile/dockerfile_test.go b/frontend/dockerfile/dockerfile_test.go index 342e11de4b26..05ec3ffdfe02 100644 --- a/frontend/dockerfile/dockerfile_test.go +++ b/frontend/dockerfile/dockerfile_test.go @@ -247,18 +247,20 @@ func testDefaultEnvWithArgs(t *testing.T, sb integration.Sandbox) { f := getFrontend(t, sb) dockerfile := []byte(` -FROM busybox AS build +ARG image=idlebox +FROM busy${image#idle} AS build ARG my_arg ENV my_arg "my_arg=${my_arg:-def_val}" +ENV my_trimmed_arg "${my_arg%%e*}" COPY myscript.sh myscript.sh -RUN ./myscript.sh $my_arg +RUN ./myscript.sh $my_arg $my_trimmed_arg FROM scratch COPY --from=build /out /out `) script := []byte(` #!/usr/bin/env sh -echo -n $my_arg $1 > /out +echo -n $my_arg $* > /out `) dir := integration.Tmpdir( @@ -278,9 +280,9 @@ echo -n $my_arg $1 > /out frontendAttrs map[string]string expected string }{ - {"nil", nil, "my_arg=def_val my_arg=def_val"}, - {"empty", map[string]string{"build-arg:my_arg": ""}, "my_arg=def_val my_arg=def_val"}, - {"override", map[string]string{"build-arg:my_arg": "override"}, 
"my_arg=override my_arg=override"}, + {"nil", nil, "my_arg=def_val my_arg=def_val my_arg=d"}, + {"empty", map[string]string{"build-arg:my_arg": ""}, "my_arg=def_val my_arg=def_val my_arg=d"}, + {"override", map[string]string{"build-arg:my_arg": "override"}, "my_arg=override my_arg=override my_arg=ov"}, } { t.Run(x.name, func(t *testing.T) { _, err = f.Solve(sb.Context(), c, client.SolveOpt{ diff --git a/frontend/dockerfile/docs/reference.md b/frontend/dockerfile/docs/reference.md index 312ea9f15f48..43b3ca7b218e 100644 --- a/frontend/dockerfile/docs/reference.md +++ b/frontend/dockerfile/docs/reference.md @@ -288,10 +288,46 @@ modifiers as specified below: - `${variable:+word}` indicates that if `variable` is set then `word` will be the result, otherwise the result is the empty string. +The following variable replacements are supported in a pre-release version of +Dockerfile syntax, when using the `# syntax=docker/dockerfile-upstream:master` syntax +directive in your Dockerfile: + +- `${variable#pattern}` removes the shortest match of `pattern` from `variable`, + seeking from the start of the string. + + ```bash + str=foobarbaz echo ${str#f*b} # arbaz + ``` + +- `${variable##pattern}` removes the longest match of `pattern` from `variable`, + seeking from the start of the string. + + ```bash + str=foobarbaz echo ${str##f*b} # az + ``` + +- `${variable%pattern}` removes the shortest match of `pattern` from `variable`, + seeking backwards from the end of the string. + + ```bash + string=foobarbaz echo ${string%b*} # foobar + ``` + +- `${variable%%pattern}` removes the longest match of `pattern` from `variable`, + seeking backwards from the end of the string. + + ```bash + string=foobarbaz echo ${string%%b*} # foo + ``` + In all cases, `word` can be any string, including additional environment variables. 
-Escaping is possible by adding a `\` before the variable: `\$foo` or `\${foo}`, +`pattern` is a glob pattern where `?` matches any single character +and `*` any number of characters (including zero). To match literal `?` and `*`, +use a backslash escape: `\?` and `\*`. + +You can escape whole variable names by adding a `\` before the variable: `\$foo` or `\${foo}`, for example, will translate to `$foo` and `${foo}` literals respectively. Example (parsed representation is displayed after the `#`): diff --git a/frontend/dockerfile/shell/lex.go b/frontend/dockerfile/shell/lex.go index 80806f8ba778..3a487c0d538d 100644 --- a/frontend/dockerfile/shell/lex.go +++ b/frontend/dockerfile/shell/lex.go @@ -3,6 +3,7 @@ package shell import ( "bytes" "fmt" + "regexp" "strings" "text/scanner" "unicode" @@ -100,7 +101,7 @@ type shellWord struct { } func (sw *shellWord) process(source string) (string, []string, error) { - word, words, err := sw.processStopOn(scanner.EOF) + word, words, err := sw.processStopOn(scanner.EOF, sw.rawEscapes) if err != nil { err = errors.Wrapf(err, "failed to process %q", source) } @@ -154,7 +155,7 @@ func (w *wordsStruct) getWords() []string { // Process the word, starting at 'pos', and stop when we get to the // end of the word or the 'stopChar' character -func (sw *shellWord) processStopOn(stopChar rune) (string, []string, error) { +func (sw *shellWord) processStopOn(stopChar rune, rawEscapes bool) (string, []string, error) { var result bytes.Buffer var words wordsStruct @@ -166,6 +167,14 @@ func (sw *shellWord) processStopOn(stopChar rune) (string, []string, error) { charFuncMapping['"'] = sw.processDoubleQuote } + // temporarily set sw.rawEscapes if needed + if rawEscapes != sw.rawEscapes { + sw.rawEscapes = rawEscapes + defer func() { + sw.rawEscapes = !rawEscapes + }() + } + for sw.scanner.Peek() != scanner.EOF { ch := sw.scanner.Peek() @@ -351,8 +360,9 @@ func (sw *shellWord) processDollar() (string, error) { ch = sw.scanner.Next() chs += 
string(ch) fallthrough - case '+', '-', '?': - word, _, err := sw.processStopOn('}') + case '+', '-', '?', '#', '%': + rawEscapes := ch == '#' || ch == '%' + word, _, err := sw.processStopOn('}', rawEscapes) if err != nil { if sw.scanner.Peek() == scanner.EOF { return "", errors.New("syntax error: missing '}'") @@ -394,6 +404,18 @@ func (sw *shellWord) processDollar() (string, error) { return "", errors.Errorf("%s: %s", name, message) } return value, nil + case '%', '#': + // %/# matches the shortest pattern expansion, %%/## the longest + greedy := false + if word[0] == byte(ch) { + greedy = true + word = word[1:] + } + + if ch == '%' { + return trimSuffix(word, value, greedy) + } + return trimPrefix(word, value, greedy) default: return "", errors.Errorf("unsupported modifier (%s) in substitution", chs) } @@ -472,3 +494,111 @@ func BuildEnvs(env []string) map[string]string { return envs } + +// convertShellPatternToRegex converts a shell-like wildcard pattern +// (? is a single char, * either the shortest or longest (greedy) string) +// to an equivalent regular expression. +// +// Based on +// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13 +// but without the bracket expressions (`[]`) +func convertShellPatternToRegex(pattern string, greedy bool) (*regexp.Regexp, error) { + var s scanner.Scanner + s.Init(strings.NewReader(pattern)) + var out strings.Builder + out.Grow(len(pattern) + 4) + + // match only at the beginning of the string + out.WriteByte('^') + + // default: non-greedy wildcards + starPattern := ".*?" + if greedy { + starPattern = ".*" + } + + for tok := s.Next(); tok != scanner.EOF; tok = s.Next() { + switch tok { + case '*': + out.WriteString(starPattern) + continue + case '?': + out.WriteByte('.') + continue + case '\\': + // } as part of ${} needs to be escaped, but the escape isn't part + // of the pattern + if s.Peek() == '}' { + continue + } + out.WriteRune('\\') + tok = s.Next() + if tok != '*' && tok != '?' 
&& tok != '\\' { + return nil, errors.Errorf("invalid escape '\\%c'", tok) + } + // regex characters that need to be escaped + // escaping closing is optional, but done for consistency + case '[', ']', '{', '}', '.', '+', '(', ')', '|', '^', '$': + out.WriteByte('\\') + } + out.WriteRune(tok) + } + return regexp.Compile(out.String()) +} + +func trimPrefix(word, value string, greedy bool) (string, error) { + re, err := convertShellPatternToRegex(word, greedy) + if err != nil { + return "", errors.Errorf("invalid pattern (%s) in substitution: %s", word, err) + } + + if idx := re.FindStringIndex(value); idx != nil { + value = value[idx[1]:] + } + return value, nil +} + +// reversePattern reverses a pattern while keeping escape sequences intact, i.e. a\*c -> c\*a +func reversePattern(pattern string) string { + patternRunes := []rune(pattern) + out := make([]rune, len(patternRunes)) + lastIdx := len(patternRunes) - 1 + for i := 0; i <= lastIdx; { + tok := patternRunes[i] + outIdx := lastIdx - i + if tok == '\\' && i != lastIdx { + out[outIdx-1] = tok + // the pattern is taken from a ${var#pattern}, so the last + // character can't be an escape character + out[outIdx] = patternRunes[i+1] + i += 2 + } else { + out[outIdx] = tok + i++ + } + } + return string(out) +} + +func reverseString(str string) string { + out := []rune(str) + outIdx := len(out) - 1 + for i := 0; i < outIdx; i++ { + out[i], out[outIdx] = out[outIdx], out[i] + outIdx-- + } + return string(out) +} + +func trimSuffix(pattern, word string, greedy bool) (string, error) { + // regular expressions can't handle finding the shortest rightmost + // string so we reverse both search space and pattern to convert it + // to a leftmost search in both cases + pattern = reversePattern(pattern) + word = reverseString(word) + str, err := trimPrefix(pattern, word, greedy) + if err != nil { + return "", err + } + return reverseString(str), nil +} diff --git a/frontend/dockerfile/shell/lex_test.go b/frontend/dockerfile/shell/lex_test.go index
eae71e2bcaa8..6081f7d3f354 100644 --- a/frontend/dockerfile/shell/lex_test.go +++ b/frontend/dockerfile/shell/lex_test.go @@ -10,6 +10,45 @@ import ( "github.com/stretchr/testify/require" ) +func TestConvertShellPatternToRegex(t *testing.T) { + cases := map[string]string{ + "*": "^.*", + "?": "^.", + "\\*": "^\\*", + "(()[]{\\}^$.\\*\\?|\\\\": "^\\(\\(\\)\\[\\]\\{\\}\\^\\$\\.\\*\\?\\|\\\\", + } + for pattern, expected := range cases { + res, err := convertShellPatternToRegex(pattern, true) + require.NoError(t, err) + require.Equal(t, expected, res.String()) + } + invalid := []string{ + "\\", "\\x", "\\\\\\", + } + for _, pattern := range invalid { + _, err := convertShellPatternToRegex(pattern, true) + require.Error(t, err) + } +} + +func TestReverseString(t *testing.T) { + require.Equal(t, "12345", reverseString("54321")) + require.Equal(t, "👽🚀🖖", reverseString("🖖🚀👽")) +} + +func TestReversePattern(t *testing.T) { + cases := map[string]string{ + "a\\*c": "c\\*a", + "\\\\\\ab": "b\\a\\\\", + "ab\\": "\\ba", + "👽\\🚀🖖": "🖖\\🚀👽", + "\\\\b": "b\\\\", + } + for pattern, expected := range cases { + require.Equal(t, expected, reversePattern(pattern)) + } +} + func TestShellParserMandatoryEnvVars(t *testing.T) { var newWord string var err error @@ -358,6 +397,51 @@ func TestProcessWithMatches(t *testing.T) { }, expectedErr: true, }, + { + // special characters in regular expressions + // } needs to be escaped so it doesn't match the + // closing brace of ${} + input: "${FOO#()[]{\\}^$.\\*\\?|\\\\}", + envs: map[string]string{"FOO": "()[]{}^$.*?|\\x"}, + expected: "x", + matches: map[string]struct{}{"FOO": {}}, + }, + { + input: "${FOO%%\\**}", + envs: map[string]string{"FOO": "xx**"}, + expected: "xx", + matches: map[string]struct{}{"FOO": {}}, + }, + { + input: "${FOO#*x*y}", + envs: map[string]string{"FOO": "xxyy"}, + expected: "y", + matches: map[string]struct{}{"FOO": {}}, + }, + { + input: "${FOO##*x}", + envs: map[string]string{"FOO": "xxyy"}, + expected: "yy", + 
matches: map[string]struct{}{"FOO": {}}, + }, + { + input: "${FOO#?\\?}", + envs: map[string]string{"FOO": "???y"}, + expected: "?y", + matches: map[string]struct{}{"FOO": {}}, + }, + { + input: "${ABC:-.}${FOO%x}${ABC:-.}", + envs: map[string]string{"FOO": "xxyy"}, + expected: ".xxyy.", + matches: map[string]struct{}{"FOO": {}}, + }, + { + input: "${FOO%%\\**\\*}", + envs: map[string]string{"FOO": "a***yy*"}, + expected: "a", + matches: map[string]struct{}{"FOO": {}}, + }, } for _, c := range tc {