Skip to content

Commit

Permalink
Better error message for WITH DOCKER (unlazy force execution) (#4180)
Browse files Browse the repository at this point in the history
If a run fails in WITH DOCKER you get an error message like this:
```
================================== ❌ FAILURE ===================================

./t/with-docker+fail-test1 *failed* | Repeating the failure error...
./t/with-docker+fail-test1 *failed* | --> WITH DOCKER RUN --privileged echo "dummy" && false
./t/with-docker+fail-test1 *failed* | Starting dockerd with data root /var/earthly/dind/7b5da64c458d405fff1f21d3a985f6812f9eaad6af9c6eb0d2b8c1ca7422a2e2/tmp.lbamkM
./t/with-docker+fail-test1 *failed* | dummy
./t/with-docker+fail-test1 *failed* | ERROR tests/with-docker/Earthfile:186:8
./t/with-docker+fail-test1 *failed* |       The command
./t/with-docker+fail-test1 *failed* |           WITH DOCKER RUN --privileged echo "dummy" && false
./t/with-docker+fail-test1 *failed* |       did not complete successfully. Exit code 1
./t/with-docker+fail-test1 | unlazy force execution: process "/bin/sh -c 
EARTHLY_DOCKERD_DATA_ROOT=\"/var/earthly/dind/7b5da64c458d405fff1f21d3a985f6812f9eaad6af9c6eb0d2b8c1
ca7422a2e2\" EARTHLY_DOCKERD_CACHE_DATA=\"false\" EARTHLY_DOCKER_LOAD_FILES=\"\" 
EARTHLY_IMAGES_WITH_DIGESTS=\"\" EARTHLY_START_COMPOSE=\"false\" EARTHLY_COMPOSE_FILES=\"\" 
EARTHLY_COMPOSE_SERVICES=\"\" DIND_IMAGE=earthly/dind:alpine-3.19-docker-25.0.5-r0 
OTEL_TRACES_EXPORTER=none PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin /var/earthly/dockerd-
wrapper.sh execute /usr/bin/earth_debugger /bin/sh -c 'echo \"dummy\" && false'" did not complete successfully: exit 
code: 1
```

The last part is confusing:
```
unlazy force execution: process "/bin/sh -c 
EARTHLY_DOCKERD_DATA_ROOT=\"/var/earthly/dind/7b5da64c458d405fff1f21d3a985f6812f9eaad6af9c6eb0d2b8c1
ca7422a2e2\" EARTHLY_DOCKERD_CACHE_DATA=\"false\" EARTHLY_DOCKER_LOAD_FILES=\"\" 
EARTHLY_IMAGES_WITH_DIGESTS=\"\" EARTHLY_START_COMPOSE=\"false\" EARTHLY_COMPOSE_FILES=\"\" 
EARTHLY_COMPOSE_SERVICES=\"\" DIND_IMAGE=earthly/dind:alpine-3.19-docker-25.0.5-r0 
OTEL_TRACES_EXPORTER=none PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin /var/earthly/dockerd-
wrapper.sh execute /usr/bin/earth_debugger /bin/sh -c 'echo \"dummy\" && false'" did not complete successfully: exit 
code: 1
```
In some conditions, only `unlazy force execution` generates an error, so
we need it there, even if duplicated in this case.

With this change the last error is properly formatted:
```
/U/a/s/gha_earthly_test+fail1 |       The command
/U/a/s/gha_earthly_test+fail1 |           WITH DOCKER RUN
/U/a/s/gha_earthly_test+fail1 |       did not complete successfully. Exit code 1
```

I also changed vertormon.parseError to use templates and added some
tests around the error type and exit code parsing.
  • Loading branch information
adamgordonbell authored Jun 7, 2024
1 parent 7425fbf commit 95851ea
Show file tree
Hide file tree
Showing 4 changed files with 273 additions and 79 deletions.
8 changes: 7 additions & 1 deletion earthfile2llb/with_docker_run_local_reg.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@ package earthfile2llb
import (
"context"
"fmt"
"time"

"github.com/earthly/cloud-api/logstream"
"github.com/earthly/earthly/domain"
"github.com/earthly/earthly/logbus/solvermon"
"github.com/earthly/earthly/states"
"github.com/earthly/earthly/util/containerutil"
"github.com/earthly/earthly/util/syncutil/semutil"
Expand Down Expand Up @@ -42,7 +45,10 @@ func (w *withDockerRunLocalReg) Run(ctx context.Context, args []string, opt With
}

defer func() {
cmd.SetEndError(retErr)
if retErr != nil {
message := solvermon.FormatError("WITH DOCKER RUN", retErr.Error())
cmd.SetEnd(time.Now(), logstream.RunStatus_RUN_STATUS_FAILURE, message)
}
}()

var imagesToBuild []*states.ImageDef
Expand Down
8 changes: 7 additions & 1 deletion earthfile2llb/with_docker_run_reg.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@ import (
"fmt"
"sort"
"strings"
"time"

"github.com/earthly/cloud-api/logstream"
"github.com/earthly/earthly/domain"
"github.com/earthly/earthly/logbus/solvermon"
"github.com/earthly/earthly/states"
"github.com/earthly/earthly/util/llbutil/pllb"
"github.com/earthly/earthly/util/platutil"
Expand Down Expand Up @@ -126,7 +129,10 @@ func (w *withDockerRunRegistry) Run(ctx context.Context, args []string, opt With
}

defer func() {
cmd.SetEndError(retErr)
if retErr != nil {
message := solvermon.FormatError("WITH DOCKER RUN", retErr.Error())
cmd.SetEnd(time.Now(), logstream.RunStatus_RUN_STATUS_FAILURE, message)
}
}()

err = w.installDeps(ctx, opt)
Expand Down
205 changes: 128 additions & 77 deletions logbus/solvermon/vertexmon.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,111 +37,162 @@ type vertexMonitor struct {
isCanceled bool
}

var reErrExitCode = regexp.MustCompile(`^(?:process ".*" did not complete successfully|error calling LocalhostExec): exit code: (?P<exit_code>[0-9]+)$`)
var reErrNotFound = regexp.MustCompile(`^failed to calculate checksum of ref ([^ ]*): (.*)$`)
var reHint = regexp.MustCompile(`^(?P<msg>.+?):Hint: .+`)
var reErrExitCode = regexp.MustCompile(`(?:process ".*" did not complete successfully|error calling LocalhostExec): exit code: (?P<exit_code>[0-9]+)$`)

func (vm *vertexMonitor) Write(dt []byte, ts time.Time, stream int) (int, error) {
if stream == BuildkitStatsStream {
stats, err := vm.ssp.Parse(dt)
var errNoExitCodeOMM = errors.New("no exit code, process was killed due to OOM")
var errNoExitCode = errors.New("no exit code in error message")

// getExitCode returns the exit code (int), whether one was found (bool), and an error if the exit code was invalid
func getExitCode(errString string) (int, error) {
if matches, _ := stringutil.NamedGroupMatches(errString, reErrExitCode); len(matches["exit_code"]) == 1 {
exitCodeMatch := matches["exit_code"][0]
exitCode, err := strconv.ParseUint(exitCodeMatch, 10, 32)
if err != nil {
return 0, errors.Wrap(err, "failed decoding stats stream")
return 0, err
}
for _, statsSample := range stats {
statsJSON, err := json.Marshal(statsSample)
if err != nil {
return 0, errors.Wrap(err, "stats json encode failed")
}
_, err = vm.cp.Write(statsJSON, ts, int32(stream))
if err != nil {
return 0, errors.Wrap(err, "write stats")
}
// See https://github.com/earthly/buildkit/commit/9b0bdb600641f3dd1d96f54ac2d86581ab6433b2
if exitCode == math.MaxUint32 {
return 0, errNoExitCodeOMM
}
return len(dt), nil
}
_, err := vm.cp.Write(dt, ts, int32(stream))
if err != nil {
return 0, errors.Wrap(err, "write log line")
if exitCode > 255 {
return 0, fmt.Errorf("exit code %d out of expected range (0-255)", exitCode)
}
exitCodeByte := exitCode & 0xFF
return int(exitCodeByte), nil
}
return len(dt), nil
return 0, errNoExitCode
}

func (vm *vertexMonitor) parseError() {
errString := vm.vertex.Error
indentOp := strings.Join(strings.Split(vm.operation, "\n"), "\n ")
internalStr := ""
if vm.meta.Internal {
internalStr = " internal"
var reErrNotFound = regexp.MustCompile(`^failed to calculate checksum of ref ([^ ]*): (.*)$`)
var reHint = regexp.MustCompile(`^(?P<msg>.+?):Hint: .+`)

// determineFatalErrorType returns logstream.FailureType
// and whether or not its a Fatal Error
func determineFatalErrorType(errString string, exitCode int, exitParseErr error) (logstream.FailureType, bool) {
if strings.Contains(errString, "context canceled") || errString == "no active sessions" {
return logstream.FailureType_FAILURE_TYPE_UNKNOWN, false
}
if exitParseErr == errNoExitCodeOMM {
return logstream.FailureType_FAILURE_TYPE_OOM_KILLED, true
} else if exitParseErr != nil && exitParseErr != errNoExitCode {
// We have an exit code, and can't parse it
return logstream.FailureType_FAILURE_TYPE_UNKNOWN, true
}
if exitCode > 0 {
return logstream.FailureType_FAILURE_TYPE_NONZERO_EXIT, true
}
if reErrNotFound.MatchString(errString) {
return logstream.FailureType_FAILURE_TYPE_FILE_NOT_FOUND, true
}
if strings.Contains(errString, errutil.EarthlyGitStdErrMagicString) {
return logstream.FailureType_FAILURE_TYPE_GIT, true
}
return logstream.FailureType_FAILURE_TYPE_UNKNOWN, false
}

func formatErrorMessage(errString, operation string, internal bool, fatalErrorType logstream.FailureType, exitCode int) string {
if matches, _ := stringutil.NamedGroupMatches(errString, reHint); len(matches["msg"]) == 1 {
errString = matches["msg"][0]
}
switch {
case strings.Contains(errString, "context canceled"):
vm.isCanceled = true
vm.errorStr = "WARN: Canceled"
return
case reErrExitCode.MatchString(errString):
matches, _ := stringutil.NamedGroupMatches(errString, reErrExitCode)
exitCodeMatch := matches["exit_code"][0]

// Ignore the parse error as default case will print it as a string using
// the source, so we won't miss any data.
exitCode, _ := strconv.ParseUint(exitCodeMatch, 10, 32)
switch exitCode {
case math.MaxUint32:
errString = fmt.Sprintf(""+
" The%s command\n"+
internalStr := ""
if internal {
internalStr = " internal"
}
errString = fmt.Sprintf("%s%s", internalStr, errString)

switch fatalErrorType {
case logstream.FailureType_FAILURE_TYPE_OOM_KILLED:
return fmt.Sprintf(
" The%s command\n"+
" %s\n"+
" was terminated because the build system ran out of memory.\n"+
" If you are using a satellite or other remote buildkit, it is the remote system that ran out of memory.",
internalStr, indentOp)
vm.fatalErrorType = logstream.FailureType_FAILURE_TYPE_OOM_KILLED
default:
errString = fmt.Sprintf(""+
" The%s command\n"+
" was terminated because the build system ran out of memory. If you are using a satellite or other remote buildkit, it is the remote system that ran out of memory.", internalStr, operation)
case logstream.FailureType_FAILURE_TYPE_NONZERO_EXIT:
return fmt.Sprintf(
" The%s command\n"+
" %s\n"+
" did not complete successfully. Exit code %s",
internalStr, indentOp, exitCodeMatch)
vm.fatalErrorType = logstream.FailureType_FAILURE_TYPE_NONZERO_EXIT
}
vm.isFatalError = true
case reErrNotFound.MatchString(errString):
" did not complete successfully. Exit code %d", internalStr, operation, exitCode)
case logstream.FailureType_FAILURE_TYPE_FILE_NOT_FOUND:
m := reErrNotFound.FindStringSubmatch(errString)
errString = fmt.Sprintf(""+
return fmt.Sprintf(
" The%s command\n"+
" %s\n"+
" failed: %s",
internalStr, indentOp, m[2])
vm.isFatalError = true
vm.fatalErrorType = logstream.FailureType_FAILURE_TYPE_FILE_NOT_FOUND
case errString == "no active sessions":
vm.isCanceled = true
errString = "WARN: Canceled"
case strings.Contains(errString, errutil.EarthlyGitStdErrMagicString):
" %s\n"+
" failed: %s", internalStr, operation, m[2])
case logstream.FailureType_FAILURE_TYPE_GIT:
gitStdErr, shorterErr, ok := errutil.ExtractEarthlyGitStdErr(errString)
if ok {
errString = fmt.Sprintf(
"The%s command '%s' failed: %s\n\n%s", internalStr, vm.operation, shorterErr, gitStdErr)
} else {
errString = fmt.Sprintf(
"The%s command '%s' failed: %s", internalStr, vm.operation, errString)
return fmt.Sprintf(
"The%s command\n"+
" %s\n"+
"failed: %s\n\n%s", internalStr, operation, shorterErr, gitStdErr)
}
vm.isFatalError = true
return fmt.Sprintf(
"The%s command\n"+
" %s\n"+
"failed: %s", internalStr, operation, errString)
default:
errString = fmt.Sprintf(
"The%s command '%s' failed: %s", internalStr, vm.operation, errString)
return fmt.Sprintf(
"The%s command\n"+
" %s\n"+
"failed: %s", internalStr, operation, errString)
}
}

func FormatError(operation string, errString string) string {
exitCode, err := getExitCode(errString)
fatalErrorType, _ := determineFatalErrorType(errString, exitCode, err)
return formatErrorMessage(errString, operation, false, fatalErrorType, exitCode)
}

func (vm *vertexMonitor) parseError() {
errString := vm.vertex.Error

indentOp := strings.Join(strings.Split(vm.operation, "\n"), "\n ")

exitCode, err := getExitCode(errString)
fatalErrorType, isFatalError := determineFatalErrorType(errString, exitCode, err)
formattedError := formatErrorMessage(errString, indentOp, vm.meta.Internal, fatalErrorType, exitCode)

// Add Error location
slString := ""
if vm.meta.SourceLocation != nil {
slString = fmt.Sprintf(
" %s:%d:%d",
vm.meta.SourceLocation.File, vm.meta.SourceLocation.StartLine,
vm.meta.SourceLocation.StartColumn)
}
if vm.isFatalError {
vm.errorStr = fmt.Sprintf("ERROR%s\n%s", slString, errString)

// Set the error string and flags on the vertexMonitor
if isFatalError {
vm.errorStr = fmt.Sprintf("ERROR%s\n%s", slString, formattedError)
} else {
vm.errorStr = fmt.Sprintf("WARN%s: %s", slString, errString)
vm.errorStr = fmt.Sprintf("WARN%s: %s", slString, formattedError)
}
vm.isFatalError = isFatalError
vm.fatalErrorType = fatalErrorType
}

func (vm *vertexMonitor) Write(dt []byte, ts time.Time, stream int) (int, error) {
if stream == BuildkitStatsStream {
stats, err := vm.ssp.Parse(dt)
if err != nil {
return 0, errors.Wrap(err, "failed decoding stats stream")
}
for _, statsSample := range stats {
statsJSON, err := json.Marshal(statsSample)
if err != nil {
return 0, errors.Wrap(err, "stats json encode failed")
}
_, err = vm.cp.Write(statsJSON, ts, int32(stream))
if err != nil {
return 0, errors.Wrap(err, "write stats")
}
}
return len(dt), nil
}
_, err := vm.cp.Write(dt, ts, int32(stream))
if err != nil {
return 0, errors.Wrap(err, "write log line")
}
return len(dt), nil
}
Loading

0 comments on commit 95851ea

Please sign in to comment.