From ff7b8a5db6122878cbc94bccd6b3ac89be6fca5f Mon Sep 17 00:00:00 2001 From: Ed Warnicke Date: Mon, 28 Sep 2020 09:54:27 -0500 Subject: [PATCH] Create WithGracePeriod,WithOnDeathSignalChildren options Signed-off-by: Ed Warnicke --- .github/workflows/ci.yaml | 8 ++- .gitignore | 2 + .golangci.yml | 4 +- README.md | 35 +++++++++- exechelper.go | 139 ++++++++++++++++++++++++++++++------- exechelper_test.go | 79 +++++++++++++++++++++ options.go | 9 +++ options_linux.go | 33 +++++++++ testcmds/afterterm/main.go | 51 ++++++++++++++ 9 files changed, 331 insertions(+), 29 deletions(-) create mode 100644 options_linux.go create mode 100644 testcmds/afterterm/main.go diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 1536c03..d04eefe 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -1,6 +1,12 @@ --- name: ci -on: [pull_request, push] +on: + push: + branches: + - master + pull_request: + branches: + - master jobs: build: name: build diff --git a/.gitignore b/.gitignore index ee770a6..73cfeb6 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,5 @@ # vendor/ .idea/ +afterterm +!testcmds/afterterm diff --git a/.golangci.yml b/.golangci.yml index 9763796..ae1e7c5 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -5,6 +5,8 @@ run: issues-exit-code: 1 tests: true linters-settings: + funlen: + lines: 80 gosec: settings: exclude: "G204" @@ -22,8 +24,6 @@ linters-settings: - (github.com/sirupsen/logrus.FieldLogger).Fatalf golint: min-confidence: 0.8 - goimports: - local-prefixes: github.com/networkservicemesh/cmd-forwarder-vppagent gocyclo: min-complexity: 15 maligned: diff --git a/README.md b/README.md index bf4c464..59fd957 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Its main features are: 2. It's Start() returns an errCh that will get zero or one errors, and be closed after the command has finished running 3. You can use the WithXYZ pattern to do things like customize things like Stdout, Stdin, StdError, Dir, and Env variables -Run Examples: +## Run Examples: ```go if err := exechelper.Run("go list -m");err != nil {...} @@ -20,7 +20,7 @@ errBuffer := bytes.NewBuffer([]bytes{}) if err := exechelper.Run("go list -m",WithStdout(outputBuffer),WithStderr(errBuffer));err != nil {...} ``` -Start Examples +## Start Examples ```go ctx,cancel := context.WithCancel(context.Background()) @@ -34,3 +34,34 @@ errCh := exechelper.Start(startContext,"spire-server run",WithContext(ctx)) Similarly, exechelper.Output(...), exechelper.CombinedOutput(...) are provided. +## Multiplexing Stdout/Stderr + +WithStdout and WithStderr can be used multiple times, with each provided io.Writer provided getting a copy of the stdout/stderr. +As many WithStdout or WithStderr options may be used on the same Run/Start/Output/CombinedOutput as you wish + +Example: + +```go +outputBuffer := bytes.NewBuffer([]byte{}) +errBuffer := bytes.NewBuffer([]bytes{}) +if err := exechelper.Run("go list -m",WithStdout(outputBuffer),WithStderr(errBuffer),WithStdout(os.Stdout),WithStderr(os.Stderr));err != nil {...} +// stdout of "go list -m" is written to both outputBuffer and os.Stdout. stderr is written to both errBuffer and os.Stdout +``` + +## Terminating with SIGTERM and a grace period + +By default, canceling the context on a Start will result in SIGKILL being sent to the child process (this is how exec.Cmd works). +It is often useful to send SIGTERM first, and allow a grace period before sending SIGKILL to allow processes the opportunity +to clean themselves up. + +Example: +```go +ctx,cancel := context.WithCancel(context.Background()) +errCh := exechelper.Start(startContext,"spire-server run",WithContext(ctx),WithGracePeriod(30*time.Second)) +cancel() +<-errCh +// Calling cancel will send SIGTERM to the spire-server and wait up to 30 seconds for it to exit before sending SIGKILL +// errCh will receive any errors from the exit of spire-server immediately after spire-server exiting, whether that +// is immediately, any time during the 30 second grace period, or after the SIGKILL is sent +``` + diff --git a/exechelper.go b/exechelper.go index a0f2c04..f8251df 100644 --- a/exechelper.go +++ b/exechelper.go @@ -21,8 +21,11 @@ import ( "bytes" "context" "os/exec" + "syscall" + "time" "github.com/google/shlex" + "github.com/pkg/errors" ) // Run - Creates a exec.Cmd using cmdStr. Runs exec.Cmd.Run and returns the resulting error @@ -34,22 +37,104 @@ func Run(cmdStr string, options ...*Option) error { func Start(cmdStr string, options ...*Option) <-chan error { errCh := make(chan error, 1) + // Extract context from options + optionCtx := extractContextFromOptions(options) + + // Extract graceperiod from options + graceperiod, err := extractGracePeriodFromOptions(optionCtx, options) + if err != nil { + errCh <- err + close(errCh) + return errCh + } + + // By default, the context passed to StartContext (ie, cmdCtx) is the same as the context we got from the options + // (ie, optionsCtx) + cmdCtx := optionCtx + + // But if we have a graceperiod, we need a separate cmdCtx and cmdCancel so we can insert our SIGTERM + // between the optionsCtx.Done() and time.After(graceperiod) before *actually* canceling the cmdCtx + // and thus sending SIGKILL to the cmd + var cmdCancel context.CancelFunc + if graceperiod != 0 { + cmdCtx, cmdCancel = context.WithCancel(context.Background()) + } + + cmd, err := constructCommand(cmdCtx, cmdStr, options) + if err != nil { + errCh <- err + close(errCh) + if cmdCancel != nil { + cmdCancel() + } + return errCh + } + + // Start the *exec.Cmd + if err = cmd.Start(); err != nil { + errCh <- err + close(errCh) + if cmdCancel != nil { + cmdCancel() + } + return errCh + } + + // By default, the error channel we send any error from the wait to (waitErrCh) is the one we return (errCh) + waitErrCh := errCh + + // But if we have a graceperiod and a cmdCancel, we need a distinct waitErrCh from the one we return, + // so that we can select on waitErrCh after sending SIGTERM and then forward any errors to errCh + if cmdCancel != nil && graceperiod > 0 { + waitErrCh = make(chan error, len(errCh)) + } + + // Collect the wait + go func(waitErrCh chan error) { + if err := cmd.Wait(); err != nil { + waitErrCh <- err + } + close(waitErrCh) + }(waitErrCh) + + // Handle SIGTERM and graceperiod + if cmdCancel != nil && graceperiod > 0 { + go handleGracePeriod(optionCtx, cmd, cmdCancel, graceperiod, waitErrCh, errCh) + } + + return errCh +} + +func extractGracePeriodFromOptions(ctx context.Context, options []*Option) (time.Duration, error) { + var graceperiod time.Duration + for _, option := range options { + if option.GracePeriod != 0 { + graceperiod = option.GracePeriod + if ctx == nil { + return 0, errors.New("graceperiod cannot be set without WithContext option") + } + } + } + return graceperiod, nil +} + +func extractContextFromOptions(options []*Option) context.Context { // Set the context - var ctx context.Context + var optionCtx context.Context for _, option := range options { if option.Context != nil { - ctx = option.Context + optionCtx = option.Context } } + return optionCtx +} +func constructCommand(ctx context.Context, cmdStr string, options []*Option) (*exec.Cmd, error) { // Construct the command args args, err := shlex.Split(cmdStr) if err != nil { - errCh <- err - close(errCh) - return errCh + return nil, err } - // Create the *exec.Cmd var cmd *exec.Cmd switch ctx { @@ -63,30 +148,36 @@ func Start(cmdStr string, options ...*Option) <-chan error { for _, option := range options { // Apply the CmdOptions if option.CmdOption != nil { - if err = option.CmdOption(cmd); err != nil { - errCh <- err - close(errCh) - return errCh + if err := option.CmdOption(cmd); err != nil { + return nil, err } } } + return cmd, nil +} - // Start the *exec.Cmd - if err = cmd.Start(); err != nil { - errCh <- err - close(errCh) - return errCh - } +func handleGracePeriod(optionCtx context.Context, cmd *exec.Cmd, cmdCancel context.CancelFunc, graceperiod time.Duration, waitErrCh <-chan error, errCh chan<- error) { + // Wait for the optionCtx to be done + <-optionCtx.Done() - // Collect the wait - go func(chan error) { - if err := cmd.Wait(); err != nil { - errCh <- err - } - close(errCh) - }(errCh) + // Send SIGTERM + _ = cmd.Process.Signal(syscall.SIGTERM) - return errCh + // Wait for either the waitErrCh to be closed or have an error (ie, cmd exited) or graceperiod + // either way + select { + case <-waitErrCh: + case <-time.After(graceperiod): + } + // Cancel the cmdCtx passed to exec.StartContext + cmdCancel() + + // Move all errors from waitErrCh to errCh + for err := range waitErrCh { + errCh <- err + } + // Close errCh + close(errCh) } // Output - Creates a exec.Cmd using cmdStr. Runs exec.Cmd.Output and returns the resulting output as []byte and error diff --git a/exechelper_test.go b/exechelper_test.go index 957418f..d30f5b1 100644 --- a/exechelper_test.go +++ b/exechelper_test.go @@ -12,10 +12,13 @@ import ( "os/exec" "path" "strings" + "sync" + "syscall" "testing" "time" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "github.com/edwarnicke/exechelper" ) @@ -68,6 +71,9 @@ func TestStartWithContext(t *testing.T) { select { case err := <-errCh: assert.IsType(t, &exec.ExitError{}, err) // Because we canceled we will get an exec.ExitError{} + exitErr := err.(*exec.ExitError) + status := exitErr.ProcessState.Sys().(syscall.WaitStatus) + assert.Equal(t, status.Signal(), syscall.SIGKILL) assert.Empty(t, errCh) case <-time.After(time.Second): assert.Fail(t, "Failed to cancel context") @@ -168,3 +174,76 @@ func TestWithEnvKV(t *testing.T) { _, err := exechelper.Output("printenv", exechelper.WithEnvKV(key1)) assert.Error(t, err) } + +func TestWithGracePeriodWithContext(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + graceperiod := 200 * time.Millisecond + err := exechelper.Run("go build ./testcmds/afterterm") + require.NoError(t, err) + errCh := exechelper.Start( + fmt.Sprintf("./afterterm %s", graceperiod-50*time.Millisecond), + exechelper.WithContext(ctx), + exechelper.WithGracePeriod(graceperiod), + exechelper.WithStdout(os.Stdout), + ) + time.Sleep(100 * time.Millisecond) + cancelTime := time.Now() + cancel() + ok := true + for ok { + select { + case err, ok = <-errCh: + require.NoError(t, err) + case <-time.After(graceperiod + 50*time.Millisecond): + require.Failf(t, "", "failed to stop within graceperiod(%s): %s", graceperiod+50*time.Millisecond, time.Since(cancelTime)) + ok = false + } + } +} + +func TestWithGracePeriodExceeded(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + graceperiod := 200 * time.Millisecond + err := exechelper.Run("go build ./testcmds/afterterm") + require.NoError(t, err) + errCh := exechelper.Start( + fmt.Sprintf("./afterterm %s", 2*graceperiod), + exechelper.WithContext(ctx), + exechelper.WithGracePeriod(graceperiod), + exechelper.WithStdout(os.Stdout), + ) + time.Sleep(100 * time.Millisecond) + cancelTime := time.Now() + cancel() + ok := true + errOnce := sync.Once{} + for ok { + select { + case err, ok = <-errCh: + errOnce.Do(func() { + assert.True(t, ok, "at least one real err should be returned on errCh") + }) + if !ok { + break + } + require.IsType(t, &exec.ExitError{}, err) // Because graceperiod is exceeded, we get an ExitError for killed + exitErr := err.(*exec.ExitError) + status := exitErr.ProcessState.Sys().(syscall.WaitStatus) + assert.Equal(t, status.Signal(), syscall.SIGKILL) + assert.Empty(t, errCh) + case <-time.After(graceperiod + 100*time.Millisecond): + require.Failf(t, "", "failed to stop within graceperiod(%s): %s", graceperiod, time.Since(cancelTime)) + ok = false + } + } +} + +func TestWithGracePeriodWithoutContext(t *testing.T) { + graceperiod := 1 * time.Second + errCh := exechelper.Start( + "sleep 600", + exechelper.WithGracePeriod(graceperiod), + exechelper.WithStdout(os.Stdout), + ) + require.Error(t, <-errCh) +} diff --git a/options.go b/options.go index c66d447..2033b45 100644 --- a/options.go +++ b/options.go @@ -22,6 +22,7 @@ import ( "os" "os/exec" "strings" + "time" "github.com/pkg/errors" ) @@ -33,6 +34,8 @@ type CmdFunc func(cmd *exec.Cmd) error type Option struct { // Context - context (if any) for running the exec.Cmd Context context.Context + // SIGTERM grace period + GracePeriod time.Duration // CmdFunc to be applied to the exec.Cmd CmdOption CmdFunc } @@ -155,3 +158,9 @@ func WithEnvMap(envMap map[string]string) *Option { } return WithEnvKV(envs...) } + +// WithGracePeriod - will send a SIGTERM when ctx.Done() and wait up to gracePeriod before +// SIGKILLing the process. +func WithGracePeriod(gracePeriod time.Duration) *Option { + return &Option{GracePeriod: gracePeriod} +} diff --git a/options_linux.go b/options_linux.go new file mode 100644 index 0000000..ccf3966 --- /dev/null +++ b/options_linux.go @@ -0,0 +1,33 @@ +// Copyright (c) 2020 Cisco and/or its affiliates. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// +build linux + +package exechelper + +import ( + "os/exec" + "syscall" +) + +// WithOnDeathSignalChildren - set the signal that will be sent to children of process on processes death +// (only available on linux) +func WithOnDeathSignalChildren(signal syscall.Signal) *Option { + return CmdOption(func(cmd *exec.Cmd) error { + cmd.SysProcAttr.Pdeathsig = signal + return nil + }) +} diff --git a/testcmds/afterterm/main.go b/testcmds/afterterm/main.go new file mode 100644 index 0000000..c525d3c --- /dev/null +++ b/testcmds/afterterm/main.go @@ -0,0 +1,51 @@ +// Copyright (c) 2020 Cisco and/or its affiliates. +// +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package main + +import ( + "log" + "os" + "os/signal" + "syscall" + "time" +) + +func main() { + // Catch the signals + c := make(chan os.Signal, 1) + signal.Notify(c, []os.Signal{ + os.Interrupt, + // More Linux signals here + syscall.SIGHUP, + syscall.SIGTERM, + syscall.SIGQUIT, + }...) + log.Printf("starting...\n") + var d time.Duration + if len(os.Args) > 1 { + var err error + d, err = time.ParseDuration(os.Args[1]) + if err != nil { + log.Fatalf("os.Args[1]: %q is not a valid duration", os.Args[1]) + } + } + + sig := <-c + log.Printf("received signal %q\n", sig) + <-time.After(d) + log.Printf("exiting after %q\n", d) +}