From bcbb7b6531b0971442d9d3f80127bea065fe5385 Mon Sep 17 00:00:00 2001 From: ___ Date: Mon, 21 Aug 2023 20:10:26 +0000 Subject: [PATCH] Auto-create tracing spans for log groups This will make it easy for CI users to trace and analyze their CI jobs without requiring additional work or scripting on their part. They can just look at tracing information on their tracing provider. --- agent/agent_configuration.go | 1 + clicommand/agent_start.go | 13 ++++++++++++- clicommand/bootstrap.go | 7 +++++++ internal/job/config.go | 3 +++ internal/job/executor.go | 1 + internal/job/shell/shell.go | 34 +++++++++++++++++++++++++++++++--- 6 files changed, 55 insertions(+), 4 deletions(-) diff --git a/agent/agent_configuration.go b/agent/agent_configuration.go index 5381d24c4e..171a231d32 100644 --- a/agent/agent_configuration.go +++ b/agent/agent_configuration.go @@ -57,4 +57,5 @@ type AgentConfiguration struct { AcquireJob string TracingBackend string TracingServiceName string + TraceLogGroups bool } diff --git a/clicommand/agent_start.go b/clicommand/agent_start.go index 0a862ed43c..1129992c94 100644 --- a/clicommand/agent_start.go +++ b/clicommand/agent_start.go @@ -142,6 +142,7 @@ type AgentStartConfig struct { MetricsDatadogDistributions bool `cli:"metrics-datadog-distributions"` TracingBackend string `cli:"tracing-backend"` TracingServiceName string `cli:"tracing-service-name"` + TraceLogGroups bool `cli:"trace-log-groups"` // Global flags Debug bool `cli:"debug"` @@ -582,6 +583,11 @@ var AgentStartCommand = cli.Command{ EnvVar: "BUILDKITE_TRACING_SERVICE_NAME", Value: "buildkite-agent", }, + cli.BoolFlag{ + Name: "trace-log-groups", + Usage: "Automatically creates tracing spans for log groups when tracing is enabled.", + EnvVar: "BUILDKITE_TRACE_LOG_GROUPS", + }, cli.StringFlag{ Name: "job-verification-key-path", Usage: "Path to a file containing a verification key. Passing this flag enables job verification. For hmac-sha256, the raw file content is used as the shared key", @@ -800,9 +806,13 @@ var AgentStartCommand = cli.Command{ }) // Sense check supported tracing backends, we don't want bootstrapped jobs to silently have no tracing - if _, has := tracetools.ValidTracingBackends[cfg.TracingBackend]; !has { + _, tracingEnabled := tracetools.ValidTracingBackends[cfg.TracingBackend] + if !tracingEnabled { l.Fatal("The given tracing backend %q is not supported. Valid backends are: %q", cfg.TracingBackend, maps.Keys(tracetools.ValidTracingBackends)) } + if !tracingEnabled && cfg.TraceLogGroups { + l.Fatal("Log group tracing cannot be enabled without a tracing backend.") + } if experiments.IsEnabled(experiments.AgentAPI) { shutdown := runAgentAPI(ctx, l, cfg.SocketsPath) @@ -864,6 +874,7 @@ var AgentStartCommand = cli.Command{ AcquireJob: cfg.AcquireJob, TracingBackend: cfg.TracingBackend, TracingServiceName: cfg.TracingServiceName, + TraceLogGroups: cfg.TraceLogGroups, JobVerificationNoSignatureBehavior: cfg.JobVerificationNoSignatureBehavior, JobVerificationInvalidSignatureBehavior: cfg.JobVerificationInvalidSignatureBehavior, diff --git a/clicommand/bootstrap.go b/clicommand/bootstrap.go index a15af5034d..80c7912baf 100644 --- a/clicommand/bootstrap.go +++ b/clicommand/bootstrap.go @@ -92,6 +92,7 @@ type BootstrapConfig struct { RedactedVars []string `cli:"redacted-vars" normalize:"list"` TracingBackend string `cli:"tracing-backend"` TracingServiceName string `cli:"tracing-service-name"` + TraceLogGroups bool `cli:"trace-log-groups"` } var BootstrapCommand = cli.Command{ @@ -353,6 +354,11 @@ var BootstrapCommand = cli.Command{ EnvVar: "BUILDKITE_TRACING_SERVICE_NAME", Value: "buildkite-agent", }, + cli.BoolFlag{ + Name: "trace-log-groups", + Usage: "Automatically creates tracing spans for log groups when tracing is enabled.", + EnvVar: "BUILDKITE_TRACE_LOG_GROUPS", + }, DebugFlag, LogLevelFlag, ExperimentsFlag, @@ -437,6 +443,7 @@ var BootstrapCommand = cli.Command{ Tag: cfg.Tag, TracingBackend: cfg.TracingBackend, TracingServiceName: cfg.TracingServiceName, + TraceLogGroups: cfg.TraceLogGroups, }) cctx, cancel := context.WithCancel(ctx) diff --git a/internal/job/config.go b/internal/job/config.go index ac52f9aaf6..2925479df5 100644 --- a/internal/job/config.go +++ b/internal/job/config.go @@ -164,6 +164,9 @@ type ExecutorConfig struct { // Service name to use when reporting traces. TracingServiceName string + + // Whether to auto-create tracing spans for log groups. + TraceLogGroups bool } // ReadFromEnvironment reads configuration from the Environment, returns a map diff --git a/internal/job/executor.go b/internal/job/executor.go index 5ac3afad98..1f202530af 100644 --- a/internal/job/executor.go +++ b/internal/job/executor.go @@ -82,6 +82,7 @@ func (e *Executor) Run(ctx context.Context) (exitCode int) { e.shell.Debug = e.ExecutorConfig.Debug e.shell.InterruptSignal = e.ExecutorConfig.CancelSignal e.shell.SignalGracePeriod = e.ExecutorConfig.SignalGracePeriod + e.shell.TraceLogGroups = e.ExecutorConfig.TraceLogGroups } if experiments.IsEnabled(experiments.KubernetesExec) { kubernetesClient := &kubernetes.Client{} diff --git a/internal/job/shell/shell.go b/internal/job/shell/shell.go index 4654c772e7..c6fbae4206 100644 --- a/internal/job/shell/shell.go +++ b/internal/job/shell/shell.go @@ -72,6 +72,9 @@ type Shell struct { // Amount of time to wait between sending the InterruptSignal and SIGKILL SignalGracePeriod time.Duration + + // Whether to auto create tracing spans for log groups + TraceLogGroups bool } // New returns a new Shell @@ -532,6 +535,29 @@ func round(d time.Duration) time.Duration { } } +// spanMakerWriter is an io.Writer shim that captures logs and automatically creates trace spans for the log group. +type spanMakerWriter struct { + w io.Writer + ctx context.Context + span opentracing.Span +} + +func (s *spanMakerWriter) Write(p []byte) (n int, err error) { + if bytes.HasPrefix(p, []byte("~~~ ")) || bytes.HasPrefix(p, []byte("--- ")) || bytes.HasPrefix(p, []byte("+++ ")) { + s.FinishIfActive() + operationName, _, _ := strings.Cut(string(p[4:]), "\r\n") + s.span, s.ctx = opentracing.StartSpanFromContext(s.ctx, operationName) + } + return s.w.Write(p) +} + +func (s *spanMakerWriter) FinishIfActive() { + if s.span != nil { + s.span.Finish() + s.span = nil + } +} + func (s *Shell) executeCommand( ctx context.Context, cmd *command, @@ -542,6 +568,8 @@ func (s *Shell) executeCommand( tracedEnv := env.FromSlice(cmd.Env) s.injectTraceCtx(ctx, tracedEnv) cmd.Env = tracedEnv.ToSlice() + logToSpanWriter := &spanMakerWriter{w: w, ctx: ctx, span: nil} + defer func() { logToSpanWriter.FinishIfActive() }() s.cmdLock.Lock() s.cmd = cmd @@ -561,11 +589,11 @@ func (s *Shell) executeCommand( // Modify process config based on execution flags if flags.PTY { cfg.PTY = true - cfg.Stdout = w + cfg.Stdout = logToSpanWriter } else { // Show stdout if requested or via debug if flags.Stdout { - cfg.Stdout = w + cfg.Stdout = logToSpanWriter } else if s.Debug { stdOutStreamer := NewLoggerStreamer(s.Logger) defer stdOutStreamer.Close() @@ -574,7 +602,7 @@ func (s *Shell) executeCommand( // Show stderr if requested or via debug if flags.Stderr { - cfg.Stderr = w + cfg.Stderr = logToSpanWriter } else if s.Debug { stdErrStreamer := NewLoggerStreamer(s.Logger) defer stdErrStreamer.Close()