From 1dfbcf824272d6f9da26ee542753f93efe54bd17 Mon Sep 17 00:00:00 2001 From: Tommy Reilly Date: Tue, 12 Nov 2024 10:38:13 -0500 Subject: [PATCH] Fix metrics (#2998) * Fix metrics * Fix errors against parca-dev/opentelemetry-ebpf-profile HEAD * update go.mod to point to github.com/parca-dev/opentelemetry-ebpf-profiler HEAD --- go.mod | 4 ++-- go.sum | 8 +++---- main.go | 29 ++++++++++--------------- metrics/all.go | 44 +------------------------------------- metrics/genschema/gen.py | 4 ++-- metrics/metrics.json | 7 ++++++ reporter/parca_reporter.go | 11 ++++++++-- 7 files changed, 36 insertions(+), 71 deletions(-) diff --git a/go.mod b/go.mod index d97ce87fef..e299da9f3b 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/common-nighthawk/go-figure v0.0.0-20210622060536-734e95fb86be github.com/containerd/containerd v1.7.20 github.com/docker/docker v26.1.5+incompatible - github.com/elastic/go-freelru v0.13.0 + github.com/elastic/go-freelru v0.15.0 github.com/gogo/protobuf v1.3.2 github.com/golang/snappy v0.0.4 github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1 @@ -149,4 +149,4 @@ require ( sigs.k8s.io/yaml v1.3.0 // indirect ) -replace go.opentelemetry.io/ebpf-profiler => github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20241030163650-5b64dd57b3ea +replace go.opentelemetry.io/ebpf-profiler => github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20241107010347-d633df82a9a2 diff --git a/go.sum b/go.sum index 81a598d91c..da6cdca29e 100644 --- a/go.sum +++ b/go.sum @@ -87,8 +87,8 @@ github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c h1:+pKlWGMw7gf6bQ github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= -github.com/elastic/go-freelru v0.13.0 h1:TKKY6yCfNNNky7Pj9xZAOEpBcdNgZJfihEftOb55omg= -github.com/elastic/go-freelru v0.13.0/go.mod h1:bSdWT4M0lW79K8QbX6XY2heQYSCqD7THoYf82pT/H3I= +github.com/elastic/go-freelru v0.15.0 h1:Jo1aY8JAvpyxbTDJEudrsBfjFDaALpfVv8mxuh9sfvI= +github.com/elastic/go-freelru v0.15.0/go.mod h1:bSdWT4M0lW79K8QbX6XY2heQYSCqD7THoYf82pT/H3I= github.com/elastic/go-perf v0.0.0-20241016160959-1342461adb4a h1:ymmtaN4bVCmKKeu4XEf6JEWNZKRXPMng1zjpKd+8rCU= github.com/elastic/go-perf v0.0.0-20241016160959-1342461adb4a/go.mod h1:Nt+pnRYvf0POC+7pXsrv8ubsEOSsaipJP0zlz1Ms1RM= github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= @@ -244,8 +244,8 @@ github.com/opencontainers/runtime-spec v1.1.0 h1:HHUyrt9mwHUjtasSbXSMvs4cyFxh+Bl github.com/opencontainers/runtime-spec v1.1.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/selinux v1.11.0 h1:+5Zbo97w3Lbmb3PeqQtpmTkMwsW5nRI3YaLpt7tQ7oU= github.com/opencontainers/selinux v1.11.0/go.mod h1:E5dMC3VPuVvVHDYmi78qvhJp8+M586T4DlDRYpFkyec= -github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20241030163650-5b64dd57b3ea h1:HNLGar6f5Sy9DqZirDu4zIniX3Bg90GelQB9HKdhoRQ= -github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20241030163650-5b64dd57b3ea/go.mod h1:poOphaa9n1NeidFV425KI5PzP6Ho90LEkERlJqLBEpE= +github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20241107010347-d633df82a9a2 h1:ZcYFRGuhgf3od4taXiiYOSx4k5W3aoQD22kdv9jK6Ms= +github.com/parca-dev/opentelemetry-ebpf-profiler v0.0.0-20241107010347-d633df82a9a2/go.mod h1:LZs0Ai6k5IPICeMqXRDpr1uyW7NJnoXgyrlaQh36XSM= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y= github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= diff --git a/main.go b/main.go index 592ba08084..182f5f6dc1 100644 --- a/main.go +++ b/main.go @@ -23,14 +23,6 @@ import ( "github.com/apache/arrow/go/v16/arrow/memory" "github.com/armon/circbuf" "github.com/common-nighthawk/go-figure" - "go.opentelemetry.io/ebpf-profiler/host" - otelmetrics "go.opentelemetry.io/ebpf-profiler/metrics" - otelreporter "go.opentelemetry.io/ebpf-profiler/reporter" - "go.opentelemetry.io/ebpf-profiler/times" - "go.opentelemetry.io/ebpf-profiler/tracehandler" - "go.opentelemetry.io/ebpf-profiler/tracer" - tracertypes "go.opentelemetry.io/ebpf-profiler/tracer/types" - "go.opentelemetry.io/ebpf-profiler/util" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/collectors" "github.com/prometheus/client_golang/prometheus/promauto" @@ -41,6 +33,14 @@ import ( "github.com/tklauser/numcpus" "github.com/zcalusic/sysinfo" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" + "go.opentelemetry.io/ebpf-profiler/host" + otelmetrics "go.opentelemetry.io/ebpf-profiler/metrics" + otelreporter "go.opentelemetry.io/ebpf-profiler/reporter" + "go.opentelemetry.io/ebpf-profiler/times" + "go.opentelemetry.io/ebpf-profiler/tracehandler" + "go.opentelemetry.io/ebpf-profiler/tracer" + tracertypes "go.opentelemetry.io/ebpf-profiler/tracer/types" + "go.opentelemetry.io/ebpf-profiler/util" "go.opentelemetry.io/otel/trace" "go.opentelemetry.io/otel/trace/noop" "golang.org/x/sys/unix" @@ -249,10 +249,6 @@ func mainWithExitCode() flags.ExitCode { return flags.Failure(fmt.Sprintf("Failed to probe eBPF syscall: %v", err)) } - if err = tracer.ProbeTracepoint(); err != nil { - log.Warnf("Failed to probe tracepoint: %v. Parca-agent may fail to run on some kernel versions.", err) - } - externalLabels := reporter.Labels{} if len(f.Metadata.ExternalLabels) > 0 { for name, value := range f.Metadata.ExternalLabels { @@ -317,7 +313,7 @@ func mainWithExitCode() flags.ExitCode { return flags.Failure("Failed to start reporting: %v", err) } otelmetrics.SetReporter(parcaReporter) - parcaReporter.Run(mainCtx) + parcaReporter.Start(mainCtx) var rep otelreporter.Reporter = parcaReporter // Load the eBPF code and map definitions @@ -340,11 +336,8 @@ func mainWithExitCode() flags.ExitCode { log.Printf("eBPF tracer loaded") defer trc.Close() - // Initial scan of /proc filesystem to list currently-active PIDs and have them processed. - if err = trc.StartPIDEventProcessor(mainCtx); err != nil { - log.Errorf("Failed to list processes from /proc: %v", err) - } - log.Debug("Completed initial PID listing") + // Start watching for PID events. + trc.StartPIDEventProcessor(mainCtx) // Attach our tracer to the perf event if err := trc.AttachTracer(); err != nil { diff --git a/metrics/all.go b/metrics/all.go index 0f44905691..203c385165 100644 --- a/metrics/all.go +++ b/metrics/all.go @@ -22,7 +22,7 @@ const ( MetricTypeCounter = 1 ) -var AllMetrics = map[otelmetrics.MetricID]Metric{ +var AllMetrics = map[otelmetrics.MetricID]Metric { otelmetrics.IDInvalid: { Desc: "Leave out the 0 value. It's an indication of not explicitly initialized variables.", Field: "", @@ -275,48 +275,6 @@ var AllMetrics = map[otelmetrics.MetricID]Metric{ Type: MetricTypeCounter, Unit: MetricUnitNone, }, - otelmetrics.IDExeMetadataOverwrite: { - Desc: "Indicator for whether the exeMetadata queue has been overwritten", - Field: "agent.overwrites.exe_metadata", - Type: MetricTypeCounter, - Unit: MetricUnitNone, - }, - otelmetrics.IDCountsForTracesOverwrite: { - Desc: "Indicator for whether the countsForTraces queue has been overwritten", - Field: "agent.overwrites.counts_for_traces", - Type: MetricTypeCounter, - Unit: MetricUnitNone, - }, - otelmetrics.IDMetricsOverwrite: { - Desc: "Indicator for whether the metrics queue has been overwritten", - Field: "agent.overwrites.metrics", - Type: MetricTypeCounter, - Unit: MetricUnitNone, - }, - otelmetrics.IDFramesForTracesOverwrite: { - Desc: "Indicator for whether the framesForTraces queue has been overwritten", - Field: "agent.overwrites.frames_for_traces", - Type: MetricTypeCounter, - Unit: MetricUnitNone, - }, - otelmetrics.IDFrameMetadataOverwrite: { - Desc: "Indicator for whether the frameMetadata queue has been overwritten", - Field: "agent.overwrites.frame_metadata", - Type: MetricTypeCounter, - Unit: MetricUnitNone, - }, - otelmetrics.IDHostMetadataOverwrite: { - Desc: "Indicator for whether the hostMetadata queue has been overwritten", - Field: "agent.overwrites.host_metadata", - Type: MetricTypeCounter, - Unit: MetricUnitNone, - }, - otelmetrics.IDFallbackSymbolsOverwrite: { - Desc: "Indicator for whether the fallbackSymbols queue has been overwritten", - Field: "agent.overwrites.fallback_symbols", - Type: MetricTypeCounter, - Unit: MetricUnitNone, - }, otelmetrics.IDPerfEventLost: { Desc: "Number of lost perf events in the communication between kernel and user space (report_events)", Field: "agent.errors.perf_event_lost", diff --git a/metrics/genschema/gen.py b/metrics/genschema/gen.py index 78e6bd1545..0f256a9895 100644 --- a/metrics/genschema/gen.py +++ b/metrics/genschema/gen.py @@ -20,7 +20,7 @@ def read_json_array(filename): package metrics import ( -\totelmetrics "github.com/open-telemetry/opentelemetry-ebpf-profiler/metrics" +\totelmetrics "go.opentelemetry.io/ebpf-profiler/metrics" ) const ( @@ -64,7 +64,7 @@ def get_unit(s): return "MetricUnitSeconds" case _: raise ValueError(f"Unknown metric unit: {s}") - + for metric in data: if not "name" in metric: continue diff --git a/metrics/metrics.json b/metrics/metrics.json index 2cc57f9b30..76232ff8a2 100644 --- a/metrics/metrics.json +++ b/metrics/metrics.json @@ -434,6 +434,7 @@ "id": 59 }, { + "obsolete": true, "description": "Indicator for whether the exeMetadata queue has been overwritten", "type": "counter", "name": "ExeMetadataOverwrite", @@ -441,6 +442,7 @@ "id": 60 }, { + "obsolete": true, "description": "Indicator for whether the countsForTraces queue has been overwritten", "type": "counter", "name": "CountsForTracesOverwrite", @@ -448,6 +450,7 @@ "id": 61 }, { + "obsolete": true, "description": "Indicator for whether the metrics queue has been overwritten", "type": "counter", "name": "MetricsOverwrite", @@ -455,6 +458,7 @@ "id": 62 }, { + "obsolete": true, "description": "Indicator for whether the framesForTraces queue has been overwritten", "type": "counter", "name": "FramesForTracesOverwrite", @@ -462,6 +466,7 @@ "id": 63 }, { + "obsolete": true, "description": "Indicator for whether the frameMetadata queue has been overwritten", "type": "counter", "name": "FrameMetadataOverwrite", @@ -469,6 +474,7 @@ "id": 64 }, { + "obsolete": true, "description": "Indicator for whether the hostMetadata queue has been overwritten", "type": "counter", "name": "HostMetadataOverwrite", @@ -476,6 +482,7 @@ "id": 65 }, { + "obsolete": true, "description": "Indicator for whether the fallbackSymbols queue has been overwritten", "type": "counter", "name": "FallbackSymbolsOverwrite", diff --git a/reporter/parca_reporter.go b/reporter/parca_reporter.go index ee6caeb1f3..58ec3d8255 100644 --- a/reporter/parca_reporter.go +++ b/reporter/parca_reporter.go @@ -235,6 +235,13 @@ func (r *ParcaReporter) ReportFramesForTrace(_ *libpf.Trace) {} func (r *ParcaReporter) ReportCountForTrace(_ libpf.TraceHash, _ uint16, _ *reporter.TraceEventMeta) { } +// ExecutableKnown returns true if the metadata of the Executable specified by fileID is +// cached in the reporter. +func (r *ParcaReporter) ExecutableKnown(fileID libpf.FileID) bool { + _, known := r.executables.Get(fileID) + return known +} + // ExecutableMetadata accepts a fileID with the corresponding filename // and caches this information. func (r *ParcaReporter) ExecutableMetadata(args *reporter.ExecutableMetadataArgs) { @@ -526,7 +533,7 @@ func New( return r, nil } -func (r *ParcaReporter) Run(mainCtx context.Context) (reporter.Reporter, error) { +func (r *ParcaReporter) Start(mainCtx context.Context) error { // Create a child context for reporting features ctx, cancelReporting := context.WithCancel(mainCtx) @@ -564,7 +571,7 @@ func (r *ParcaReporter) Run(mainCtx context.Context) (reporter.Reporter, error) cancelReporting() }() - return r, nil + return nil } // reportDataToBackend creates and sends out an arrow record for a Parca backend.