Skip to content

Commit

Permalink
More granular node StoreChunks latency metric (#1240)
Browse files Browse the repository at this point in the history
  • Loading branch information
ian-shim authored Feb 7, 2025
1 parent ffdf18a commit 706e66b
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 6 deletions.
9 changes: 5 additions & 4 deletions node/grpc/metrics_v2.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
package grpc

import (
"time"

"github.com/Layr-Labs/eigenda/common"
"github.com/Layr-Labs/eigensdk-go/logging"
grpcprom "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"google.golang.org/grpc"
"time"
)

const namespace = "eigenda_node"
Expand Down Expand Up @@ -47,7 +48,7 @@ func NewV2Metrics(logger logging.Logger, registry *prometheus.Registry) (*Metric
Help: "The latency of a StoreChunks() RPC call.",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
},
[]string{},
[]string{"stage"},
)

storeChunksRequestSize := promauto.With(registry).NewGaugeVec(
Expand Down Expand Up @@ -94,8 +95,8 @@ func (m *MetricsV2) GetGRPCServerOption() grpc.ServerOption {
return m.grpcServerOption
}

func (m *MetricsV2) ReportStoreChunksLatency(latency time.Duration) {
m.storeChunksLatency.WithLabelValues().Observe(common.ToMilliseconds(latency))
// ReportStoreChunksLatency records how long one stage of a StoreChunks() RPC took.
//
// stage is used as the value of the metric's "stage" label, so each distinct
// string produces its own time series; keep it to a small, fixed set of names.
// latency is passed through common.ToMilliseconds before being observed on the
// storeChunksLatency metric.
func (m *MetricsV2) ReportStoreChunksLatency(stage string, latency time.Duration) {
m.storeChunksLatency.WithLabelValues(stage).Observe(common.ToMilliseconds(latency))
}

func (m *MetricsV2) ReportStoreChunksRequestSize(size uint64) {
Expand Down
9 changes: 7 additions & 2 deletions node/grpc/server_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,17 +140,20 @@ func (s *ServerV2) StoreChunks(ctx context.Context, in *pb.StoreChunksRequest) (
return nil, err
}

stageTimer := time.Now()
blobShards, rawBundles, err := s.node.DownloadBundles(ctx, batch, operatorState)
if err != nil {
return nil, api.NewErrorInternal(fmt.Sprintf("failed to download batch: %v", err))
}
s.metrics.ReportStoreChunksLatency("download", time.Since(stageTimer))

type storeResult struct {
keys []kvstore.Key
err error
}
storeChan := make(chan storeResult)
go func() {
storageStart := time.Now()
keys, size, err := s.node.StoreV2.StoreBatch(batch, rawBundles)
if err != nil {
storeChan <- storeResult{
Expand All @@ -161,13 +164,14 @@ func (s *ServerV2) StoreChunks(ctx context.Context, in *pb.StoreChunksRequest) (
}

s.metrics.ReportStoreChunksRequestSize(size)

s.metrics.ReportStoreChunksLatency("storage", time.Since(storageStart))
storeChan <- storeResult{
keys: keys,
err: nil,
}
}()

stageTimer = time.Now()
err = s.node.ValidateBatchV2(ctx, batch, blobShards, operatorState)
if err != nil {
res := <-storeChan
Expand All @@ -178,6 +182,7 @@ func (s *ServerV2) StoreChunks(ctx context.Context, in *pb.StoreChunksRequest) (
}
return nil, api.NewErrorInternal(fmt.Sprintf("failed to validate batch: %v", err))
}
s.metrics.ReportStoreChunksLatency("validation", time.Since(stageTimer))

res := <-storeChan
if res.err != nil {
Expand All @@ -189,7 +194,7 @@ func (s *ServerV2) StoreChunks(ctx context.Context, in *pb.StoreChunksRequest) (
return nil, api.NewErrorInternal(fmt.Sprintf("failed to sign batch: %v", err))
}

s.metrics.ReportStoreChunksLatency(time.Since(start))
s.metrics.ReportStoreChunksLatency("total", time.Since(start))

return &pb.StoreChunksReply{
Signature: sig,
Expand Down

0 comments on commit 706e66b

Please sign in to comment.