Skip to content

Commit

Permalink
Close idle connections periodically to balance load through SLB
Browse files Browse the repository at this point in the history
Collector connections (and other clients) use HTTP keep-alive which
prevents connections from being closed on the server.  This can cause
problems when scaling out ingestor pods because the existing connections
persist and are not balanced to new pods.

This decreases the idle timeout to force connections to close so that a new connection
can be established, which can land on a different host.
  • Loading branch information
jwilder committed Aug 22, 2023
1 parent 6de341b commit 9a859f2
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 4 deletions.
7 changes: 6 additions & 1 deletion cmd/ingestor/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,12 @@ func realMain(ctx *cli.Context) error {
metricsMux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
metricsMux.HandleFunc("/debug/pprof/trace", pprof.Trace)

srv := &http.Server{Handler: mux}
srv := &http.Server{
Handler: mux,
// Close idle connections fairly often to establish new connections through the load balancer
// so that long-lived connections don't stay pinned to the same node indefinitely.
IdleTimeout: 15 * time.Second,
}
srv.ErrorLog = newLogger()

go func() {
Expand Down
4 changes: 4 additions & 0 deletions collector/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,8 @@ func (s *Service) scrape() {
s.wg.Add(1)
defer s.wg.Done()

reconnectTimer := time.NewTicker(5 * time.Minute)
defer reconnectTimer.Stop()
t := time.NewTicker(s.opts.ScrapeInterval)
defer t.Stop()
for {
Expand All @@ -199,6 +201,8 @@ func (s *Service) scrape() {
return
case <-t.C:
s.scrapeTargets()
case <-reconnectTimer.C:
s.remoteClient.CloseIdleConnections()
}
}
}
Expand Down
8 changes: 5 additions & 3 deletions pkg/promremote/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ func NewClient(timeout time.Duration, insecureSkipVerify bool) (*Client, error)
t := http.DefaultTransport.(*http.Transport).Clone()
t.MaxIdleConns = 100
t.MaxConnsPerHost = 100
t.MaxIdleConnsPerHost = 100
t.MaxIdleConnsPerHost = 5
t.ResponseHeaderTimeout = timeout
t.IdleConnTimeout = time.Minute
t.TLSClientConfig.InsecureSkipVerify = insecureSkipVerify
Expand Down Expand Up @@ -55,8 +55,6 @@ func (c *Client) Write(ctx context.Context, endpoint string, wr *prompb.WriteReq
req.Header.Set("X-Prometheus-Remote-Write-Version", "0.1.0")
req.Header.Set("User-Agent", "adx-mon")

// req.Close = true

resp, err := c.httpClient.Do(req)
if err != nil {
return fmt.Errorf("http post: %w", err)
Expand All @@ -75,3 +73,7 @@ func (c *Client) Write(ctx context.Context, endpoint string, wr *prompb.WriteReq
}
return nil
}

// CloseIdleConnections asks the underlying HTTP client to close its idle
// connections by delegating to c.httpClient.CloseIdleConnections.
//
// NOTE(review): presumably httpClient is a net/http Client, in which case only
// idle keep-alive connections are closed and requests in flight are not
// interrupted — confirm against the Client struct definition.
func (c *Client) CloseIdleConnections() {
c.httpClient.CloseIdleConnections()
}

0 comments on commit 9a859f2

Please sign in to comment.