Skip to content

Commit

Permalink
GH-3062: Fix KafkaBinderMetrics for resource leaks
Browse files Browse the repository at this point in the history
Fixes: #3062

The `KafkaBinderMetrics` creates `KafkaConsumer` instances and schedule the fix rate
task for them, but never closes them even when the `scheduler` is shut downed

* Implement a `Lifecycle` contract in the `KafkaBinderMetrics` and call `close()`
from the `stop()` to satisfy CRaC resource management expectations.
* Also close all the `KafkaConsumer` instances from the `metadataConsumers`

**Cherry-pick to `4.1.x`**
  • Loading branch information
artembilan committed Jan 3, 2025
1 parent af6b4ff commit 90afffb
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 13 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2016-2024 the original author or authors.
* Copyright 2016-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -20,16 +20,15 @@
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.ReentrantLock;
import java.util.function.ToDoubleFunction;

Expand All @@ -50,10 +49,12 @@
import org.springframework.cloud.stream.binder.kafka.common.TopicInformation;
import org.springframework.cloud.stream.binder.kafka.properties.KafkaBinderConfigurationProperties;
import org.springframework.context.ApplicationListener;
import org.springframework.context.Lifecycle;
import org.springframework.kafka.core.ConsumerFactory;
import org.springframework.kafka.core.DefaultKafkaConsumerFactory;
import org.springframework.lang.Nullable;
import org.springframework.util.ObjectUtils;
import org.springframework.util.ReflectionUtils;

/**
* Metrics for Kafka binder.
Expand All @@ -72,7 +73,7 @@
* @author Omer Celik
*/
public class KafkaBinderMetrics
implements MeterBinder, ApplicationListener<BindingCreatedEvent>, AutoCloseable {
implements MeterBinder, ApplicationListener<BindingCreatedEvent>, AutoCloseable, Lifecycle {

private static final int DEFAULT_TIMEOUT = 5;

Expand Down Expand Up @@ -101,6 +102,8 @@ public class KafkaBinderMetrics

private final ReentrantLock consumerFactoryLock = new ReentrantLock();

private final AtomicBoolean running = new AtomicBoolean();

public KafkaBinderMetrics(KafkaMessageChannelBinder binder,
KafkaBinderConfigurationProperties binderConfigurationProperties,
ConsumerFactory<?, ?> defaultConsumerFactory,
Expand All @@ -125,14 +128,14 @@ public void setTimeout(int timeout) {

@Override
public void bindTo(MeterRegistry registry) {
/**
/*
* We can't just replace one scheduler with another.
* Before and even after the old one is gathered by GC, it's threads still exist, consume memory and CPU resources to switch contexts.
* Theoretically, as a result of processing n topics, there will be about (1+n)*n/2 threads simultaneously at the same time.
*/
if (this.scheduler != null) {
LOG.info("Try to shutdown the old scheduler with " + ((ScheduledThreadPoolExecutor) scheduler).getPoolSize() + " threads");
this.scheduler.shutdown();
this.scheduler.shutdownNow();
}

this.scheduler = Executors.newScheduledThreadPool(this.binder.getTopicsInUse().size());
Expand Down Expand Up @@ -278,10 +281,50 @@ public void onApplicationEvent(BindingCreatedEvent event) {
}

@Override
public void close() throws Exception {
if (this.meterRegistry != null) {
this.meterRegistry.find(OFFSET_LAG_METRIC_NAME).meters().forEach(this.meterRegistry::remove);
public void close() {
if (this.scheduler != null) {
this.consumerFactoryLock.lock();
try {
if (this.meterRegistry != null) {
this.meterRegistry.find(OFFSET_LAG_METRIC_NAME).meters().forEach(this.meterRegistry::remove);
}
this.scheduler.shutdownNow();
try {
this.scheduler.awaitTermination(
binderConfigurationProperties.getMetrics().getOffsetLagMetricsInterval().toSeconds(),
TimeUnit.SECONDS);
}
catch (InterruptedException ex) {
Thread.currentThread().interrupt();
ReflectionUtils.rethrowRuntimeException(ex);
}
}
finally {
this.scheduler = null;
this.metadataConsumers.values().forEach(Consumer::close);
this.metadataConsumers.clear();
this.consumerFactoryLock.unlock();
}
}
Optional.ofNullable(scheduler).ifPresent(ExecutorService::shutdown);
}

@Override
public void start() {
this.running.set(true);
// Nothing else to do here. The 'bindTo()' is called from the 'onApplicationEvent()',
// which, in turn, is emitted from the 'AbstractBindingLifecycle.start()' logic.
}

@Override
public void stop() {
if (this.running.compareAndSet(true, false)) {
close();
}
}

@Override
public boolean isRunning() {
return this.running.get();
}

}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2016-2024 the original author or authors.
* Copyright 2016-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -57,6 +57,7 @@
* @author Tomek Szmytka
* @author Nico Heller
* @author Kurt Hong
* @author Artem Bilan
*/
class KafkaBinderMetricsTest {

Expand Down Expand Up @@ -346,10 +347,11 @@ public void usesBeginningOffsetIfNoCommittedOffsetFound() {
}

@Test
public void shouldShutdownSchedulerOnClose() throws Exception {
public void shouldShutdownSchedulerOnClose() {
metrics.bindTo(meterRegistry);
assertThat(metrics.scheduler).isNotNull();
metrics.close();
assertThat(metrics.scheduler.isShutdown()).isTrue();
assertThat(metrics.scheduler).isNull();
}

@Test
Expand Down

0 comments on commit 90afffb

Please sign in to comment.