feat: Initial support for cooperative-sticky rebalancing #407
base: main
File 1 (stream processor):

```diff
@@ -238,16 +238,23 @@ def on_partitions_assigned(partitions: Mapping[Partition, int]) -> None:
                 "arroyo.consumer.partitions_assigned.count", len(partitions)
             )
 
         self.__buffered_messages.reset()
+        current_partitions = dict(self.__consumer.tell())
+        current_partitions.update(partitions)
         if self.__dlq_policy:
-            self.__dlq_policy.reset_offsets(partitions)
-        if partitions:
+            self.__dlq_policy.reset_dlq_limits(current_partitions)
+        if current_partitions:
             if self.__processing_strategy is not None:
-                logger.exception(
+                # TODO: for cooperative-sticky rebalancing this can happen
+                # quite often. we should port the changes to
+                # ProcessingStrategyFactory that we made in Rust: Remove
+                # create_with_partitions, replace with create +
+                # update_partitions
+                logger.error(
                     "Partition assignment while processing strategy active"
                 )
                 _close_strategy()
-            _create_strategy(partitions)
+            _create_strategy(current_partitions)
 
     @_rdkafka_callback(metrics=self.__metrics_buffer)
     def on_partitions_revoked(partitions: Sequence[Partition]) -> None:
```

Review comment on `logger.error(`: can we downgrade to warning if this is expected to happen on a regular basis?
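This hunk contains the core of the change: with the cooperative-sticky assignor, the assignment callback no longer receives the full assignment, only the partitions that were added, so the processor reconstructs the complete set by merging `consumer.tell()` with the incoming mapping. A standalone sketch of that merge, and of why it is a no-op under the eager protocol (partition and offset values are made up):

```python
# Standalone sketch, not arroyo code: under cooperative-sticky the assign
# callback only delivers newly added partitions, so the full assignment is
# rebuilt from what the consumer already owns.
from typing import Dict, Tuple

Partition = Tuple[str, int]  # (topic, index) stand-in for arroyo's Partition

def full_assignment(
    owned: Dict[Partition, int],     # what consumer.tell() reports
    assigned: Dict[Partition, int],  # what the rebalance callback delivers
) -> Dict[Partition, int]:
    current = dict(owned)
    current.update(assigned)
    return current

# Eager protocol: everything was revoked first, so tell() is empty and the
# callback already carries the complete assignment; the merge changes nothing.
assert full_assignment({}, {("t", 0): 5, ("t", 1): 7}) == {("t", 0): 5, ("t", 1): 7}

# Cooperative protocol: partition 1 is retained and only partition 2 arrives;
# the merge is what recovers the full assignment.
assert full_assignment({("t", 1): 7}, {("t", 2): 0}) == {("t", 1): 7, ("t", 2): 0}
```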
```diff
@@ -278,6 +285,9 @@ def on_partitions_revoked(partitions: Sequence[Partition]) -> None:
             except RuntimeError:
                 pass
 
+        for partition in partitions:
+            self.__buffered_messages.remove(partition)
+
         # Partition revocation can happen anytime during the consumer lifecycle and happen
         # multiple times. What we want to know is that the consumer is not stuck somewhere.
         # The presence of this message as the last message of a consumer
```
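The TODO above points at an interface change already made in the Rust port of arroyo. A hedged sketch of what a `create` plus `update_partitions` factory could look like in Python; the method names come from the TODO, not from arroyo's current API, and the `Commit`/`ProcessingStrategy`/`Partition` imports are assumed to match arroyo's existing types:

```python
from abc import ABC, abstractmethod
from typing import Generic, Mapping, TypeVar

from arroyo.processing.strategies.abstract import ProcessingStrategy
from arroyo.types import Commit, Partition

TStrategyPayload = TypeVar("TStrategyPayload")

class IncrementalStrategyFactory(ABC, Generic[TStrategyPayload]):
    """Sketch of the create + update_partitions shape the TODO describes.

    Unlike create_with_partitions, the strategy is built once and is then
    told about assignment deltas, so an incremental rebalance does not
    force a full strategy teardown and rebuild.
    """

    @abstractmethod
    def create(self, commit: Commit) -> ProcessingStrategy[TStrategyPayload]:
        """Instantiate the strategy once, when the consumer starts."""
        raise NotImplementedError

    @abstractmethod
    def update_partitions(self, partitions: Mapping[Partition, int]) -> None:
        """Notify the factory that the assignment changed incrementally."""
        raise NotImplementedError
```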
File 2 (Kafka streams tests):
```diff
@@ -14,7 +14,10 @@
 
 from arroyo.backends.kafka import KafkaConsumer, KafkaPayload, KafkaProducer
 from arroyo.backends.kafka.commit import CommitCodec
-from arroyo.backends.kafka.configuration import build_kafka_configuration
+from arroyo.backends.kafka.configuration import (
+    KafkaBrokerConfig,
+    build_kafka_configuration,
+)
 from arroyo.backends.kafka.consumer import as_kafka_configuration_bool
 from arroyo.commit import IMMEDIATE, Commit
 from arroyo.errors import ConsumerError, EndOfPartition
```
```diff
@@ -71,10 +74,16 @@ def get_topic(
 
 
 class TestKafkaStreams(StreamsTestMixin[KafkaPayload]):
+    @property
+    def configuration(self) -> KafkaBrokerConfig:
+        config = {
+            "bootstrap.servers": os.environ.get("DEFAULT_BROKERS", "localhost:9092"),
+        }
 
-    configuration = build_kafka_configuration(
-        {"bootstrap.servers": os.environ.get("DEFAULT_BROKERS", "localhost:9092")}
-    )
+        if self.incremental_rebalancing:
+            config["partition.assignment.strategy"] = "cooperative-sticky"
+
+        return build_kafka_configuration(config)
 
     @contextlib.contextmanager
     def get_topic(self, partitions: int = 1) -> Iterator[Topic]:
```
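For readers unfamiliar with the option being toggled: `partition.assignment.strategy` is passed through to librdkafka, and `cooperative-sticky` switches the group to the incremental (COOPERATIVE) rebalance protocol. A minimal plain confluent-kafka sketch of what that changes for an application; broker, group, and topic names are made up:

```python
from confluent_kafka import Consumer

consumer = Consumer(
    {
        "bootstrap.servers": "localhost:9092",
        "group.id": "example-group",
        "partition.assignment.strategy": "cooperative-sticky",
    }
)

def on_assign(consumer: Consumer, partitions) -> None:
    # Under the cooperative protocol this receives only the partitions being
    # *added*; partitions kept across the rebalance are not re-listed, and
    # consumption on them continues uninterrupted.
    print("added:", partitions)

def on_revoke(consumer: Consumer, partitions) -> None:
    # Likewise, only the partitions actually being taken away.
    print("removed:", partitions)

consumer.subscribe(["example-topic"], on_assign=on_assign, on_revoke=on_revoke)
```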
```diff
@@ -90,7 +99,7 @@ def get_consumer(
         enable_end_of_partition: bool = True,
         auto_offset_reset: str = "earliest",
         strict_offset_reset: Optional[bool] = None,
-        max_poll_interval_ms: Optional[int] = None
+        max_poll_interval_ms: Optional[int] = None,
     ) -> KafkaConsumer:
         configuration = {
             **self.configuration,
```
```diff
@@ -210,7 +219,9 @@ def test_consumer_polls_when_paused(self) -> None:
         poll_interval = 6000
 
         with self.get_topic() as topic:
-            with closing(self.get_producer()) as producer, closing(self.get_consumer(max_poll_interval_ms=poll_interval)) as consumer:
+            with closing(self.get_producer()) as producer, closing(
+                self.get_consumer(max_poll_interval_ms=poll_interval)
+            ) as consumer:
                 producer.produce(topic, next(self.get_payloads())).result(5.0)
 
                 processor = StreamProcessor(consumer, topic, factory, IMMEDIATE)
```
```diff
@@ -245,6 +256,10 @@ def test_consumer_polls_when_paused(self) -> None:
                 assert consumer.paused() == []
 
 
+class TestKafkaStreamsIncrementalRebalancing(TestKafkaStreams):
+    incremental_rebalancing = True
+
+
 def test_commit_codec() -> None:
     commit = Commit(
         "group", Partition(Topic("topic"), 0), 0, time.time(), time.time() - 5
```

Review comment on the new `TestKafkaStreamsIncrementalRebalancing` class: unused?
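For context, the subclass re-runs the whole suite via test inheritance: pytest collects every test method `TestKafkaStreamsIncrementalRebalancing` inherits from `TestKafkaStreams`, and the `configuration` property above switches to the cooperative assignor when `incremental_rebalancing` is set. The pattern in isolation, with made-up names:

```python
# pytest collects Child as its own test class and re-runs every inherited
# test_* method, now with flag = True; no test bodies need to change.
class Base:
    flag = False

    def test_flag_drives_behaviour(self) -> None:
        assert isinstance(self.flag, bool)

class Child(Base):
    flag = True
```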
File 3 (stream processor tests):
```diff
@@ -41,6 +41,7 @@ def test_stream_processor_lifecycle() -> None:
 
     # The processor should accept heartbeat messages without an assignment or
     # active processor.
+    consumer.tell.return_value = {}
     consumer.poll.return_value = None
     processor._run_once()
 
```
```diff
@@ -166,6 +167,7 @@ def test_stream_processor_termination_on_error() -> None:
     offset = 0
     now = datetime.now()
 
+    consumer.tell.return_value = {}
     consumer.poll.return_value = BrokerValue(0, partition, offset, now)
 
     exception = NotImplementedError("error")
```
```diff
@@ -199,6 +201,7 @@ def test_stream_processor_invalid_message_from_poll() -> None:
     offset = 1
     now = datetime.now()
 
+    consumer.tell.return_value = {}
     consumer.poll.side_effect = [BrokerValue(0, partition, offset, now)]
 
     strategy = mock.Mock()
```
```diff
@@ -236,6 +239,7 @@ def test_stream_processor_invalid_message_from_submit() -> None:
     offset = 1
     now = datetime.now()
 
+    consumer.tell.return_value = {}
     consumer.poll.side_effect = [
         BrokerValue(0, partition, offset, now),
         BrokerValue(1, partition, offset + 1, now),
```
```diff
@@ -283,6 +287,7 @@ def test_stream_processor_create_with_partitions() -> None:
     topic = Topic("topic")
 
     consumer = mock.Mock()
+    consumer.tell.return_value = {}
     strategy = mock.Mock()
     factory = mock.Mock()
     factory.create_with_partitions.return_value = strategy
```
```diff
@@ -306,13 +311,15 @@ def test_stream_processor_create_with_partitions() -> None:
     assert factory.create_with_partitions.call_count == 1
     assert create_args[1] == offsets_p0
 
+    consumer.tell.return_value = {**offsets_p0}
+
     # Second partition assigned
     offsets_p1 = {Partition(topic, 1): 0}
     assignment_callback(offsets_p1)
 
     create_args, _ = factory.create_with_partitions.call_args
     assert factory.create_with_partitions.call_count == 2
-    assert create_args[1] == offsets_p1
+    assert create_args[1] == {**offsets_p1, **offsets_p0}
 
     processor._run_once()
```

Review comment on the changed assertion: was this test change related to your other changes? since there's no cooperative rebalancing here, seems like the assertions should stay the same?
```diff
@@ -376,6 +383,7 @@ def run_commit_policy_test(
 ) -> Sequence[int]:
     commit = mock.Mock()
     consumer = mock.Mock()
+    consumer.tell.return_value = {}
     consumer.commit_offsets = commit
 
     factory = CommitOffsetsFactory()
```
```diff
@@ -551,6 +559,7 @@ def test_dlq() -> None:
     partition = Partition(topic, 0)
     consumer = mock.Mock()
     consumer.poll.return_value = BrokerValue(0, partition, 1, datetime.now())
+    consumer.tell.return_value = {}
     strategy = mock.Mock()
     strategy.submit.side_effect = InvalidMessage(partition, 1)
     factory = mock.Mock()
```
```diff
@@ -585,6 +594,7 @@ def test_healthcheck(tmpdir: py.path.local) -> None:
     consumer = mock.Mock()
     now = datetime.now()
     consumer.poll.return_value = BrokerValue(0, partition, 1, now)
+    consumer.tell.return_value = {}
     strategy = mock.Mock()
     strategy.submit.side_effect = InvalidMessage(partition, 1)
     factory = mock.Mock()
```
Review comment: sorry i said the wrong thing earlier, this should be `group.protocol`
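This presumably refers to the consumer configuration in the test changes above: `group.protocol` is the newer librdkafka option for the KIP-848 consumer-group protocol, where partition assignment moves to the broker side, as opposed to the classic protocol's client-side `partition.assignment.strategy`. A sketch of the two configurations; broker and group names are made up, and availability of the second depends on the librdkafka and broker versions in use:

```python
# Classic group protocol with the client-side cooperative assignor
# (what this PR configures):
classic_cooperative = {
    "bootstrap.servers": "localhost:9092",
    "group.id": "example-group",
    "partition.assignment.strategy": "cooperative-sticky",
}

# KIP-848 next-generation protocol: assignment is computed broker-side and
# rebalances are incremental by design; partition.assignment.strategy does
# not apply in this mode.
kip_848 = {
    "bootstrap.servers": "localhost:9092",
    "group.id": "example-group",
    "group.protocol": "consumer",
}
```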