Skip to content

Commit 35a42b8

Browse files
authored
Enhance SB polling intervals (#4328)
* Change usage of retry-after header With this change the retry-after header will be treated as an absolute value and not used for exponential backoff. Whenever the retry-after interval sent by the service broker is larger than the calculated interval using the base interval and exponential backoff, the retry-after interval will be used. When the CC calculated interval is larger the retry-after interval will be ignored. * Introduce config to cap service related polling interval The maximum polling interval was hardcoded to 24h when set on the job. Though, the exponential backoff calculation did not take this into account. Now the maximum can be configured through `broker_client_maximum_async_poll_interval_seconds` and will be taken into account for all scenarios. Default is still 24h and the config is optional.
1 parent 375620f commit 35a42b8

File tree

5 files changed

+195
-27
lines changed

5 files changed

+195
-27
lines changed

app/jobs/reoccurring_job.rb

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,12 @@ def maximum_duration_seconds=(duration)
3030
end
3131

3232
def polling_interval_seconds
33-
[@polling_interval || 0, default_polling_interval_seconds].max
33+
@polling_interval || 0
3434
end
3535

3636
def polling_interval_seconds=(interval)
3737
interval = interval.to_i if interval.is_a? String
38-
@polling_interval = interval.clamp(default_polling_interval_seconds, 24.hours)
38+
@polling_interval = interval.clamp(default_polling_interval_seconds, maximum_polling_interval)
3939
end
4040

4141
private
@@ -58,8 +58,16 @@ def default_polling_exponential_backoff
5858
Config.config.get(:broker_client_async_poll_exponential_backoff_rate)
5959
end
6060

61+
def maximum_polling_interval
62+
Config.config.get(:broker_client_max_async_poll_interval_seconds)
63+
end
64+
6165
def next_execution_in
62-
polling_interval_seconds * (default_polling_exponential_backoff**retry_number)
66+
# use larger polling_interval. Either from job or calculated.
67+
polling_interval = [polling_interval_seconds, default_polling_interval_seconds * (default_polling_exponential_backoff**retry_number)].max
68+
69+
# cap polling interval at maximum_polling_interval
70+
[polling_interval, maximum_polling_interval].min
6371
end
6472

6573
def next_enqueue_would_exceed_maximum_duration?

config/cloud_controller.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ max_retained_revisions_per_app: 100
7171
broker_client_default_async_poll_interval_seconds: 60
7272
broker_client_max_async_poll_duration_minutes: 10080
7373
broker_client_async_poll_exponential_backoff_rate: 1.0
74+
broker_client_max_async_poll_interval_seconds: 86400
7475

7576
broker_client_response_parser:
7677
log_errors: true

lib/cloud_controller/config_schemas/base/api_schema.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ class ApiSchema < VCAP::Config
209209
broker_client_default_async_poll_interval_seconds: Integer,
210210
broker_client_max_async_poll_duration_minutes: Integer,
211211
broker_client_async_poll_exponential_backoff_rate: Numeric,
212+
broker_client_max_async_poll_interval_seconds: Integer,
212213
optional(:broker_client_response_parser) => {
213214
log_errors: bool,
214215
log_validators: bool,

lib/cloud_controller/config_schemas/base/worker_schema.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ class WorkerSchema < VCAP::Config
115115
broker_client_default_async_poll_interval_seconds: Integer,
116116
broker_client_max_async_poll_duration_minutes: Integer,
117117
broker_client_async_poll_exponential_backoff_rate: Numeric,
118+
broker_client_max_async_poll_interval_seconds: Integer,
118119
optional(:broker_client_response_parser) => {
119120
log_errors: bool,
120121
log_validators: bool,

spec/unit/jobs/reoccurring_job_spec.rb

Lines changed: 181 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def perform
109109
end
110110
end
111111

112-
it 'keeps the polling interval within the bounds' do
112+
it 'keeps the polling interval within the default bounds' do
113113
job = FakeJob.new
114114
job.polling_interval_seconds = 5
115115
expect(job.polling_interval_seconds).to eq(60)
@@ -118,11 +118,25 @@ def perform
118118
expect(job.polling_interval_seconds).to eq(24.hours)
119119
end
120120

121-
context 'exponential backoff rate' do
122-
context 'updates the polling interval' do
123-
it 'when changing exponential backoff rate only' do
121+
context 'when maximum polling interval is configured' do
122+
before do
123+
TestConfig.config[:broker_client_max_async_poll_interval_seconds] = 1800
124+
end
125+
126+
it 'limits the polling interval to the configured maximum' do
127+
job = FakeJob.new
128+
job.polling_interval_seconds = 10.days
129+
expect(job.polling_interval_seconds).to eq(1800)
130+
end
131+
end
132+
133+
describe 'exponential backoff rate' do
134+
context 'when changing exponential backoff rate only' do
135+
before do
124136
TestConfig.config[:broker_client_async_poll_exponential_backoff_rate] = 2.0
137+
end
125138

139+
it 'updates the polling interval' do
126140
enqueued_time = 0
127141

128142
Timecop.freeze do
@@ -141,11 +155,15 @@ def perform
141155
end
142156
end
143157
end
158+
end
144159

145-
it 'when changing exponential backoff rate and default polling interval' do
160+
context 'when changing exponential backoff rate and default polling interval' do
161+
before do
146162
TestConfig.config[:broker_client_async_poll_exponential_backoff_rate] = 1.3
147163
TestConfig.config[:broker_client_default_async_poll_interval_seconds] = 10
164+
end
148165

166+
it 'updates the polling interval' do
149167
enqueued_time = 0
150168

151169
Timecop.freeze do
@@ -164,36 +182,175 @@ def perform
164182
end
165183
end
166184
end
185+
end
167186

168-
it 'when changing exponential backoff rate and retry_after from the job' do
169-
TestConfig.config[:broker_client_async_poll_exponential_backoff_rate] = 1.3
170-
TestConfig.config[:broker_client_default_async_poll_interval_seconds] = 10
187+
describe 'changing exponential backoff rate and retry_after from the job' do
188+
context 'when retry-after is larger than calculated backoff' do
189+
let(:fake_job) { FakeJob.new(retry_after: [20, 30]) }
171190

172-
enqueued_time = 0
191+
before do
192+
TestConfig.config[:broker_client_async_poll_exponential_backoff_rate] = 1.3
193+
TestConfig.config[:broker_client_default_async_poll_interval_seconds] = 10
194+
end
173195

174-
Timecop.freeze do
175-
Jobs::Enqueuer.new(queue: Jobs::Queues.generic).enqueue_pollable(FakeJob.new(retry_after: [20, 30]))
176-
execute_all_jobs(expected_successes: 1, expected_failures: 0)
177-
enqueued_time = Time.now
196+
it 'uses retry-after interval' do
197+
enqueued_time = 0
198+
199+
Timecop.freeze do
200+
Jobs::Enqueuer.new(queue: Jobs::Queues.generic).enqueue_pollable(fake_job)
201+
execute_all_jobs(expected_successes: 1, expected_failures: 0)
202+
enqueued_time = Time.now
203+
end
204+
205+
# the job should run after 20s (20s > 10 * 1.3^0)
206+
Timecop.freeze(19.seconds.after(enqueued_time)) do
207+
execute_all_jobs(expected_successes: 0, expected_failures: 0)
208+
end
209+
210+
Timecop.freeze(21.seconds.after(enqueued_time)) do
211+
enqueued_time = Time.now
212+
execute_all_jobs(expected_successes: 1, expected_failures: 0)
213+
end
214+
215+
# the job should run after 30s (30s > 10 * 1.3^1)
216+
Timecop.freeze(29.seconds.after(enqueued_time)) do
217+
execute_all_jobs(expected_successes: 0, expected_failures: 0)
218+
end
219+
220+
Timecop.freeze(31.seconds.after(enqueued_time)) do
221+
execute_all_jobs(expected_successes: 1, expected_failures: 0)
222+
end
178223
end
224+
end
179225

180-
# the job should run after 20s * 1.3^0 = 20 seconds
181-
Timecop.freeze(19.seconds.after(enqueued_time)) do
182-
execute_all_jobs(expected_successes: 0, expected_failures: 0)
226+
context 'when retry-after is smaller than calculated backoff' do
227+
let(:fake_job) { FakeJob.new(retry_after: [10, 20]) }
228+
229+
before do
230+
TestConfig.config[:broker_client_async_poll_exponential_backoff_rate] = 1.3
231+
TestConfig.config[:broker_client_default_async_poll_interval_seconds] = 30
183232
end
184233

185-
Timecop.freeze(21.seconds.after(enqueued_time)) do
186-
enqueued_time = Time.now
187-
execute_all_jobs(expected_successes: 1, expected_failures: 0)
234+
it 'uses calculated interval' do
235+
enqueued_time = 0
236+
237+
Timecop.freeze do
238+
Jobs::Enqueuer.new(queue: Jobs::Queues.generic).enqueue_pollable(fake_job)
239+
execute_all_jobs(expected_successes: 1, expected_failures: 0)
240+
enqueued_time = Time.now
241+
end
242+
243+
# the job should run after 30s (30s > 10s)
244+
Timecop.freeze(29.seconds.after(enqueued_time)) do
245+
execute_all_jobs(expected_successes: 0, expected_failures: 0)
246+
end
247+
248+
Timecop.freeze(31.seconds.after(enqueued_time)) do
249+
enqueued_time = Time.now
250+
execute_all_jobs(expected_successes: 1, expected_failures: 0)
251+
end
252+
253+
# the job should run after 30s (30s * 1.3^1 = 39 > 20s)
254+
Timecop.freeze(38.seconds.after(enqueued_time)) do
255+
execute_all_jobs(expected_successes: 0, expected_failures: 0)
256+
end
257+
258+
Timecop.freeze(40.seconds.after(enqueued_time)) do
259+
execute_all_jobs(expected_successes: 1, expected_failures: 0)
260+
end
188261
end
262+
end
263+
264+
context 'when calculated backoff gets larger than retry-after' do
265+
let(:fake_job) { FakeJob.new(retry_after: [15, 15, 15]) }
189266

190-
# the job should run after 30s * 1.3^1 = 39 seconds
191-
Timecop.freeze(38.seconds.after(enqueued_time)) do
192-
execute_all_jobs(expected_successes: 0, expected_failures: 0)
267+
before do
268+
TestConfig.config[:broker_client_async_poll_exponential_backoff_rate] = 2
269+
TestConfig.config[:broker_client_default_async_poll_interval_seconds] = 5
193270
end
194271

195-
Timecop.freeze(40.seconds.after(enqueued_time)) do
196-
execute_all_jobs(expected_successes: 1, expected_failures: 0)
272+
it 'uses retry-after until calculated backoff is larger' do
273+
enqueued_time = 0
274+
275+
Timecop.freeze do
276+
Jobs::Enqueuer.new(queue: Jobs::Queues.generic).enqueue_pollable(fake_job)
277+
execute_all_jobs(expected_successes: 1, expected_failures: 0)
278+
enqueued_time = Time.now
279+
end
280+
281+
# the job should run after 15s (15s > 5s (5 * 2^0))
282+
Timecop.freeze(14.seconds.after(enqueued_time)) do
283+
execute_all_jobs(expected_successes: 0, expected_failures: 0)
284+
end
285+
286+
Timecop.freeze(16.seconds.after(enqueued_time)) do
287+
enqueued_time = Time.now
288+
execute_all_jobs(expected_successes: 1, expected_failures: 0)
289+
end
290+
291+
# the job should run after 15s (15s > 10s (5 * 2^1))
292+
Timecop.freeze(14.seconds.after(enqueued_time)) do
293+
execute_all_jobs(expected_successes: 0, expected_failures: 0)
294+
end
295+
296+
Timecop.freeze(16.seconds.after(enqueued_time)) do
297+
enqueued_time = Time.now
298+
execute_all_jobs(expected_successes: 1, expected_failures: 0)
299+
end
300+
301+
# the job should run after 20s (20s > 15s (5 * 2^2))
302+
Timecop.freeze(19.seconds.after(enqueued_time)) do
303+
execute_all_jobs(expected_successes: 0, expected_failures: 0)
304+
end
305+
306+
Timecop.freeze(21.seconds.after(enqueued_time)) do
307+
execute_all_jobs(expected_successes: 1, expected_failures: 0)
308+
end
309+
end
310+
311+
context 'when maximum polling interval is configured' do
312+
before do
313+
TestConfig.config[:broker_client_max_async_poll_interval_seconds] = 18
314+
end
315+
316+
it 'limits the polling interval to the configured maximum' do
317+
enqueued_time = 0
318+
319+
Timecop.freeze do
320+
Jobs::Enqueuer.new(queue: Jobs::Queues.generic).enqueue_pollable(fake_job)
321+
execute_all_jobs(expected_successes: 1, expected_failures: 0)
322+
enqueued_time = Time.now
323+
end
324+
325+
# the job should run after 15s (15s > 5s (5 * 2^0))
326+
Timecop.freeze(14.seconds.after(enqueued_time)) do
327+
execute_all_jobs(expected_successes: 0, expected_failures: 0)
328+
end
329+
330+
Timecop.freeze(16.seconds.after(enqueued_time)) do
331+
enqueued_time = Time.now
332+
execute_all_jobs(expected_successes: 1, expected_failures: 0)
333+
end
334+
335+
# the job should run after 15s (15s > 10s (5 * 2^1))
336+
Timecop.freeze(14.seconds.after(enqueued_time)) do
337+
execute_all_jobs(expected_successes: 0, expected_failures: 0)
338+
end
339+
340+
Timecop.freeze(16.seconds.after(enqueued_time)) do
341+
enqueued_time = Time.now
342+
execute_all_jobs(expected_successes: 1, expected_failures: 0)
343+
end
344+
345+
# the job should run after 18s (capped at )
346+
Timecop.freeze(17.seconds.after(enqueued_time)) do
347+
execute_all_jobs(expected_successes: 0, expected_failures: 0)
348+
end
349+
350+
Timecop.freeze(19.seconds.after(enqueued_time)) do
351+
execute_all_jobs(expected_successes: 1, expected_failures: 0)
352+
end
353+
end
197354
end
198355
end
199356
end

0 commit comments

Comments
 (0)