Skip to content

Commit a844338

Browse files
authored
Resiliency: mark a service as used even if the getClient call during warmUp fails (#1040)
* Mark service as _usedService regardless of whether getClient call succeeds * Update changelog * Remove empty test (added mistakenly)
1 parent 988c403 commit a844338

File tree

5 files changed

+40
-12
lines changed

5 files changed

+40
-12
lines changed

CHANGELOG.md

+5-1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ and what APIs have changed, if applicable.
1414

1515
## [Unreleased]
1616

17+
## [29.64.1] - 2025-02-15
18+
- Fix warmUp -- record service as used regardless of whether getClient succeeds
19+
1720
## [29.64.0] - 2025-01-31
1821
- Allow subscribing to a single D2URI
1922

@@ -5770,7 +5773,8 @@ patch operations can re-use these classes for generating patch messages.
57705773

57715774
## [0.14.1]
57725775

5773-
[Unreleased]: https://github.com/linkedin/rest.li/compare/v29.64.0...master
5776+
[Unreleased]: https://github.com/linkedin/rest.li/compare/v29.64.1...master
5777+
[29.64.1]: https://github.com/linkedin/rest.li/compare/v29.64.0...v29.64.1
57745778
[29.64.0]: https://github.com/linkedin/rest.li/compare/v29.63.2...v29.64.0
57755779
[29.63.2]: https://github.com/linkedin/rest.li/compare/v29.63.1...v29.63.2
57765780
[29.63.1]: https://github.com/linkedin/rest.li/compare/v29.63.0...v29.63.1

d2-test-api/src/main/java/com/linkedin/d2/balancer/util/TestLoadBalancer.java

+12-1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import java.util.concurrent.Executors;
3737
import java.util.concurrent.ScheduledExecutorService;
3838
import java.util.concurrent.TimeUnit;
39+
import java.util.concurrent.TimeoutException;
3940
import java.util.concurrent.atomic.AtomicInteger;
4041

4142

@@ -49,12 +50,17 @@ public class TestLoadBalancer implements LoadBalancerWithFacilities, WarmUpServi
4950
private final AtomicInteger _completedRequestCount = new AtomicInteger();
5051
private int _warmUpDelayMs = 0;
5152
private int _serviceDataDelayMs = 0;
53+
private boolean _shouldThrowOnGetClient = false;
5254

5355
private final int DELAY_STANDARD_DEVIATION = 5; //ms
5456
private final ScheduledExecutorService _executorService = Executors.newSingleThreadScheduledExecutor();
5557

5658
public TestLoadBalancer() {}
5759

60+
public TestLoadBalancer(boolean shouldThrowOnGetClient) {
61+
_shouldThrowOnGetClient = shouldThrowOnGetClient;
62+
}
63+
5864
public TestLoadBalancer(int warmUpDelayMs)
5965
{
6066
this(warmUpDelayMs, 0);
@@ -69,7 +75,12 @@ public TestLoadBalancer(int warmUpDelayMs, int serviceDataDelayMs)
6975
@Override
7076
public void getClient(Request request, RequestContext requestContext, Callback<TransportClient> clientCallback)
7177
{
72-
clientCallback.onSuccess(new TestClient());
78+
if (_shouldThrowOnGetClient)
79+
{
80+
clientCallback.onError(new TimeoutException());
81+
} else {
82+
clientCallback.onSuccess(new TestClient());
83+
}
7384
}
7485

7586
@Override

d2/src/main/java/com/linkedin/d2/balancer/util/WarmUpLoadBalancer.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -441,10 +441,10 @@ private Set<String> getUsedClusters()
441441
@Override
442442
public TransportClient getClient(Request request, RequestContext requestContext) throws ServiceUnavailableException
443443
{
444-
TransportClient client = _loadBalancer.getClient(request, requestContext);
445-
444+
// Add serviceName to _usedServices *before* making the call to _loadBalancer.getClient. Even if
445+
// the call fails, we still *intend* to use serviceName, so it should be in _usedServices.
446446
String serviceName = LoadBalancerUtil.getServiceNameFromUri(request.getURI());
447447
_usedServices.add(serviceName);
448-
return client;
448+
return _loadBalancer.getClient(request, requestContext);
449449
}
450450
}

d2/src/test/java/com/linkedin/d2/balancer/util/WarmUpLoadBalancerTest.java

+19-6
Original file line numberDiff line numberDiff line change
@@ -165,16 +165,24 @@ public void testDeletingFilesAfterShutdown() throws InterruptedException, Execut
165165
}
166166
}
167167

168+
@DataProvider(name = "shouldThrowOnGetClientDataProvider")
169+
public Object[][] shouldThrowOnGetClientDataProvider()
170+
{
171+
return new Object[][]
172+
{{true}, {false}};
173+
}
174+
168175
/**
169176
* Since the list might from the fetcher might not be complete (update service, old data, etc.., and the user might
170177
* require additional services at runtime, we have to check that those services are not cleared from the cache
171-
* otherwise it would incur in a penalty at the next deployment
178+
* otherwise it would incur in a penalty at the next deployment.
179+
* Note that regardless of whether getClient returns successfully or not (if it times out, for example),
180+
* we should still record the service/s we tried to warm up.
172181
*/
173-
@Test(timeOut = 10000, retryAnalyzer = ThreeRetries.class)
174-
public void testNotDeletingFilesGetClient() throws InterruptedException, ExecutionException, TimeoutException, ServiceUnavailableException
175-
{
182+
@Test(dataProvider = "shouldThrowOnGetClientDataProvider", timeOut = 10000, retryAnalyzer = ThreeRetries.class)
183+
public void testNotDeletingFilesGetClient(boolean shouldThrowOnGetClient) throws InterruptedException, ExecutionException, TimeoutException {
176184
createDefaultServicesIniFiles();
177-
TestLoadBalancer balancer = new TestLoadBalancer();
185+
TestLoadBalancer balancer = new TestLoadBalancer(shouldThrowOnGetClient);
178186

179187
List<String> allServicesBeforeShutdown = getAllDownstreamServices();
180188
DownstreamServicesFetcher returnNoDownstreams = callback -> callback.onSuccess(Collections.emptyList());
@@ -190,14 +198,19 @@ public void testNotDeletingFilesGetClient() throws InterruptedException, Executi
190198
warmUpLoadBalancer.start(callback);
191199
callback.get(5000, TimeUnit.MILLISECONDS);
192200

193-
warmUpLoadBalancer.getClient(new URIRequest("d2://" + pickOneService), new RequestContext());
201+
try {
202+
warmUpLoadBalancer.getClient(new URIRequest("d2://" + pickOneService), new RequestContext());
203+
} catch (Exception e) {
204+
Assert.assertTrue(shouldThrowOnGetClient);
205+
}
194206

195207
FutureCallback<None> shutdownCallback = new FutureCallback<>();
196208
warmUpLoadBalancer.shutdown(() -> shutdownCallback.onSuccess(None.none()));
197209
shutdownCallback.get(5000, TimeUnit.MILLISECONDS);
198210

199211
List<String> allServicesAfterShutdown = getAllDownstreamServices();
200212

213+
// regardless of whether getClient returned successfully or threw an Exception, we should still record the service we tried to warm up
201214
Assert.assertEquals(1, allServicesAfterShutdown.size(), "After shutdown there should be just one service, the one that we 'get the client' on");
202215
}
203216

gradle.properties

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
version=29.64.0
1+
version=29.64.1
22
group=com.linkedin.pegasus
33
org.gradle.configureondemand=true
44
org.gradle.parallel=true

0 commit comments

Comments
 (0)