This repository has been archived by the owner on Jun 28, 2024. It is now read-only.

Add support for scheduled retry with retryAfter #94

Closed · wants to merge 17 commits
27 changes: 27 additions & 0 deletions README.md
@@ -196,6 +196,33 @@ const catWatch = new ServiceClient({
});
```

#### Scheduled retries

Timeouts are often the reason for retries. You might want to retry your request *without* waiting for the initial request to time out, giving the service you are calling more time to reply to the initial request while still keeping the same overall request timeout.

For this purpose you can use `retryAfter` in `retryOptions`, in combination with `dropAllRequestsAfter` at the root of the options. If you have retries configured and the initial request has not resolved within `retryAfter` milliseconds, another request is sent in parallel; whichever request resolves first is used, and the other one(s) are aborted.

Retries scheduled via `retryAfter` are fired immediately and do not take `minTimeout` or `maxTimeout` into account.

```js
const {ServiceClient} = require('perron');

const catWatch = new ServiceClient({
  hostname: 'catwatch.opensource.zalan.do',
  dropAllRequestsAfter: 1000,
  retryOptions: {
    retries: 1,
    retryAfter: 400,
    shouldRetry(err, req) {
      return (err && err.response && err.response.statusCode >= 500);
    },
    onRetry(currentAttempt, err, req) {
      console.log('Retry attempt #' + currentAttempt + ' for ' + req.path + ' due to ' + err);
    }
  }
});
```
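
For illustration, a call against the client configured above could look roughly like this (a sketch, assuming the usual `request()` method and an example `/projects` path):

```js
// With retryAfter: 400 and dropAllRequestsAfter: 1000, a second attempt is
// fired if the first one has not resolved after 400ms, and anything still
// in flight is dropped once the overall 1000ms budget has elapsed.
catWatch
  .request({ path: '/projects' })
  .then(response => {
    // Whichever attempt resolved first lands here; the slower one was aborted.
    console.log(response.statusCode);
  })
  .catch(err => {
    // Reached when no attempt resolved in time or shouldRetry gave up.
    console.error(err);
  });
```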

## Filters

It's quite often necessary to do some pre- or post-processing of the request. For this purpose `perron` implements the concept of filters, which are just objects with 2 optional methods: `request` and `response`.
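
For example, a minimal filter could look like this (an illustrative sketch, assuming both methods simply pass their argument through):

```js
// An illustrative filter: both methods are optional. `request` receives the
// outgoing request options, `response` receives the response; each returns
// its argument (or a promise of it), possibly modified.
const loggingFilter = {
  request(requestOptions) {
    console.log('requesting', requestOptions.path);
    return requestOptions;
  },
  response(response) {
    console.log('received status', response.statusCode);
    return response;
  }
};
```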
62 changes: 59 additions & 3 deletions lib/client.ts
@@ -5,6 +5,7 @@ import {
CircuitBreakerPublicApi
} from "./circuit-breaker";
import { operation } from "./retry";
import { performance } from "perf_hooks";
import * as url from "url";
import {
ConnectionTimeoutError,
@@ -84,6 +85,7 @@ export class ServiceClientOptions {
public autoParseJson?: boolean;
public retryOptions?: {
retries?: number;
retryAfter?: number;
factor?: number;
minTimeout?: number;
maxTimeout?: number;
@@ -97,6 +99,7 @@ };
};
public circuitBreaker?: false | CircuitBreakerOptions | CircuitBreakerFactory;
public defaultRequestOptions?: Partial<ServiceClientRequestOptions>;
public dropAllRequestsAfter?: number;
}

/**
@@ -109,6 +112,7 @@ class ServiceClientStrictOptions {
public autoParseJson: boolean;
public retryOptions: {
retries: number;
retryAfter: number;
factor: number;
minTimeout: number;
maxTimeout: number;
@@ -121,6 +125,7 @@
) => void;
};
public defaultRequestOptions: ServiceClientRequestOptions;
public dropAllRequestsAfter: number;

constructor(options: ServiceClientOptions) {
if (!options.hostname) {
@@ -140,6 +145,7 @@
minTimeout: 200,
randomize: true,
retries: 0,
retryAfter: 0,
shouldRetry() {
return true;
},
@@ -163,6 +169,8 @@
timeout: 2000,
...options.defaultRequestOptions
};

this.dropAllRequestsAfter = options.dropAllRequestsAfter || 0;
}
}

@@ -586,6 +594,7 @@ export class ServiceClient {

const {
retries,
retryAfter,
factor,
minTimeout,
maxTimeout,
@@ -604,22 +613,54 @@

const retryErrors: ServiceClientError[] = [];
return new Promise<ServiceClientResponse>((resolve, reject) => {
const timerInitial = performance.now();
const breaker = this.getCircuitBreaker(params);
const retryOperation = operation(opts, (currentAttempt: number) => {
const requestsAbortCallbacks: (() => void)[] = [];
const retryOperation = operation(opts, scheduledRetry => {
breaker.run(
(success: () => void, failure: () => void) => {
if (this.options.dropAllRequestsAfter) {
const timerLeft =
this.options.dropAllRequestsAfter -
(performance.now() - timerInitial);
if (params.dropRequestAfter) {
params.dropRequestAfter = Math.min(
Contributor: This params object is reused across retries. Is it safe to mutate it? I prefer to clone it for each retry unless its reference equality is used somewhere.

(See the sketch after this file's diff for what per-attempt cloning could look like.)

params.dropRequestAfter,
timerLeft
);
} else {
params.dropRequestAfter = timerLeft;
}
}

if (retryAfter) {
params.registerAbortCallback = cb =>
requestsAbortCallbacks.push(cb);
}

return requestWithFilters(
this,
params,
this.options.filters || [],
this.options.autoParseJson
)
.then((result: ServiceClientResponse) => {
if (retryOperation.isResolved()) {
return;
}
retryOperation.resolved();
success();
result.retryErrors = retryErrors;
resolve(result);
if (retryAfter) {
requestsAbortCallbacks.forEach(cb => cb());
requestsAbortCallbacks.length = 0;
Contributor: So, this is a cleanup for the happy case. Is this array of callbacks cleaned up in the sad case (all requests fail)? I'm not sure if it causes a memory leak only from the code, but I think it's worth checking.

Author (@jeremycolin, Nov 1, 2019): Yes, good catch! Will check that.

}
})
.catch((error: ServiceClientError) => {
if (retryOperation.isResolved()) {
return;
}
retryErrors.push(error);
failure();
if (!shouldRetry(error, params)) {
Expand All @@ -628,11 +669,12 @@ export class ServiceClient {
);
return;
}
if (!retryOperation.retry()) {
const currentAttempt = retryOperation.retry();
if (!currentAttempt) {
// Wrapping error when user does not want retries would result
// in bad developer experience where you always have to unwrap it
// knowing there is only one error inside, so we do not do that.
if (retries === 0) {
if (retries === 0 || scheduledRetry) {
reject(error);
} else {
reject(
@@ -654,6 +696,20 @@
);
});
retryOperation.attempt();

if (retryAfter) {
const retryAfterTimeout = () =>
setTimeout(() => {
if (!retryOperation.isResolved()) {
const currentAttempt = retryOperation.retry(true);
if (currentAttempt) {
onRetry(currentAttempt + 1, undefined, params);
retryAfterTimeout();
}
}
}, retryAfter);
retryAfterTimeout();
}
}).catch((error: unknown) => {
const rawError =
error instanceof Error ? error : new Error(String(error));
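
Regarding the review note above about mutating the shared `params` object, a minimal sketch of the suggested per-attempt cloning (not part of this PR; a shallow copy is assumed to be sufficient) might look like:

```ts
// Sketch only: clone params for each attempt instead of mutating the shared
// object, then derive the per-attempt dropRequestAfter from the overall budget.
const attemptParams: ServiceClientRequestOptions = { ...params };
if (this.options.dropAllRequestsAfter) {
  const timeLeft =
    this.options.dropAllRequestsAfter - (performance.now() - timerInitial);
  attemptParams.dropRequestAfter =
    params.dropRequestAfter !== undefined
      ? Math.min(params.dropRequestAfter, timeLeft)
      : timeLeft;
}
// attemptParams would then be passed to requestWithFilters instead of params.
```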
34 changes: 23 additions & 11 deletions lib/request.ts
@@ -70,6 +70,7 @@ export interface ServiceClientRequestOptions extends RequestOptions {
* Opentracing like span interface to log events
*/
span?: Span;
registerAbortCallback?: (abortFunction: () => void) => void;
}

export class ServiceClientResponse {
@@ -218,6 +219,28 @@ export const request = (
}

const requestObject = httpRequestFn(options);

let dropRequestAfterTimeout: NodeJS.Timer;
if (options.dropRequestAfter) {
dropRequestAfterTimeout = setTimeout(() => {
abortCallback();
}, options.dropRequestAfter);
}

function abortCallback() {
clearTimeout(dropRequestAfterTimeout);
if (!hasRequestEnded) {
requestObject.abort();
const err = new UserTimeoutError(options, timings);
logEvent(EventSource.HTTP_REQUEST, EventName.ERROR, err.message);
Contributor (@shuhei, Oct 31, 2019): Do we want to mark aborted requests as timeouts when aborting because of a successful request?

Contributor: Logging is one thing, but we also need to make sure that this doesn't open the circuit breaker when aborting because of another successful request.

Author (@jeremycolin, Nov 1, 2019): Once one of the requests has succeeded, retryOperation will be marked as resolved, which will bypass any reject "bubble", see:

    catch((error: ServiceClientError) => {
      if (retryOperation.isResolved()) {
        return;
      }
      retryErrors.push(error);
      failure();
      ...

So actually clients are not even aware of the result of this promise. There is a vicious side effect though: this request is actually never resolved or rejected. Is that a problem?

Contributor (@shuhei, Nov 1, 2019): Aha, so the circuit breaker command's success() or failure() are not called. Then, it won't make false failure() calls at least!

But isn't the circuit breaker designed with the assumption that one of success() or failure() is called when an operation ends? It seems that the circuit breaker treats it as a timeout if neither success() nor failure() is called. And timeouts contribute to error rate calculation in the circuit breaker, which means it can open the circuit breaker without actual errors.

Contributor: So, we should stop the timeout timer in the circuit breaker when aborting requests because of another successful request. Because calling success() would lower the actual error rate, how about creating another function (cancel or something) in Command?

We may want to mark failures in some cases though. For example, if the first request doesn't respond and the second request succeeds, it makes more sense to mark the first request as a failure.

(A rough sketch of this cancel idea appears after this file's diff.)

Author: Wow, very good catch! Will evaluate adding this new function.

Author: OK, this approach seems to work. I have to write more tests now to cover all the different cases:

  • both requests work
  • first fails
  • second fails
  • first times out
  • second times out
  • both fail
  • both time out

reject(err);
}
}

if (options.registerAbortCallback) {
options.registerAbortCallback(abortCallback);
}

requestObject.setTimeout(readTimeout, () => {
logEvent(EventSource.HTTP_REQUEST, EventName.TIMEOUT);
requestObject.socket.destroy();
@@ -343,17 +366,6 @@ export const request = (
});
});

if (options.dropRequestAfter) {
setTimeout(() => {
if (!hasRequestEnded) {
requestObject.abort();
const err = new UserTimeoutError(options, timings);
logEvent(EventSource.HTTP_REQUEST, EventName.ERROR, err.message);
reject(err);
}
}, options.dropRequestAfter);
}

if (options.body) {
requestObject.write(options.body);
}
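
Following up on the circuit breaker discussion in the review thread above, a rough sketch of the proposed `cancel` addition (hypothetical; the real command shape lives in lib/circuit-breaker and may differ) could be:

```ts
// Hypothetical extension of the circuit breaker command discussed in review:
// cancel() would stop the breaker's internal timeout timer without counting
// the run as either a success or a failure, for requests that were aborted
// only because a sibling request already succeeded.
interface CancelableCommand {
  success(): void;
  failure(): void;
  cancel(): void;
}
```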
33 changes: 24 additions & 9 deletions lib/retry.ts
@@ -1,6 +1,6 @@
export function operation(
options: OperationOptions,
fn: (currentAttempt: number) => void
fn: (scheduledRetry: boolean) => void
) {
return new RetryOperation(timeouts(options), fn);
}
@@ -69,28 +69,43 @@ interface CreateTimeoutOptions {

class RetryOperation {
private readonly _timeouts: number[];
private readonly _fn: (currentAttempt: number) => void;
private readonly _fn: (scheduledRetry: boolean) => void;
private _resolved: boolean;
private _attempts: number;
constructor(timeouts: number[], fn: (currentAttempt: number) => void) {
constructor(timeouts: number[], fn: (scheduledRetry: boolean) => void) {
this._timeouts = timeouts;
this._fn = fn;
this._resolved = false;
this._attempts = 1;
}

retry() {
retry(immediate: boolean = false) {
if (this._attempts > this._timeouts.length) {
return false;
}
let timeout = this._timeouts[this._attempts - 1];
if (immediate) {
this._attempts++;
this._fn(immediate);
return this._attempts - 1;
}

setTimeout(() => {
this._attempts++;
this._fn(this._attempts);
}, timeout);
this._fn(immediate);
}, this._timeouts[this._attempts - 1]);

return true;
return this._attempts;
}

attempt() {
this._fn(this._attempts);
this._fn(false);
}

resolved() {
this._resolved = true;
}

isResolved() {
return this._resolved;
}
}