-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
🥅 455 - Error handling and retries + job reporting #463
Changes from 3 commits
e92dab7
b25cca9
9cb3949
041962a
d7e93d1
4d3a9d1
7880d28
5a1d79b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,9 +22,11 @@ import urlJoin from 'url-join'; | |
import { getAppConfig } from '../../../config'; | ||
import logger from '../../../logger'; | ||
|
||
import axiosRetry from 'axios-retry'; | ||
import pThrottle from '../../../../pThrottle'; | ||
import { EGA_API } from '../../../utils/constants'; | ||
import { DacAccessionId, DatasetAccessionId } from '../types/common'; | ||
import { DEFAULT_RETRIES } from '../types/constants'; | ||
import { BadRequestError, NotFoundError, ServerError } from '../types/errors'; | ||
import { ApprovePermissionRequest, PermissionRequest, RevokePermission } from '../types/requests'; | ||
import { | ||
|
@@ -50,7 +52,7 @@ import { | |
} from '../types/results'; | ||
import { safeParseArray, ZodResultAccumulator } from '../types/zodSafeParseArray'; | ||
import { ApprovedUser, getErrorMessage } from '../utils'; | ||
import { fetchAccessToken, tokenExpired } from './idpClient'; | ||
import { fetchAccessToken, isTokenExpired } from './idpClient'; | ||
|
||
const { DACS, DATASETS, PERMISSIONS, REQUESTS, USERS } = EGA_API; | ||
|
||
|
@@ -61,12 +63,14 @@ const initApiAxiosClient = () => { | |
const { | ||
ega: { apiUrl }, | ||
} = getAppConfig(); | ||
return axios.create({ | ||
const client = axios.create({ | ||
baseURL: apiUrl, | ||
headers: { | ||
'Content-Type': 'application/json', | ||
}, | ||
}); | ||
axiosRetry(client, { retries: DEFAULT_RETRIES }); | ||
return client; | ||
}; | ||
const apiAxiosClient = initApiAxiosClient(); | ||
|
||
|
@@ -84,9 +88,9 @@ export const egaApiClient = async () => { | |
|
||
const getAccessToken = async (): Promise<IdpToken> => { | ||
if (currentToken) { | ||
const tokenIsExpired = await tokenExpired(currentToken); | ||
const tokenIsExpired = await isTokenExpired(currentToken); | ||
if (tokenIsExpired) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm looking at where this tokenExpiry check is called, and I'm not sure if it is useful? since the only places There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for describing this, I think you are correct in this context. As written, we don't call this before using the EGA axios client, instead we store the current token in the ega axios client and then update it with the fresh token after we retrieve it and that keeps our client up to date. Since this is a cron-job, the client is created, then constantly used, and then the job ends - that makes this pattern acceptable. If this client was being used in a long lived server where the client could sit unused for a while and then the token go stale, then we would be better served to always call I went back and forth on this message like 3 times so I'll probably come up with some new ideas by the time you read this..... |
||
logger.info('token is expired'); | ||
logger.debug('Token is expired.'); | ||
resetAccessToken(); | ||
} else { | ||
return currentToken; | ||
|
@@ -117,6 +121,13 @@ export const egaApiClient = async () => { | |
interval: maxRequestInterval, | ||
}); | ||
|
||
// throttled axios methods | ||
const throttledDelete = throttle(apiAxiosClient.delete); | ||
const throttledGet = throttle(apiAxiosClient.get); | ||
const throttledPost = throttle(apiAxiosClient.post); | ||
const throttledPut = throttle(apiAxiosClient.put); | ||
const throttledGenericRequest = throttle(apiAxiosClient.request); | ||
|
||
Comment on lines
+129
to
+135
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
const accessToken = await getAccessToken(); | ||
apiAxiosClient.defaults.headers.common['Authorization'] = `Bearer ${accessToken.access_token}`; | ||
|
||
|
@@ -142,7 +153,7 @@ export const egaApiClient = async () => { | |
// reset on client headers so subsequent requests have new access token | ||
apiAxiosClient.defaults.headers['Authorization'] = refreshedBearerToken; | ||
// returns Promise for original request | ||
return apiAxiosClient.request(error.config); | ||
return throttledGenericRequest(error.config); | ||
case 400: | ||
// don't retry | ||
logger.error(`Bad Request`); | ||
|
@@ -157,13 +168,13 @@ export const egaApiClient = async () => { | |
`${CLIENT_NAME} - ${error.response.status} - ${error.response.statusText} - retrying original request.`, | ||
); | ||
// retry original request. this response error shouldn't be an issue because throttling is in place | ||
return apiAxiosClient.request(error.config); | ||
return throttledGenericRequest(error.config); | ||
case 504: | ||
logger.error( | ||
`${CLIENT_NAME} - ${error.response.status} - ${error.response.statusText} - retrying original request.`, | ||
); | ||
// retry original request | ||
return apiAxiosClient.request(error.config); | ||
return throttledGenericRequest(error.config); | ||
default: | ||
logger.error(`Unexpected Axios Error: ${error.response.status}`); | ||
return new ServerError('Unexpected Axios Error'); | ||
|
@@ -177,7 +188,7 @@ export const egaApiClient = async () => { | |
logger.error(`${CLIENT_NAME} - AxiosError - ECONNRESET`); | ||
if (originalRequest) { | ||
logger.info(`${CLIENT_NAME} - ECONNRESET - retrying original request`); | ||
return apiAxiosClient.request(originalRequest); | ||
return throttledGenericRequest(originalRequest); | ||
} | ||
return Promise.reject(error); | ||
case 'ERR_BAD_REQUEST': | ||
|
@@ -203,7 +214,7 @@ export const egaApiClient = async () => { | |
): Promise<Result<ZodResultAccumulator<EgaDataset>, GetDatasetsForDacFailure>> => { | ||
const url = urlJoin(DACS, dacId, DATASETS); | ||
try { | ||
const { data } = await apiAxiosClient.get(url); | ||
const { data } = await throttledGet(url); | ||
const result = safeParseArray(EgaDataset, data); | ||
return success(result); | ||
} catch (err) { | ||
|
@@ -229,7 +240,7 @@ export const egaApiClient = async () => { | |
const getUser = async (user: ApprovedUser): Promise<Result<EgaUser, GetUserFailure>> => { | ||
const url = urlJoin(USERS, user.email); | ||
try { | ||
const response = await apiAxiosClient.get(url); | ||
const response = await throttledGet(url); | ||
const egaUser = EgaUser.safeParse(response.data); | ||
if (egaUser.success) { | ||
return success(egaUser.data); | ||
|
@@ -269,7 +280,7 @@ export const egaApiClient = async () => { | |
}): Promise<Result<ZodResultAccumulator<EgaPermission>, GetPermissionsForDatasetFailure>> => { | ||
const url = urlJoin(DACS, dacId, PERMISSIONS); | ||
try { | ||
const response = await apiAxiosClient.get(url, { | ||
const response = await throttledGet(url, { | ||
params: { | ||
dataset_accession_id: datasetAccessionId, | ||
limit, | ||
|
@@ -303,7 +314,7 @@ export const egaApiClient = async () => { | |
> => { | ||
try { | ||
const url = urlJoin(PERMISSIONS); | ||
const response = await apiAxiosClient.get(url, { | ||
const response = await throttledGet(url, { | ||
params: { | ||
user_id: userId, | ||
limit: datasetsTotal, | ||
|
@@ -359,7 +370,7 @@ export const egaApiClient = async () => { | |
Result<ZodResultAccumulator<EgaPermissionRequest>, CreatePermissionRequestsFailure> | ||
> => { | ||
try { | ||
const { data } = await apiAxiosClient.post(REQUESTS, requests); | ||
const { data } = await throttledPost(REQUESTS, requests); | ||
const result = safeParseArray(EgaPermissionRequest, data); | ||
return success(result); | ||
} catch (err) { | ||
|
@@ -387,7 +398,7 @@ export const egaApiClient = async () => { | |
requests: ApprovePermissionRequest[], | ||
): Promise<Result<ApprovePermissionResponse, ApprovedPermissionRequestsFailure>> => { | ||
try { | ||
const response = await apiAxiosClient.put(REQUESTS, requests); | ||
const response = await throttledPut(REQUESTS, requests); | ||
if (response.data) { | ||
const result = ApprovePermissionResponse.safeParse(response.data); | ||
if (result.success) { | ||
|
@@ -424,7 +435,7 @@ export const egaApiClient = async () => { | |
requests: RevokePermission[], | ||
): Promise<Result<RevokePermissionResponse, RevokePermissionsFailure>> => { | ||
try { | ||
const response = await apiAxiosClient.delete(PERMISSIONS, { data: requests }); | ||
const response = await throttledDelete(PERMISSIONS, { data: requests }); | ||
if (response.status === 400) { | ||
throw new BadRequestError('Permission not found.'); | ||
} | ||
|
@@ -452,13 +463,13 @@ export const egaApiClient = async () => { | |
}; | ||
|
||
return { | ||
approvePermissionRequests: throttle(approvePermissionRequests), | ||
createPermissionRequests: throttle(createPermissionRequests), | ||
getDatasetsForDac: throttle(getDatasetsForDac), | ||
getPermissionsByUserId: throttle(getPermissionsByUserId), | ||
getPermissionsForDataset: throttle(getPermissionsForDataset), | ||
getUser: throttle(getUser), | ||
revokePermissions: throttle(revokePermissions), | ||
approvePermissionRequests, | ||
createPermissionRequests, | ||
getDatasetsForDac, | ||
getPermissionsByUserId, | ||
getPermissionsForDataset, | ||
getUser, | ||
revokePermissions, | ||
}; | ||
}; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,13 +18,15 @@ | |
*/ | ||
|
||
import axios, { AxiosError } from 'axios'; | ||
import axiosRetry from 'axios-retry'; | ||
import jwt from 'jsonwebtoken'; | ||
import urlJoin from 'url-join'; | ||
import { getAppConfig } from '../../../config'; | ||
import logger from '../../../logger'; | ||
import getAppSecrets from '../../../secrets'; | ||
|
||
import { EGA_GRANT_TYPE, EGA_REALMS_PATH, EGA_TOKEN_ENDPOINT } from '../../../utils/constants'; | ||
import { DEFAULT_RETRIES } from '../types/constants'; | ||
import { IdpToken } from '../types/responses'; | ||
const { verify } = jwt; | ||
|
||
|
@@ -39,7 +41,7 @@ const CLIENT_NAME = 'IDP_CLIENT'; | |
const decodeToken = async ( | ||
token: string, | ||
): Promise<jwt.JwtPayload | 'TokenExpiredError' | undefined> => { | ||
logger.info('Verifying token'); | ||
logger.debug('Verifying token'); | ||
const { | ||
auth: { egaPublicKey }, | ||
} = await getAppSecrets(); | ||
|
@@ -61,11 +63,12 @@ const decodeToken = async ( | |
}; | ||
|
||
/** | ||
* Uses jsonwebtoken.verify to validate token is not expired | ||
* Uses jsonwebtoken.verify to validate token is not expired. | ||
* Returns true if token is expired, otherwise returns false; the token may be invalid and still return false | ||
* @param token IdpToken | ||
* @returns Promise<boolean> | ||
*/ | ||
export const tokenExpired = async (token: IdpToken): Promise<boolean> => { | ||
export const isTokenExpired = async (token: IdpToken): Promise<boolean> => { | ||
const decoded = await decodeToken(token.access_token); | ||
return decoded === 'TokenExpiredError'; | ||
}; | ||
|
@@ -75,9 +78,11 @@ const initIdpClient = () => { | |
const { | ||
ega: { authHost }, | ||
} = getAppConfig(); | ||
return axios.create({ | ||
const client = axios.create({ | ||
baseURL: authHost, | ||
}); | ||
axiosRetry(client, { retries: DEFAULT_RETRIES }); | ||
return client; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added same retry strategy here |
||
}; | ||
const idpClient = initIdpClient(); | ||
|
||
|
@@ -100,7 +105,7 @@ idpClient.interceptors.response.use( | |
logger.error(`${CLIENT_NAME} - AxiosError - ECONNRESET`); | ||
const originalRequest = error.config; | ||
if (originalRequest) { | ||
logger.info(`${CLIENT_NAME} - retrying original request`); | ||
logger.debug(`${CLIENT_NAME} - retrying original request`); | ||
return idpClient.request(originalRequest); | ||
} | ||
break; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,6 +43,41 @@ const JOB_NAME = 'RECONCILE_EGA_PERMISSIONS'; | |
* 5) Process existing permissions for each dataset + revoke those which belong to users not on the DACO approved list | ||
* 6) Return completed JobReport | ||
* @returns Promise<JobReport<ReconciliationJobReport>> | ||
* @example | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added this example report to show how the completionStatus lines up with the |
||
* // returns { | ||
"startedAt": "2024-11-25T22:14:57.306Z", | ||
"finishedAt": "2024-11-26T01:52:51.339Z", | ||
"jobName": "RECONCILE_EGA_PERMISSIONS", | ||
"success": true, | ||
"details": { | ||
"approvedDacoUsersCount": 1514, | ||
"approvedEgaUsersCount": 1514, | ||
"datasetsCount": 503, | ||
"permissionsCreated": { | ||
"startTime": "2024-11-25T22:26:38.344Z", | ||
"endTime": "2024-11-26T00:04:22.968Z", | ||
"timeElapsed": "97 minutes", | ||
"completionStatus": "SUCCESS", | ||
"details": { | ||
"numUsersSuccessfullyProcessed": 1514, | ||
"numUsersWithNewPermissions": 1402, | ||
"errors": [] | ||
} | ||
}, | ||
"permissionsRevoked": { | ||
"startTime": "2024-11-26T00:04:22.980Z", | ||
"endTime": "2024-11-26T01:52:51.331Z", | ||
"timeElapsed": "108 minutes", | ||
"completionStatus": "SUCCESS", | ||
"details": { | ||
"numDatasetsProcessed": 503, | ||
"numDatasetsWithPermissionsRevoked": 293, | ||
"errors": [], | ||
"datasetsWithIncorrectPermissionsCounts": [] | ||
} | ||
} | ||
} | ||
} | ||
*/ | ||
async function runEgaPermissionsReconciliation(): Promise<JobReport<ReconciliationJobReport>> { | ||
const startTime = new Date(); | ||
|
@@ -110,7 +145,8 @@ async function runEgaPermissionsReconciliation(): Promise<JobReport<Reconciliati | |
logger.info(`${JOB_NAME} - Job took ${timeElapsed} minutes to complete.`); | ||
|
||
const reportHasErrors = | ||
permissionsCreatedResult.details.errors.length | permissionsRevokedResult.details.errors.length; | ||
permissionsCreatedResult.details.errors.length > 0 || | ||
permissionsRevokedResult.details.errors.length > 0; | ||
const permissionsReconciliationJobReport: JobReport<ReconciliationJobReport> = { | ||
startedAt: startTime, | ||
finishedAt: endTime, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added
axios-retry
and set max retries to 3. When i ran this locally i didn't encounter any retryable errors beyond this number, but the job report on completion didn't indicate that any user/dataset request was missed. All the totals lined up to what I expected.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Did you run or write a test to see how this behaves when it does fail 3+ times?