From 91a73287880f3604fd401b2ee1a5fe9fa9ff6556 Mon Sep 17 00:00:00 2001 From: Chris Alfano Date: Tue, 13 Dec 2022 20:23:38 -0500 Subject: [PATCH 1/2] fix(github): implement handling of soft rate limits --- crawler/lib/connections/github.js | 21 ++++++++++++- crawler/package-lock.json | 52 +++++++++++++++++++++++++------ crawler/package.json | 3 +- 3 files changed, 65 insertions(+), 11 deletions(-) diff --git a/crawler/lib/connections/github.js b/crawler/lib/connections/github.js index ec6f730505d..21ca778d5be 100644 --- a/crawler/lib/connections/github.js +++ b/crawler/lib/connections/github.js @@ -1,8 +1,9 @@ const axios = require('axios'); +const axiosRetryAfter = require('axios-retry-after'); const { GITHUB_ACTOR: githubActor, GITHUB_TOKEN: githubToken } = process.env; -module.exports = axios.create({ +const axiosClient = axios.create({ baseURL: 'https://api.github.com', headers: { Accept: 'application/vnd.github.mercy-preview+json' @@ -11,3 +12,21 @@ module.exports = axios.create({ ? { username: githubActor, password: githubToken } : null }); + +axiosClient.interceptors.response.use(null, axiosRetryAfter(axiosClient, { + // GitHub responds with 403 instead of 429 when soft rate limit is hit + isRetryable (error) { + return ( + error.response && error.response.status === 403 && + error.response.headers['retry-after'] + ); + }, + + wait (error) { + return new Promise( + resolve => setTimeout(resolve, error.response.headers['retry-after'] * 1000) + ); + } +})); + +module.exports = axiosClient; diff --git a/crawler/package-lock.json b/crawler/package-lock.json index a79df07864f..db097218a06 100644 --- a/crawler/package-lock.json +++ b/crawler/package-lock.json @@ -1572,8 +1572,7 @@ "asynckit": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", - "integrity": "sha1-x57Zf380y48robyXkLzDZkdLS3k=", - "dev": true + "integrity": "sha1-x57Zf380y48robyXkLzDZkdLS3k=" }, "atob": { "version": "2.1.2", @@ -1593,13 +1592,37 @@ "dev": true }, "axios": { - "version": "0.19.2", - "resolved": "https://registry.npmjs.org/axios/-/axios-0.19.2.tgz", - "integrity": "sha512-fjgm5MvRHLhx+osE2xoekY70AhARk3a6hkN+3Io1jc00jtquGvxYlKlsFUhmUET0V5te6CcZI7lcv2Ym61mjHA==", + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.2.1.tgz", + "integrity": "sha512-I88cFiGu9ryt/tfVEi4kX2SITsvDddTajXTOFmt2uK1ZVA8LytjtdeyefdQWEf5PU8w+4SSJDoYnggflB5tW4A==", "requires": { - "follow-redirects": "1.5.10" + "follow-redirects": "^1.15.0", + "form-data": "^4.0.0", + "proxy-from-env": "^1.1.0" + }, + "dependencies": { + "follow-redirects": { + "version": "1.15.2", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz", + "integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==" + }, + "form-data": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", + "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==", + "requires": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "mime-types": "^2.1.12" + } + } } }, + "axios-retry-after": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/axios-retry-after/-/axios-retry-after-2.0.0.tgz", + "integrity": "sha512-tSB1DEF1bSwXmRNyPcopFsiHAF+PWVq5w2mAK7J0bTltn8x2UnfoSJzTVXPySt/WdrbQL4ES5AXtG9i016+CaA==" + }, "babel-jest": { "version": "25.5.1", "resolved": "https://registry.npmjs.org/babel-jest/-/babel-jest-25.5.1.tgz", @@ -2128,7 +2151,6 @@ "version": "1.0.8", "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", - "dev": true, "requires": { "delayed-stream": "~1.0.0" } @@ -2355,8 +2377,7 @@ "delayed-stream": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", - "integrity": "sha1-3zrhmayt+31ECqrgsp4icrJOxhk=", - "dev": true + "integrity": "sha1-3zrhmayt+31ECqrgsp4icrJOxhk=" }, "delegates": { "version": "1.0.0", @@ -3905,6 +3926,14 @@ "resolved": "https://registry.npmjs.org/async/-/async-1.0.0.tgz", "integrity": "sha1-+PwEyjoTeErenhZBr5hXjPvWR6k=" }, + "axios": { + "version": "0.19.2", + "resolved": "https://registry.npmjs.org/axios/-/axios-0.19.2.tgz", + "integrity": "sha512-fjgm5MvRHLhx+osE2xoekY70AhARk3a6hkN+3Io1jc00jtquGvxYlKlsFUhmUET0V5te6CcZI7lcv2Ym61mjHA==", + "requires": { + "follow-redirects": "1.5.10" + } + }, "camelcase": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-4.1.0.tgz", @@ -7674,6 +7703,11 @@ "resolved": "https://registry.npmjs.org/protocols/-/protocols-1.4.7.tgz", "integrity": "sha512-Fx65lf9/YDn3hUX08XUc0J8rSux36rEsyiv21ZGUC1mOyeM3lTRpZLcrm8aAolzS4itwVfm7TAPyxC2E5zd6xg==" }, + "proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" + }, "psl": { "version": "1.8.0", "resolved": "https://registry.npmjs.org/psl/-/psl-1.8.0.tgz", diff --git a/crawler/package.json b/crawler/package.json index e4b41f400a4..011fdb0c217 100644 --- a/crawler/package.json +++ b/crawler/package.json @@ -5,7 +5,8 @@ "author": "Chris Alfano ", "license": "Apache-2.0", "dependencies": { - "axios": "^0.19.2", + "axios": "^1.2.1", + "axios-retry-after": "^2.0.0", "csv-parser": "^2.3.3", "gitsheets": "^0.2.2", "parse-link-header": "^1.0.1", From 74630895c63e2945fd505b7bc50e416cc792f61d Mon Sep 17 00:00:00 2001 From: Chris Alfano Date: Tue, 13 Dec 2022 20:23:56 -0500 Subject: [PATCH 2/2] docs(crawler): add step to lock node14 --- docs/projects/crawler/contributing/crawler.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/projects/crawler/contributing/crawler.md b/docs/projects/crawler/contributing/crawler.md index 1ea52f98678..7a389bb4366 100644 --- a/docs/projects/crawler/contributing/crawler.md +++ b/docs/projects/crawler/contributing/crawler.md @@ -4,6 +4,19 @@ The Crawler is built with Node.js and development is primarily supported for [Vi ## After Cloning or Pulling +Make sure you're using node 14: + +```bash +node --version +``` + +On Mac with node installed via Homebrew, you can switch your current shell to node 14: + +```bash +brew install node@14 +export PATH="/usr/local/opt/node@14/bin:$PATH" +``` + Install node modules: ```bash