Skip to content

Commit

Permalink
chore: reduce delay between requests to make maintenance operations "…
Browse files Browse the repository at this point in the history
…nicer"
  • Loading branch information
sneko committed Feb 21, 2024
1 parent faffbb5 commit 4bc628c
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 12 deletions.
16 changes: 8 additions & 8 deletions src/features/domain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -414,8 +414,8 @@ export async function updateRobotsTxtOnDomains() {
}
}

// Do not flood network
await sleep(1000);
// Do not flood network (tiny delay since it's unlikely that many consecutive domains would be managed by the same provider)
await sleep(50);
}
}

Expand Down Expand Up @@ -483,8 +483,8 @@ export async function updateWildcardCertificateOnDomains() {
},
});

// Do not flood network
await sleep(1000);
// Do not flood network (tiny delay since it's unlikely that many consecutive domains would be managed by the same provider)
await sleep(50);
}
}

Expand Down Expand Up @@ -565,8 +565,8 @@ export async function updateWebsiteDataOnDomains() {
) {
const anotherPageUrl = cleanLink;

// Wait a bit to not flood this website
await sleep(1000);
// Wait a bit to not flood this website (tiny delay in this loop because it's just the second request to this domain in this iteration)
await sleep(50);

const anotherPageData = await getWebsiteData(anotherPageUrl);
anotherPageTitle = anotherPageData.title;
Expand Down Expand Up @@ -712,8 +712,8 @@ export async function updateWebsiteDataOnDomains() {
}
}

// Do not flood network
await sleep(1000);
// Do not flood network (tiny delay since it's unlikely that many consecutive domains would be managed by the same provider)
await sleep(50);
}
}

Expand Down
12 changes: 10 additions & 2 deletions src/features/initiative.ts
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,9 @@ export async function feedInitiativesFromDatabase() {
const results = await site.analyze();

await fs.writeFile(wappalyzerAnalysisPath, JSON.stringify(results, null, 2));

// Wait a bit in case websites from this initiative are on the same servers (only a tiny delay in this loop)
await sleep(50);
}

const wappalyzerAnalysisDataString = await fs.readFile(wappalyzerAnalysisPath, 'utf-8');
Expand Down Expand Up @@ -615,6 +618,10 @@ export async function feedInitiativesFromDatabase() {
'--filter': 'blob:limit=200k',
});

// Do not flood network (tiny delay since it seems GitHub only rate-limits API requests, not Git operations)
// Ref: https://github.com/orgs/community/discussions/44515#discussioncomment-4795475
await sleep(50);

// `git ls-files` was returning non-UTF-8 encoding so we were not able to easily delete files.
// A git config is needed to get UTF-8 encoding (ref: https://stackoverflow.com/a/22828826/3608410)
// (for example `vidéo_48_bicolore.svg` was returned as `vid\303\251o_48_bicolore.svg`)
Expand Down Expand Up @@ -907,8 +914,9 @@ export async function feedInitiativesFromDatabase() {
break; // When successful break the infinite loop
}

// Do not flood network
await sleep(1000);
// Do not flood network (tiny delay since MistralAI limits us to 5 req/s, but since the generation usually takes more than a second we are fine)
// (if needed in the future we could look at their rate limit information in headers to wait the appropriate amount of time to retry)
await sleep(50);
}
} catch (error) {
if (error instanceof OpenAI.APIError) {
Expand Down
2 changes: 0 additions & 2 deletions src/features/llm-langchain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,6 @@ CONTEXT:
// To help the LLM we give inside the context tools we are looking for
// Since we cannot give the 8k+ tools from our database, we try to provide a subset meaningful according to extracted tech references we retrieved
const rawToolsVectors = await this.toolsVectorStore.embeddings.embedDocuments(rawToolsFromAnalysis.filter((item) => item.trim() !== ''));
await sleep(500);

const contextTools: string[] = [];
for (let i = 0; i < rawToolsVectors.length; i++) {
Expand Down Expand Up @@ -426,7 +425,6 @@ CONTEXT:

// We add correction to tools in case the LLM processed them poorly and to adjust to our own internal naming
// Since embeddings are calculated by MistralAI we batch all at once to avoid API rate limiting
await sleep(500);
const resultVectors = await this.toolsVectorStore.embeddings.embedDocuments(result.tools);

for (let i = 0; i < result.tools.length; i++) {
Expand Down

0 comments on commit 4bc628c

Please sign in to comment.