Skip to content

Commit

Permalink
feat(deepl): batch translation requests (#97)
Browse files Browse the repository at this point in the history
  • Loading branch information
stefan-lacatus authored Aug 5, 2023
1 parent f1edef4 commit d275ccf
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 64 deletions.
21 changes: 13 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,14 +181,19 @@ DeepL Free is limited to 500,000 characters translated per month.
After you have completed your sign-up, you can pass the API key to
json-autotranslate using the `-c` or `--config` option.

You can also provide a formality by adding it to the config string after the API
key, separated by a comma: `--config apiKey,formality`. This feature currently
only works for target languages "DE" (German), "FR" (French), "IT" (Italian),
"ES" (Spanish), "NL" (Dutch), "PL" (Polish), "PT-PT", "PT-BR" (Portuguese) and
"RU" (Russian). Possible options are:

> "default" (default) "more" - for a more formal language "less" - for a more
> informal language
The value of the `--config` argument is a comma-separated string with the following fields: `apiKey,formality,batchSize`.

The `formality` argument currently only works for target languages "DE" (German), "FR" (French), "IT" (Italian),
"ES" (Spanish), "NL" (Dutch), "PL" (Polish), "PT-PT", "PT-BR" (Portuguese) and "RU" (Russian). Possible options are:

- "default" (default)
- "more" - for a more formal language
- "less" - for a more informal language

To improve performance and avoid DeepL rate-limiting, json-autotranslate batches multiple tokens into a single translation request.
By default, the `batchSize` is set to `1000`, meaning that up to `1000` tokens are translated at once. This can be controlled by adjusting the value in the `--config` parameter.
This value was chosen because DeepL does not allow the body of a request to be larger than `128 KiB (128 · 1024 bytes)`. Based on experimentation, even with long tokens, this limit is not reached.


<sup><a href="https://www.deepl.com/de/docs-api/translating-text/">Reference</a></sup>

Expand Down
82 changes: 56 additions & 26 deletions src/services/deepl-free.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ const API_ENDPOINT = 'https://api-free.deepl.com/v2';
export class DeepLFree implements TranslationService {
public name = 'DeepL Free';
private apiKey: string;
/**
* Number of tokens to translate at once
*/
private batchSize: number = 1000;
private supportedLanguages: Set<string>;
private interpolationMatcher: Matcher;
private decodeEscapes: boolean;
Expand All @@ -27,10 +31,12 @@ export class DeepLFree implements TranslationService {
throw new Error(`Please provide an API key for DeepL Free.`);
}

const [apiKey, formality] = config.split(',');
const [apiKey, formality, batchSize] = config.split(',');
this.apiKey = apiKey;
this.formality =
formality === 'less' || formality === 'more' ? formality : 'default';
this.batchSize = isNaN(parseInt(batchSize)) ? 1000 : parseInt(batchSize);

this.interpolationMatcher = interpolationMatcher;
this.supportedLanguages = await this.fetchLanguages();
this.decodeEscapes = decodeEscapes;
Expand Down Expand Up @@ -69,34 +75,49 @@ export class DeepLFree implements TranslationService {
from: string,
to: string,
) {
return Promise.all(
strings.map((string) => this.translateString(string, from, to)),
);
const responses: TranslationResult[] = [];
// Split the translation requests into batches
// This is done because the DeepL API does not allow the body of a request to be larger than 128 KiB (128 · 1024 bytes)
// The default batch size is 1000 tokens, as this was found to almost always fit in the limit
for (let i = 0; i < strings.length; i += this.batchSize) {
const chunk = strings.slice(i, i + this.batchSize);

responses.push(...(await this.runTranslation(chunk, from, to)));
}
return responses;
}

async translateString(
string: { key: string; value: string },
async runTranslation(
strings: { key: string; value: string }[],
from: string,
to: string,
triesLeft: number = 5,
): Promise<TranslationResult> {
const { clean, replacements } = replaceInterpolations(
string.value,
this.interpolationMatcher,
): Promise<TranslationResult[]> {
const cleaned = strings.map((s) =>
replaceInterpolations(s.value, this.interpolationMatcher),
);

const url = new URL(`${API_ENDPOINT}/translate`);
url.searchParams.append('text', clean);
url.searchParams.append('source_lang', from.toUpperCase());
url.searchParams.append('target_lang', to.toUpperCase());
url.searchParams.append('auth_key', this.apiKey);
url.searchParams.append('formality', this.formality);
const body = {
text: cleaned.map((c) => c.clean),
source_lang: from.toUpperCase(),
target_lang: to.toUpperCase(),
};

const response = await fetch(String(url));
// send request as a POST request, with all the tokens as separate texts in the body
const response = await fetch(`${API_ENDPOINT}/translate`, {
body: JSON.stringify(body),
method: 'POST',
headers: {
Authorization: `DeepL-Auth-Key ${this.apiKey}`,
'Content-Type': 'application/json',
},
});

if (!response.ok) {
// automatically retry the translation if DeepL rate-limits us
// see https://support.deepl.com/hc/en-us/articles/360020710619-Error-code-429
if (response.status === 429 && triesLeft > 0) {
return this.translateString(string, from, to, triesLeft - 1);
return this.runTranslation(strings, from, to, triesLeft - 1);
}

throw new Error(
Expand All @@ -105,16 +126,25 @@ export class DeepLFree implements TranslationService {
}`,
);
}
// the translations in the response are in the same order as the texts sent in the request body
const responseTranslations = (await response.json()).translations;

const translated = reInsertInterpolations(
(await response.json()).translations[0].text,
replacements,
const translated = cleaned.map(async (c, index) =>
reInsertInterpolations(responseTranslations[index].text, c.replacements),
);

return {
key: string.key,
value: string.value,
translated: this.decodeEscapes ? decode(translated) : translated,
};
const result: TranslationResult[] = [];

// match the strings to be translated with their retrieved translations
for (let index = 0; index < strings.length; index++) {
const string = strings[index];
const t = await translated[index];
result.push({
key: string.key,
value: string.value,
translated: this.decodeEscapes ? decode(t) : t,
});
}
return result;
}
}
101 changes: 71 additions & 30 deletions src/services/deepl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ const API_ENDPOINT = 'https://api.deepl.com/v2';
export class DeepL implements TranslationService {
public name = 'DeepL';
private apiKey: string;
/**
* Number of tokens to translate at once
*/
private batchSize: number = 1000;
private supportedLanguages: Set<string>;
private formalityLanguages: Set<string>;
private interpolationMatcher: Matcher;
Expand All @@ -27,10 +31,11 @@ export class DeepL implements TranslationService {
if (!config) {
throw new Error(`Please provide an API key for DeepL.`);
}
const [apiKey, formality] = config.split(',');
const [apiKey, formality, batchSize] = config.split(',');
this.apiKey = apiKey;
this.formality =
formality === 'less' || formality === 'more' ? formality : 'default';
this.batchSize = isNaN(parseInt(batchSize)) ? 1000 : parseInt(batchSize);
this.interpolationMatcher = interpolationMatcher;
const languages = await this.fetchLanguages();
this.supportedLanguages = this.formatLanguages(languages);
Expand All @@ -57,12 +62,24 @@ export class DeepL implements TranslationService {
return languages;
}

getFormalityLanguages(languages: Array<{ language: string, name: string, supports_formality: boolean}>) {
getFormalityLanguages(
languages: Array<{
language: string;
name: string;
supports_formality: boolean;
}>,
) {
const supportedLangauges = languages.filter((l) => l.supports_formality);
return this.formatLanguages(supportedLangauges);
}

formatLanguages(languages: Array<{ language: string, name: string, supports_formality: boolean}>) {
formatLanguages(
languages: Array<{
language: string;
name: string;
supports_formality: boolean;
}>,
) {
// DeepL supports e.g. either EN-US or EN as language code, but only returns EN-US
// so we add both variants to the array and filter duplicates later.
const languageCodes = languages.flatMap((l) => [
Expand All @@ -85,38 +102,53 @@ export class DeepL implements TranslationService {
from: string,
to: string,
) {
return Promise.all(
strings.map((string) => this.translateString(string, from, to)),
);
const responses: TranslationResult[] = [];
// Split the translation requests into batches
// This is done because the DeepL API does not allow the body of a request to be larger than 128 KiB (128 · 1024 bytes)
// The default batch size is 1000 tokens, as this was found to almost always fit in the limit
for (let i = 0; i < strings.length; i += this.batchSize) {
const chunk = strings.slice(i, i + this.batchSize);

responses.push(...(await this.runTranslation(chunk, from, to)));
}
return responses;
}

async translateString(
string: { key: string; value: string },
async runTranslation(
strings: { key: string; value: string }[],
from: string,
to: string,
triesLeft: number = 5,
): Promise<TranslationResult> {
const { clean, replacements } = replaceInterpolations(
string.value,
this.interpolationMatcher,
): Promise<TranslationResult[]> {
const cleaned = strings.map((s) =>
replaceInterpolations(s.value, this.interpolationMatcher),
);

const url = new URL(`${API_ENDPOINT}/translate`);
url.searchParams.append('text', clean);
url.searchParams.append('source_lang', from.toUpperCase());
url.searchParams.append('target_lang', to.toUpperCase());
url.searchParams.append('auth_key', this.apiKey);
url.searchParams.append('auth_key', this.apiKey);
const body = {
text: cleaned.map((c) => c.clean),
source_lang: from.toUpperCase(),
target_lang: to.toUpperCase(),
};
if (this.supportsFormality(to)) {
//only append formality to avoid bad request error from deepl for languages with unsupported formality
url.searchParams.append('formality', this.formality);
// only send formality for languages that support it, to avoid a bad request error from DeepL
body['formality'] = this.formality;
}

const response = await fetch(String(url));
// send request as a POST request, with all the tokens as separate texts in the body
const response = await fetch(`${API_ENDPOINT}/translate`, {
body: JSON.stringify(body),
method: 'POST',
headers: {
Authorization: `DeepL-Auth-Key ${this.apiKey}`,
'Content-Type': 'application/json',
},
});

if (!response.ok) {
// automatically retry the translation if DeepL rate-limits us
// see https://support.deepl.com/hc/en-us/articles/360020710619-Error-code-429
if (response.status === 429 && triesLeft > 0) {
return this.translateString(string, from, to, triesLeft - 1);
return this.runTranslation(strings, from, to, triesLeft - 1);
}

throw new Error(
Expand All @@ -125,16 +157,25 @@ export class DeepL implements TranslationService {
}`,
);
}
// the translations in the response are in the same order as the texts sent in the request body
const responseTranslations = (await response.json()).translations;

const translated = reInsertInterpolations(
(await response.json()).translations[0].text,
replacements,
const translated = cleaned.map(async (c, index) =>
reInsertInterpolations(responseTranslations[index].text, c.replacements),
);

return {
key: string.key,
value: string.value,
translated: this.decodeEscapes ? decode(translated) : translated,
};
const result: TranslationResult[] = [];

// match the strings to be translated with their retrieved translations
for (let index = 0; index < strings.length; index++) {
const string = strings[index];
const t = await translated[index];
result.push({
key: string.key,
value: string.value,
translated: this.decodeEscapes ? decode(t) : t,
});
}
return result;
}
}

0 comments on commit d275ccf

Please sign in to comment.