From 1427125dc350143ca51691c59f35e11438d78155 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Mon, 18 Dec 2023 23:22:24 +0200 Subject: [PATCH] Update jinja dependency (#459) * Make `@huggingface/jinja` a dependency * Update package-lock.json * Update JSDoc --- package-lock.json | 5 +---- package.json | 6 ++---- src/tokenizers.js | 21 ++++++--------------- tests/tokenizers.test.js | 4 ++-- 4 files changed, 11 insertions(+), 25 deletions(-) diff --git a/package-lock.json b/package-lock.json index f615400c2..60ee8f62b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,6 +9,7 @@ "version": "2.12.0", "license": "Apache-2.0", "dependencies": { + "@huggingface/jinja": "^0.1.0", "onnxruntime-web": "1.14.0", "sharp": "^0.32.0" }, @@ -27,9 +28,6 @@ }, "optionalDependencies": { "onnxruntime-node": "1.14.0" - }, - "peerDependencies": { - "@huggingface/jinja": "^0.1.0" } }, "node_modules/@ampproject/remapping": { @@ -750,7 +748,6 @@ "version": "0.1.0", "resolved": "https://registry.npmjs.org/@huggingface/jinja/-/jinja-0.1.0.tgz", "integrity": "sha512-NgZ0imvGPHblw+nFJN2eC+so0DmvLSEieldI7gjZZbBUDE80ypG1O+DibdeWne1vQuGBYV/pC3XL//SgxiXC7g==", - "peer": true, "engines": { "node": ">=18" } diff --git a/package.json b/package.json index de165ab8e..542f5cc53 100644 --- a/package.json +++ b/package.json @@ -39,14 +39,12 @@ "homepage": "https://github.com/xenova/transformers.js#readme", "dependencies": { "onnxruntime-web": "1.14.0", - "sharp": "^0.32.0" + "sharp": "^0.32.0", + "@huggingface/jinja": "^0.1.0" }, "optionalDependencies": { "onnxruntime-node": "1.14.0" }, - "peerDependencies": { - "@huggingface/jinja": "^0.1.0" - }, "devDependencies": { "@types/jest": "^29.5.1", "catharsis": "github:xenova/catharsis", diff --git a/src/tokenizers.js b/src/tokenizers.js index 50f1bd490..71ffc8683 100644 --- a/src/tokenizers.js +++ b/src/tokenizers.js @@ -41,6 +41,8 @@ import { CharTrie, } from './utils/data-structures.js'; +import { Template } from '@huggingface/jinja'; + /** * @typedef {Object} TokenizerProperties Additional tokenizer-specific properties. @@ -2785,10 +2787,10 @@ export class PreTrainedTokenizer extends Callable { * { "role": "user", "content": "I'd like to show off how chat templating works!" }, * ] * - * const text = await tokenizer.apply_chat_template(chat, { tokenize: false }); + * const text = tokenizer.apply_chat_template(chat, { tokenize: false }); * // "[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today? [INST] I'd like to show off how chat templating works! [/INST]" * - * const input_ids = await tokenizer.apply_chat_template(chat, { tokenize: true, return_tensor: false }); + * const input_ids = tokenizer.apply_chat_template(chat, { tokenize: true, return_tensor: false }); * // [1, 733, 16289, 28793, 22557, 28725, 910, 460, 368, 28804, 733, 28748, 16289, 28793, 28737, 28742, 28719, 2548, 1598, 28723, 1602, 541, 315, 1316, 368, 3154, 28804, 2, 28705, 733, 16289, 28793, 315, 28742, 28715, 737, 298, 1347, 805, 910, 10706, 5752, 1077, 3791, 28808, 733, 28748, 16289, 28793] * ``` * @@ -2806,9 +2808,9 @@ export class PreTrainedTokenizer extends Callable { * @param {number} [options.max_length=null] Maximum length (in tokens) to use for padding or truncation. Has no effect if tokenize is false. * If not specified, the tokenizer's `max_length` attribute will be used as a default. * @param {boolean} [options.return_tensor=true] Whether to return the output as a Tensor or an Array. Has no effect if tokenize is false. - * @returns {Promise} A promise that resolves to the tokenized output. + * @returns {string | Tensor | number[]| number[][]} The tokenized output. */ - async apply_chat_template(conversation, { + apply_chat_template(conversation, { chat_template = null, add_generation_prompt = false, tokenize = true, @@ -2823,17 +2825,6 @@ export class PreTrainedTokenizer extends Callable { // Compilation function uses a cache to avoid recompiling the same template let compiledTemplate = this._compiled_template_cache.get(chat_template); if (compiledTemplate === undefined) { - // Dynamically load the `@huggingface/jinja` library. Since this is a peer dependency - // (i.e., must be installed separately), an error is thrown if it is not installed. - let Template; - try { - Template = (await import( /* webpackMode: "eager" */ '@huggingface/jinja')).Template; - } catch (e) { - throw new Error( - `apply_chat_template requires '@huggingface/jinja' to be installed. ` + - `You can install it with \`npm install @huggingface/jinja\`.` - ) - } compiledTemplate = new Template(chat_template); this._compiled_template_cache.set(chat_template, compiledTemplate); } diff --git a/tests/tokenizers.test.js b/tests/tokenizers.test.js index f0147c7c1..bc2bf1208 100644 --- a/tests/tokenizers.test.js +++ b/tests/tokenizers.test.js @@ -189,11 +189,11 @@ describe('Chat templates', () => { { "role": "user", "content": "I'd like to show off how chat templating works!" }, ] - const text = await tokenizer.apply_chat_template(chat, { tokenize: false }); + const text = tokenizer.apply_chat_template(chat, { tokenize: false }); expect(text).toEqual("[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today? [INST] I'd like to show off how chat templating works! [/INST]"); - const input_ids = await tokenizer.apply_chat_template(chat, { tokenize: true, return_tensor: false }); + const input_ids = tokenizer.apply_chat_template(chat, { tokenize: true, return_tensor: false }); compare(input_ids, [1, 733, 16289, 28793, 22557, 28725, 910, 460, 368, 28804, 733, 28748, 16289, 28793, 28737, 28742, 28719, 2548, 1598, 28723, 1602, 541, 315, 1316, 368, 3154, 28804, 2, 28705, 733, 16289, 28793, 315, 28742, 28715, 737, 298, 1347, 805, 910, 10706, 5752, 1077, 3791, 28808, 733, 28748, 16289, 28793]) });