Skip to content

Commit

Permalink
Update jinja dependency (#459)
Browse files Browse the repository at this point in the history
* Make `@huggingface/jinja` a dependency

* Update package-lock.json

* Update JSDoc
  • Loading branch information
xenova authored Dec 18, 2023
1 parent 61cb4f5 commit 1427125
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 25 deletions.
5 changes: 1 addition & 4 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 2 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,12 @@
"homepage": "https://github.com/xenova/transformers.js#readme",
"dependencies": {
"onnxruntime-web": "1.14.0",
"sharp": "^0.32.0"
"sharp": "^0.32.0",
"@huggingface/jinja": "^0.1.0"
},
"optionalDependencies": {
"onnxruntime-node": "1.14.0"
},
"peerDependencies": {
"@huggingface/jinja": "^0.1.0"
},
"devDependencies": {
"@types/jest": "^29.5.1",
"catharsis": "github:xenova/catharsis",
Expand Down
21 changes: 6 additions & 15 deletions src/tokenizers.js
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ import {
CharTrie,
} from './utils/data-structures.js';

import { Template } from '@huggingface/jinja';


/**
* @typedef {Object} TokenizerProperties Additional tokenizer-specific properties.
Expand Down Expand Up @@ -2785,10 +2787,10 @@ export class PreTrainedTokenizer extends Callable {
* { "role": "user", "content": "I'd like to show off how chat templating works!" },
* ]
*
* const text = await tokenizer.apply_chat_template(chat, { tokenize: false });
* const text = tokenizer.apply_chat_template(chat, { tokenize: false });
* // "<s>[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?</s> [INST] I'd like to show off how chat templating works! [/INST]"
*
* const input_ids = await tokenizer.apply_chat_template(chat, { tokenize: true, return_tensor: false });
* const input_ids = tokenizer.apply_chat_template(chat, { tokenize: true, return_tensor: false });
* // [1, 733, 16289, 28793, 22557, 28725, 910, 460, 368, 28804, 733, 28748, 16289, 28793, 28737, 28742, 28719, 2548, 1598, 28723, 1602, 541, 315, 1316, 368, 3154, 28804, 2, 28705, 733, 16289, 28793, 315, 28742, 28715, 737, 298, 1347, 805, 910, 10706, 5752, 1077, 3791, 28808, 733, 28748, 16289, 28793]
* ```
*
Expand All @@ -2806,9 +2808,9 @@ export class PreTrainedTokenizer extends Callable {
* @param {number} [options.max_length=null] Maximum length (in tokens) to use for padding or truncation. Has no effect if tokenize is false.
* If not specified, the tokenizer's `max_length` attribute will be used as a default.
* @param {boolean} [options.return_tensor=true] Whether to return the output as a Tensor or an Array. Has no effect if tokenize is false.
* @returns {Promise<string | Tensor | number[]| number[][]>} A promise that resolves to the tokenized output.
* @returns {string | Tensor | number[] | number[][]} The rendered chat text if `tokenize` is false; otherwise, the tokenized output.
*/
async apply_chat_template(conversation, {
apply_chat_template(conversation, {
chat_template = null,
add_generation_prompt = false,
tokenize = true,
Expand All @@ -2823,17 +2825,6 @@ export class PreTrainedTokenizer extends Callable {
// Compiled templates are cached to avoid recompiling the same template
let compiledTemplate = this._compiled_template_cache.get(chat_template);
if (compiledTemplate === undefined) {
// Dynamically load the `@huggingface/jinja` library. Since this is a peer dependency
// (i.e., must be installed separately), an error is thrown if it is not installed.
let Template;
try {
Template = (await import( /* webpackMode: "eager" */ '@huggingface/jinja')).Template;
} catch (e) {
throw new Error(
`apply_chat_template requires '@huggingface/jinja' to be installed. ` +
`You can install it with \`npm install @huggingface/jinja\`.`
)
}
compiledTemplate = new Template(chat_template);
this._compiled_template_cache.set(chat_template, compiledTemplate);
}
Expand Down
4 changes: 2 additions & 2 deletions tests/tokenizers.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -189,11 +189,11 @@ describe('Chat templates', () => {
{ "role": "user", "content": "I'd like to show off how chat templating works!" },
]

const text = await tokenizer.apply_chat_template(chat, { tokenize: false });
const text = tokenizer.apply_chat_template(chat, { tokenize: false });

expect(text).toEqual("<s>[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?</s> [INST] I'd like to show off how chat templating works! [/INST]");

const input_ids = await tokenizer.apply_chat_template(chat, { tokenize: true, return_tensor: false });
const input_ids = tokenizer.apply_chat_template(chat, { tokenize: true, return_tensor: false });
compare(input_ids, [1, 733, 16289, 28793, 22557, 28725, 910, 460, 368, 28804, 733, 28748, 16289, 28793, 28737, 28742, 28719, 2548, 1598, 28723, 1602, 541, 315, 1316, 368, 3154, 28804, 2, 28705, 733, 16289, 28793, 315, 28742, 28715, 737, 298, 1347, 805, 910, 10706, 5752, 1077, 3791, 28808, 733, 28748, 16289, 28793])
});

Expand Down

0 comments on commit 1427125

Please sign in to comment.