From b6bd608322be415cec340c9e804265283054a25e Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Tue, 3 Oct 2023 12:35:07 +0200 Subject: [PATCH] Update falcon tokenizer (#344) * Update generate_tests.py * Do not add token types for `FalconTokenizer` --- src/tokenizers.js | 7 +------ tests/generate_tests.py | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/tokenizers.js b/src/tokenizers.js index 49c3a71ca..74d179a6d 100644 --- a/src/tokenizers.js +++ b/src/tokenizers.js @@ -2705,12 +2705,7 @@ export class CodeLlamaTokenizer extends PreTrainedTokenizer { } export class XLMRobertaTokenizer extends PreTrainedTokenizer { } export class MPNetTokenizer extends PreTrainedTokenizer { } -export class FalconTokenizer extends PreTrainedTokenizer { - /** @type {add_token_types} */ - prepare_model_inputs(inputs) { - return add_token_types(inputs); - } -} +export class FalconTokenizer extends PreTrainedTokenizer { } export class GPTNeoXTokenizer extends PreTrainedTokenizer { } diff --git a/tests/generate_tests.py b/tests/generate_tests.py index 2e816459a..adf35b399 100644 --- a/tests/generate_tests.py +++ b/tests/generate_tests.py @@ -10,7 +10,7 @@ # List of tokenizers where the model isn't yet supported, but the tokenizer is ADDITIONAL_TOKENIZERS_TO_TEST = { - 'RefinedWebModel': [ + 'falcon': [ 'tiiuae/falcon-7b', ], "llama": [