diff --git a/litgpt/tokenizer.py b/litgpt/tokenizer.py index a81c59aa2d..ed78ca550d 100644 --- a/litgpt/tokenizer.py +++ b/litgpt/tokenizer.py @@ -94,7 +94,7 @@ def check_if_bos_token_used(self, checkpoint_dir: Path) -> bool: config = json.load(fp) # for LlaMA-3 tokenizer there is no `add_bos_token` at all and `tokenizer_class` is only # `PreTrainedTokenizerFast` - if checkpoint_dir.stem.startswith(("Meta-Llama-3", "Llama-3")): + if checkpoint_dir.stem.startswith(("Meta-Llama-3", "Llama-3", "SmolLM2")): return True if "add_bos_token" in config: return config["add_bos_token"]