From d06b5ceb134d28b78c6fba1cf790ab099856bd71 Mon Sep 17 00:00:00 2001
From: Yu Shi Jie
Date: Tue, 3 Dec 2024 12:14:28 -0500
Subject: [PATCH] SmolLM2: fixed path specification for 1.7B-Instruct

---
 litgpt/tokenizer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/litgpt/tokenizer.py b/litgpt/tokenizer.py
index 41aa0dd08a..acfc0493a7 100644
--- a/litgpt/tokenizer.py
+++ b/litgpt/tokenizer.py
@@ -87,7 +87,7 @@ def token_to_id(self, token: str) -> int:
             raise ValueError(f"token {token!r} not found in the collection.")
         return id_
 
-    def check_if_bos_token_used(self, checkpoint_dir: Path) -> bool:
+    def check_if_bos_token_used(self, checkpoint_dir: Path) -> bool:
         if not (tokenizer_config_path := checkpoint_dir / "tokenizer_config.json").is_file():
             return False
         with open(tokenizer_config_path, encoding="utf-8") as fp:
@@ -96,7 +96,7 @@ def check_if_bos_token_used(self, checkpoint_dir: Path) -> bool:
         # `PreTrainedTokenizerFast`
         if checkpoint_dir.stem.startswith(("Meta-Llama-3", "Llama-3")):
             return True
-        if checkpoint_dir.stem.startswith("SmolLM2") and checkpoint_dir.stem.endswith("-Instruct"):
+        if checkpoint_dir.stem.startswith("SmolLM2") and checkpoint_dir.name.endswith("Instruct"):
             return True
         if "add_bos_token" in config:
             return config["add_bos_token"]
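
Rationale, with a minimal pathlib sketch (the checkpoint path below is illustrative, not taken from the patch): `Path.stem` treats ".7B-Instruct" in "SmolLM2-1.7B-Instruct" as a file suffix and strips it, so the old `stem.endswith("-Instruct")` check never matched that checkpoint, while `Path.name` keeps the full directory name.

    from pathlib import Path

    # Illustrative checkpoint directory (layout assumed, not part of the patch).
    checkpoint_dir = Path("checkpoints/HuggingFaceTB/SmolLM2-1.7B-Instruct")

    # pathlib treats ".7B-Instruct" as the suffix of the final path component,
    # so `stem` drops it and the old check fails for the 1.7B variant.
    checkpoint_dir.stem                          # 'SmolLM2-1'
    checkpoint_dir.stem.endswith("-Instruct")    # False

    # `name` keeps the full directory name, so the new check matches.
    checkpoint_dir.name                          # 'SmolLM2-1.7B-Instruct'
    checkpoint_dir.name.endswith("Instruct")     # True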