Skip to content

Commit

Permalink
SmolLM2: fixed path specification for 1.7B-Instruct
Browse files (browse the repository at this point in the history)
  • Loading branch information
ysjprojects committed Dec 3, 2024
1 parent a46e3f8 commit d06b5ce
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions litgpt/tokenizer.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -87,7 +87,7 @@ def token_to_id(self, token: str) -> int:
raise ValueError(f"token {token!r} not found in the collection.")
return id_

def check_if_bos_token_used(self, checkpoint_dir: Path) -> bool:
def check_if_bos_token_used(self, checkpoint_dir: Path) -> bool:
if not (tokenizer_config_path := checkpoint_dir / "tokenizer_config.json").is_file():
return False
with open(tokenizer_config_path, encoding="utf-8") as fp:
Expand All @@ -96,7 +96,7 @@ def check_if_bos_token_used(self, checkpoint_dir: Path) -> bool:
# `PreTrainedTokenizerFast`
if checkpoint_dir.stem.startswith(("Meta-Llama-3", "Llama-3")):
return True
if checkpoint_dir.stem.startswith("SmolLM2") and checkpoint_dir.stem.endswith("-Instruct"):
if checkpoint_dir.stem.startswith("SmolLM2") and checkpoint_dir.name.endswith("Instruct"):
return True
if "add_bos_token" in config:
return config["add_bos_token"]
Expand Down

0 comments on commit d06b5ce

Please sign in to comment.