diff --git a/README.md b/README.md
index 4bd3d8bf75..9cba60e755 100644
--- a/README.md
+++ b/README.md
@@ -29,6 +29,8 @@ Uses the latest state-of-the-art techniques:
+&nbsp;
+<img alt="LitGPT steps" src="...">
 &nbsp;
 
 # Finetune, pretrain and deploy LLMs Lightning fast ⚡⚡
@@ -43,6 +45,8 @@ We reimplemented all model architectures and training recipes from scratch for 4
 ---
 
+&nbsp;
+
 # Choose from 20+ LLMs
 
 LitGPT has 🤯 **custom, from-scratch implementations** of [20+ LLMs](tutorials/download_model_weights.md) without layers of abstraction:
@@ -57,6 +61,7 @@ LitGPT has 🤯 **custom, from-scratch implementations** of [20+ LLMs](tutorials
 See full list of 20+ LLMs
+&nbsp;
 
 #### All models
@@ -88,6 +93,8 @@ LitGPT has 🤯 **custom, from-scratch implementations** of [20+ LLMs](tutorials
+&nbsp;
+
 ## Install LitGPT
 
 Install LitGPT with all dependencies (including CLI, quantization, tokenizers for all models, etc.):
@@ -110,10 +117,9 @@ pip install -e '.[all]'
 ```
 
-&nbsp;
-
 ---
 
+&nbsp;
 
 # Quick start
 
 After installing LitGPT, select the model and action you want to take on that model (finetune, pretrain, evaluate, deploy, etc...):
@@ -174,11 +180,11 @@ litgpt finetune \
   --data JSON \
   --data.json_path my_custom_dataset.json \
   --data.val_split_fraction 0.1 \
-  --out_dir out/phi-2-lora
+  --out_dir out/custom-model
 
 # 3) Chat with the model
 litgpt chat \
-  --checkpoint_dir out/phi-2-lora/final
+  --checkpoint_dir out/custom-model/final
 ```
 
 &nbsp;
diff --git a/litgpt/config.py b/litgpt/config.py
index 9ff7825589..e03fa8ae34 100644
--- a/litgpt/config.py
+++ b/litgpt/config.py
@@ -847,8 +847,8 @@ def norm_class(self) -> Type:
         name="Llama-3-8B{}",
         hf_config=dict(org="meta-llama", name="Meta-Llama-3-8B{}"),
         block_size=8192,
-        vocab_size=128256,
-        padding_multiple=64,
+        vocab_size=128000,
+        padded_vocab_size=128256,
         n_layer=32,
         n_head=32,
         n_query_groups=8,
@@ -865,8 +865,8 @@ def norm_class(self) -> Type:
         name="Llama-3-70B{}",
         hf_config=dict(org="meta-llama", name="Meta-Llama-3-70B{}"),
         block_size=8192,
-        vocab_size=128256,
-        padding_multiple=64,
+        vocab_size=128000,
+        padded_vocab_size=128256,
         n_layer=80,
         n_head=64,
         n_embd=8192,
diff --git a/litgpt/tokenizer.py b/litgpt/tokenizer.py
index 55c972e69a..8217fcd069 100644
--- a/litgpt/tokenizer.py
+++ b/litgpt/tokenizer.py
@@ -73,11 +73,11 @@ def check_if_bos_token_used(self, checkpoint_dir: Path) -> bool:
             return False
         with open(tokenizer_config_path, encoding="utf-8") as fp:
             config = json.load(fp)
-        if any(config.get(check, False) for check in ("add_bos_token", "add_prefix_space")):
-            return True
-        # for examples that also use the Llama tokenizer, but do not have or set add_bos_token to True.
+        if "add_bos_token" in config:
+            return config["add_bos_token"]
+        # if `add_bos_token` isn't in the config file, but LLaMA tokenizer is used - return True.
         # ex: https://huggingface.co/stabilityai/StableBeluga2/blob/main/tokenizer_config.json#L2
-        return config.get("add_bos_token") is None and config.get("tokenizer_class") == "LlamaTokenizer"
+        return config.get("tokenizer_class") == "LlamaTokenizer"
 
     def encode(
         self,
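For the Llama 3 entries, the config now distinguishes the tokenizer's 128,000 base tokens (`vocab_size`) from the 128,256-slot embedding table (`padded_vocab_size`), which leaves room for the 256 reserved special token IDs instead of deriving the padding from `padding_multiple=64`. The tokenizer change narrows BOS detection to two cases: an explicit `add_bos_token` flag in `tokenizer_config.json`, or a `LlamaTokenizer` class when the flag is absent. Below is a minimal, self-contained sketch of that rule; the helper name `bos_token_used` is made up for illustration, while LitGPT itself implements it inside `Tokenizer.check_if_bos_token_used`.

```python
import json
from pathlib import Path


def bos_token_used(checkpoint_dir: Path) -> bool:
    """Return True when the checkpoint's tokenizer should prepend a BOS token.

    Mirrors the rule from the patched ``Tokenizer.check_if_bos_token_used``:
    an explicit ``add_bos_token`` entry wins; without it, only the
    ``LlamaTokenizer`` class defaults to True.
    """
    config_path = checkpoint_dir / "tokenizer_config.json"
    if not config_path.is_file():
        return False
    with open(config_path, encoding="utf-8") as fp:
        config = json.load(fp)
    if "add_bos_token" in config:
        # Hugging Face stores this as a JSON boolean; respect it verbatim.
        return bool(config["add_bos_token"])
    # No explicit flag: checkpoints using the Llama tokenizer (e.g. StableBeluga2)
    # still expect a BOS token; everything else does not.
    return config.get("tokenizer_class") == "LlamaTokenizer"
```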