
Merge branch 'main' into phi_hf_create_from_transformers
Andrei-Aksionov authored Apr 23, 2024
2 parents de45214 + a91b520 commit 8c0760d
Showing 3 changed files with 18 additions and 12 deletions.
14 changes: 10 additions & 4 deletions README.md
@@ -29,6 +29,8 @@ Uses the latest state-of-the-art techniques:

</div>

&nbsp;
<img src="https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/GithubLitGPTDAG2.png" alt="LitGPT steps" width="auto"/>
&nbsp;

# Finetune, pretrain and deploy LLMs Lightning fast ⚡⚡
@@ -43,6 +45,8 @@ We reimplemented all model architectures and training recipes from scratch for 4

---

&nbsp;

# Choose from 20+ LLMs
LitGPT has 🤯 **custom, from-scratch implementations** of [20+ LLMs](tutorials/download_model_weights.md) without layers of abstraction:

@@ -57,6 +61,7 @@ LitGPT has 🤯 **custom, from-scratch implementations** of [20+ LLMs](tutorials

<details>
<summary>See full list of 20+ LLMs</summary>

&nbsp;

#### All models
@@ -88,6 +93,8 @@ LitGPT has 🤯 **custom, from-scratch implementations** of [20+ LLMs](tutorials

</details>

&nbsp;

## Install LitGPT

Install LitGPT with all dependencies (including CLI, quantization, tokenizers for all models, etc.):
@@ -110,10 +117,9 @@ pip install -e '.[all]'
```
</details>

&nbsp;

---

&nbsp;
# Quick start
After installing LitGPT, select the model and action you want to take on that model (finetune, pretrain, evaluate, deploy, etc...):

@@ -174,11 +180,11 @@ litgpt finetune \
--data JSON \
--data.json_path my_custom_dataset.json \
--data.val_split_fraction 0.1 \
--out_dir out/phi-2-lora
--out_dir out/custom-model

# 3) Chat with the model
litgpt chat \
--checkpoint_dir out/phi-2-lora/final
--checkpoint_dir out/custom-model/final
```

&nbsp;
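A note on the finetuning example above: `--data JSON` points LitGPT at a local dataset file. The schema below (Alpaca-style `instruction`/`input`/`output` records) is an assumption based on LitGPT's data tutorials and may differ between versions; here is a small Python sketch that writes a compatible `my_custom_dataset.json`:

```python
import json

# Toy records in the Alpaca-style schema assumed above; not taken from this diff.
samples = [
    {
        "instruction": "Summarize the sentence.",
        "input": "LitGPT provides from-scratch implementations of 20+ LLMs.",
        "output": "LitGPT reimplements over 20 LLMs from scratch.",
    },
    {
        "instruction": "Translate to French.",
        "input": "Good morning!",
        "output": "Bonjour !",
    },
]

# Write the file referenced by --data.json_path in the finetune command above.
with open("my_custom_dataset.json", "w", encoding="utf-8") as fp:
    json.dump(samples, fp, indent=2, ensure_ascii=False)
```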
8 changes: 4 additions & 4 deletions litgpt/config.py
@@ -847,8 +847,8 @@ def norm_class(self) -> Type:
name="Llama-3-8B{}",
hf_config=dict(org="meta-llama", name="Meta-Llama-3-8B{}"),
block_size=8192,
vocab_size=128256,
padding_multiple=64,
vocab_size=128000,
padded_vocab_size=128256,
n_layer=32,
n_head=32,
n_query_groups=8,
@@ -865,8 +865,8 @@ def norm_class(self) -> Type:
name="Llama-3-70B{}",
hf_config=dict(org="meta-llama", name="Meta-Llama-3-70B{}"),
block_size=8192,
vocab_size=128256,
padding_multiple=64,
vocab_size=128000,
padded_vocab_size=128256,
n_layer=80,
n_head=64,
n_embd=8192,
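
In the config.py hunks above, the Llama 3 entries replace `vocab_size=128256` / `padding_multiple=64` with an explicit split into `vocab_size=128000` (the tokenizer's base vocabulary) and `padded_vocab_size=128256` (the embedding rows in the released checkpoints, which include 256 reserved/special tokens). A minimal sketch of why the explicit value is needed, assuming the usual round-up-to-a-multiple rule (`find_multiple` here is an illustrative helper, not taken from this diff):

```python
def find_multiple(n: int, k: int) -> int:
    """Round n up to the nearest multiple of k."""
    if n % k == 0:
        return n
    return n + k - (n % k)

# 128,000 is already a multiple of 64, so deriving the padded size from
# padding_multiple=64 would leave it at 128,000 ...
assert find_multiple(128_000, 64) == 128_000
# ... which is why the config now pins the checkpoint's embedding count directly.
assert find_multiple(128_000, 64) != 128_256
```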
8 changes: 4 additions & 4 deletions litgpt/tokenizer.py
@@ -73,11 +73,11 @@ def check_if_bos_token_used(self, checkpoint_dir: Path) -> bool:
return False
with open(tokenizer_config_path, encoding="utf-8") as fp:
config = json.load(fp)
if any(config.get(check, False) for check in ("add_bos_token", "add_prefix_space")):
return True
# for examples that also use the Llama tokenizer, but do not have or set add_bos_token to True.
if "add_bos_token" in config:
return config["add_bos_token"]
# if `add_bos_token` isn't in the config file, but LLaMA tokenizer is used - return True.
# ex: https://huggingface.co/stabilityai/StableBeluga2/blob/main/tokenizer_config.json#L2
return config.get("add_bos_token") is None and config.get("tokenizer_class") == "LlamaTokenizer"
return config.get("tokenizer_class") == "LlamaTokenizer"

def encode(
self,
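
The tokenizer.py hunk above simplifies `check_if_bos_token_used`: an explicit `add_bos_token` flag in `tokenizer_config.json` is now returned directly, and only when the flag is absent does the check fall back to assuming a BOS token for `LlamaTokenizer` checkpoints (such as the StableBeluga2 example linked in the comment). A self-contained sketch of the resulting logic, with the surrounding class stripped away for illustration:

```python
import json
from pathlib import Path


def check_if_bos_token_used(checkpoint_dir: Path) -> bool:
    """Mirror the updated logic: prefer an explicit `add_bos_token` flag,
    otherwise assume a BOS token only for the Llama tokenizer class."""
    tokenizer_config_path = checkpoint_dir / "tokenizer_config.json"
    if not tokenizer_config_path.is_file():
        return False
    with open(tokenizer_config_path, encoding="utf-8") as fp:
        config = json.load(fp)
    if "add_bos_token" in config:
        return config["add_bos_token"]
    # `add_bos_token` missing: assume BOS only when the Llama tokenizer is used,
    # e.g. https://huggingface.co/stabilityai/StableBeluga2/blob/main/tokenizer_config.json#L2
    return config.get("tokenizer_class") == "LlamaTokenizer"
```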
