diff --git a/README.md b/README.md
index 4bd3d8bf75..9cba60e755 100644
--- a/README.md
+++ b/README.md
@@ -29,6 +29,8 @@ Uses the latest state-of-the-art techniques:
+
+
# Finetune, pretrain and deploy LLMs Lightning fast ⚡⚡
@@ -43,6 +45,8 @@ We reimplemented all model architectures and training recipes from scratch for 4
---
+
+
# Choose from 20+ LLMs
LitGPT has 🤯 **custom, from-scratch implementations** of [20+ LLMs](tutorials/download_model_weights.md) without layers of abstraction:
@@ -57,6 +61,7 @@ LitGPT has 🤯 **custom, from-scratch implementations** of [20+ LLMs](tutorials
See full list of 20+ LLMs
+
#### All models
@@ -88,6 +93,8 @@ LitGPT has 🤯 **custom, from-scratch implementations** of [20+ LLMs](tutorials
+
+
## Install LitGPT
Install LitGPT with all dependencies (including CLI, quantization, tokenizers for all models, etc.):
@@ -110,10 +117,9 @@ pip install -e '.[all]'
```
-
-
---
+
# Quick start
After installing LitGPT, select the model and action you want to take on that model (finetune, pretrain, evaluate, deploy, etc...):
@@ -174,11 +180,11 @@ litgpt finetune \
--data JSON \
--data.json_path my_custom_dataset.json \
--data.val_split_fraction 0.1 \
- --out_dir out/phi-2-lora
+ --out_dir out/custom-model
# 3) Chat with the model
litgpt chat \
- --checkpoint_dir out/phi-2-lora/final
+ --checkpoint_dir out/custom-model/final
```
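For the `--data JSON` route shown above, the dataset file has to exist before finetuning. As a hedged illustration only, the sketch below writes a `my_custom_dataset.json` with Alpaca-style `instruction`/`input`/`output` records; the exact field names are an assumption here, so check the LitGPT data docs for the authoritative schema.

```python
# Hypothetical contents for my_custom_dataset.json (Alpaca-style fields assumed).
import json

records = [
    {
        "instruction": "Summarize the sentence.",
        "input": "LitGPT has custom, from-scratch implementations of 20+ LLMs.",
        "output": "LitGPT reimplements more than 20 LLMs from scratch.",
    },
    # Add more records; --data.val_split_fraction 0.1 holds out 10% of them for validation.
]

with open("my_custom_dataset.json", "w", encoding="utf-8") as fp:
    json.dump(records, fp, indent=2, ensure_ascii=False)
```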
diff --git a/litgpt/config.py b/litgpt/config.py
index 9ff7825589..e03fa8ae34 100644
--- a/litgpt/config.py
+++ b/litgpt/config.py
@@ -847,8 +847,8 @@ def norm_class(self) -> Type:
name="Llama-3-8B{}",
hf_config=dict(org="meta-llama", name="Meta-Llama-3-8B{}"),
block_size=8192,
- vocab_size=128256,
- padding_multiple=64,
+ vocab_size=128000,
+ padded_vocab_size=128256,
n_layer=32,
n_head=32,
n_query_groups=8,
@@ -865,8 +865,8 @@ def norm_class(self) -> Type:
name="Llama-3-70B{}",
hf_config=dict(org="meta-llama", name="Meta-Llama-3-70B{}"),
block_size=8192,
- vocab_size=128256,
- padding_multiple=64,
+ vocab_size=128000,
+ padded_vocab_size=128256,
n_layer=80,
n_head=64,
n_embd=8192,
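The two config hunks above make the same distinction: the Llama 3 tokenizer has 128,000 BPE tokens, while the checkpoint embedding and output matrices have 128,256 rows (the extra 256 slots cover reserved special tokens). Because 128,000 is already a multiple of 64, rounding up by `padding_multiple` could never reach 128,256, so `padded_vocab_size` is pinned explicitly. Below is a minimal sketch of the round-up logic, assuming LitGPT derives the padded size from `vocab_size` and `padding_multiple` when it is not set:

```python
# Sketch of the usual round-up rule (not necessarily LitGPT's exact helper).
def find_multiple(n: int, k: int) -> int:
    """Round n up to the nearest multiple of k."""
    return n if n % k == 0 else n + k - (n % k)

# Old config: 128_256 with padding_multiple=64 happened to round-trip unchanged,
# but it conflated the true vocab size with the padded embedding size.
assert find_multiple(128_256, 64) == 128_256

# New config: rounding the true vocab size up would stop at 128_000, so the
# 128_256-row embedding must be requested explicitly via padded_vocab_size.
assert find_multiple(128_000, 64) == 128_000
assert 128_000 + 256 == 128_256  # 128k BPE tokens + 256 reserved special tokens
```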
diff --git a/litgpt/tokenizer.py b/litgpt/tokenizer.py
index 55c972e69a..8217fcd069 100644
--- a/litgpt/tokenizer.py
+++ b/litgpt/tokenizer.py
@@ -73,11 +73,11 @@ def check_if_bos_token_used(self, checkpoint_dir: Path) -> bool:
return False
with open(tokenizer_config_path, encoding="utf-8") as fp:
config = json.load(fp)
- if any(config.get(check, False) for check in ("add_bos_token", "add_prefix_space")):
- return True
- # for examples that also use the Llama tokenizer, but do not have or set add_bos_token to True.
+ if "add_bos_token" in config:
+ return config["add_bos_token"]
+        # If `add_bos_token` is missing from the config but the Llama tokenizer is used, default to True.
# ex: https://huggingface.co/stabilityai/StableBeluga2/blob/main/tokenizer_config.json#L2
- return config.get("add_bos_token") is None and config.get("tokenizer_class") == "LlamaTokenizer"
+ return config.get("tokenizer_class") == "LlamaTokenizer"
def encode(
self,
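With the tokenizer change, `check_if_bos_token_used` resolves the BOS decision in two steps: honor an explicit `add_bos_token` value if the config has one, otherwise assume BOS only for `LlamaTokenizer` configs such as the StableBeluga2 example linked in the comment. A small standalone sketch of that truth table (the helper below is illustrative, not part of the patch):

```python
# Illustrative mirror of the new decision logic in check_if_bos_token_used.
def bos_used(config: dict) -> bool:
    if "add_bos_token" in config:
        return config["add_bos_token"]
    # No explicit flag: assume a BOS token only for the Llama tokenizer.
    return config.get("tokenizer_class") == "LlamaTokenizer"

assert bos_used({"add_bos_token": True}) is True
assert bos_used({"add_bos_token": False, "tokenizer_class": "LlamaTokenizer"}) is False
assert bos_used({"tokenizer_class": "LlamaTokenizer"}) is True   # e.g. StableBeluga2-style config
assert bos_used({"tokenizer_class": "GPT2Tokenizer"}) is False
assert bos_used({"add_prefix_space": True}) is False  # add_prefix_space no longer implies BOS
```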