Merge branch 'main' into likethecognac-patch-1
rasbt authored Apr 22, 2024
2 parents 685867b + 54628ec commit 1f22d93
Showing 10 changed files with 474 additions and 46 deletions.
44 changes: 23 additions & 21 deletions README.md
@@ -1,5 +1,4 @@
<div align="center">
<img src="https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/LitGPTRebrand.png" alt="LitGPT" width="128"/>


# ⚡ LitGPT
@@ -19,11 +18,11 @@ Uses the latest state-of-the-art techniques:
<p align="center">
<a href="https://lightning.ai/">Lightning AI</a> •
<a href="#choose-from-20-llms">Models</a> •
<a href="#install-litgpt">Install</a> •
<a href="#get-started">Get started</a> •
<a href="#use-an-llm">Evaluate</a> •
<a href="#quick-start">Quick start</a> •
<a href="#use-an-llm-for-inference">Inference</a> •
<a href="#finetune-an-llm">Finetune</a> •
<a href="#finetune-an-llm">Pretrain</a> •
<a href="#deploy-an-llm">Deploy</a> •
<a href="#state-of-the-art-features">Features</a> •
<a href="#training-recipes">Training recipes (YAML)</a>
</p>
@@ -35,13 +34,13 @@ Uses the latest state-of-the-art techniques:
&nbsp;

# Finetune, pretrain and deploy LLMs Lightning fast ⚡⚡
LitGPT is a command-line tool designed to easily [finetune](#finetune-an-llm), [pretrain](#pretrain-an-llm), [evaluate](#use-an-llm), and deploy [20+ LLMs](#choose-from-20-llms) **on your own data**. It features highly-optimized [training recipes](#training-recipes) for the world's most powerful open-source large-language-models (LLMs).
LitGPT is a command-line tool designed to easily [finetune](#finetune-an-llm), [pretrain](#pretrain-an-llm), [evaluate](#use-an-llm), and [deploy](#deploy-an-llm) [20+ LLMs](#choose-from-20-llms) **on your own data**. It features highly-optimized [training recipes](#training-recipes) for the world's most powerful open-source large language models (LLMs).

We reimplemented all model architectures and training recipes from scratch for 4 reasons:

1. Remove all abstraction layers and have single file implementations.
2. Guarantee Apache 2.0 compliance to enable enterprise use without limits.
3. Optimized each model architectural detail to maximize performance, reduce costs, and speed up training.
3. Optimized each model's architectural detail to maximize performance, reduce costs, and speed up training.
4. Highly-optimized [recipe configs](#training-recipes) that we have tested at enterprise scale.

---
@@ -53,6 +52,7 @@ LitGPT has 🤯 **custom, from-scratch implementations** of [20+ LLMs](tutorials

| Model | Model size | Author | Reference |
|----|----|----|----|
| Llama 3 | 8B, 70B | Meta AI | [Meta AI 2024](https://github.com/meta-llama/llama3) |
| Llama 2 | 7B, 13B, 70B | Meta AI | [Touvron et al. 2023](https://arxiv.org/abs/2307.09288) |
| Code Llama | 7B, 13B, 34B, 70B | Meta AI | [Rozière et al. 2023](https://arxiv.org/abs/2308.12950) |
| Mistral | 7B | Mistral AI | [Mistral website](https://mistral.ai/) |
@@ -76,6 +76,7 @@ LitGPT has 🤯 **custom, from-scratch implementations** of [20+ LLMs](tutorials
| Function Calling Llama 2 | 7B | Trelis | [Trelis et al. 2023](https://huggingface.co/Trelis/Llama-2-7b-chat-hf-function-calling-v2) |
| Gemma | 2B, 7B | Google | [Google Team, Google Deepmind](https://storage.googleapis.com/deepmind-media/gemma/gemma-report.pdf) |
| Llama 2 | 7B, 13B, 70B | Meta AI | [Touvron et al. 2023](https://arxiv.org/abs/2307.09288) |
| Llama 3 | 8B, 70B | Meta AI | [Meta AI 2024](https://github.com/meta-llama/llama3) |
| LongChat | 7B, 13B | LMSYS | [LongChat Team 2023](https://lmsys.org/blog/2023-06-29-longchat/) |
| Mistral | 7B | Mistral AI | [Mistral website](https://mistral.ai/) |
| Nous-Hermes | 7B, 13B, 70B | NousResearch | [Org page](https://huggingface.co/NousResearch) |
@@ -119,22 +120,22 @@ pip install -e '.[all]'
---

&nbsp;

# Get started
# Quick start
After installing LitGPT, select the model and the action you want to take on it (finetune, pretrain, evaluate, deploy, etc.):

```bash
# litgpt [action] [model]
litgpt download mistralai/Mistral-7B-Instruct-v0.2
litgpt chat mistralai/Mistral-7B-Instruct-v0.2
litgpt finetune mistralai/Mistral-7B-Instruct-v0.2
litgpt pretrain mistralai/Mistral-7B-Instruct-v0.2
litgpt serve mistralai/Mistral-7B-Instruct-v0.2
litgpt download meta-llama/Meta-Llama-3-8B-Instruct
litgpt chat meta-llama/Meta-Llama-3-8B-Instruct
litgpt finetune meta-llama/Meta-Llama-3-8B-Instruct
litgpt pretrain meta-llama/Meta-Llama-3-8B-Instruct
litgpt serve meta-llama/Meta-Llama-3-8B-Instruct
```

&nbsp;

### Use an LLM
### Use an LLM for inference
Use LLMs for inference to test their chatting capabilities, run evaluations, extract embeddings, and more.
Here's an example showing how to use the Mistral 7B LLM.
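For instance, a minimal sketch of that flow, reusing the CLI commands from the quick start above (model name and commands as shown there):

```bash
# Download the Mistral 7B Instruct weights, then chat with the model interactively
litgpt download mistralai/Mistral-7B-Instruct-v0.2
litgpt chat mistralai/Mistral-7B-Instruct-v0.2
```

The `chat` command opens an interactive prompt in the terminal, which is a quick way to sanity-check a downloaded checkpoint before running larger evaluations.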

<a target="_blank" href="https://lightning.ai/lightning-ai/studios/litgpt-chat">
@@ -172,7 +173,7 @@ For more information, refer to the [download](tutorials/download_model_weights.m
litgpt download --repo_id microsoft/phi-2

# 2) Finetune the model
curl -L https://huggingface.co/datasets/medalpaca/medical_meadow_health_advice/raw/main/medical_meadow_health_advice.json -o my_custom_dataset.json
curl -L https://huggingface.co/datasets/ksaw008/finance_alpaca/resolve/main/finance_alpaca.json -o my_custom_dataset.json

litgpt finetune \
--checkpoint_dir checkpoints/microsoft/phi-2 \
@@ -224,7 +225,7 @@ litgpt chat \
&nbsp;

### Continue pretraining an LLM
This is another way of finetuning that specialize an already pretrained model by training on custom data:
This is another way of finetuning that specializes an already pretrained model by training on custom data:
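As a rough sketch of what such a run can look like (the `--initial_checkpoint_dir`, `TextFiles`, and `--data.train_data_path` arguments below are assumptions about the pretraining CLI rather than commands taken from this diff; check `litgpt pretrain --help` for the exact options):

```bash
# Continue pretraining a downloaded base checkpoint on a folder of custom text files.
# NOTE: the flag names below are assumptions; verify them with `litgpt pretrain --help`.
litgpt download --repo_id meta-llama/Meta-Llama-3-8B

litgpt pretrain \
  --model_name Meta-Llama-3-8B \
  --initial_checkpoint_dir checkpoints/meta-llama/Meta-Llama-3-8B \
  --data TextFiles \
  --data.train_data_path custom_texts/ \
  --out_dir out/continued-pretraining
```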


<a target="_blank" href="https://lightning.ai/lightning-ai/studios/litgpt-continue-pretraining">
@@ -259,8 +260,7 @@ litgpt chat \
&nbsp;

### Deploy an LLM

This example illustrates how to deploy an LLM using LitGPT.
Once you're ready to deploy a finetuned LLM, run this command:

<a target="_blank" href="https://lightning.ai/lightning-ai/studios/litgpt-serve">
<img src="https://pl-bolts-doc-images.s3.us-east-2.amazonaws.com/app-2/studio-badge.svg" alt="Open In Studio"/>
@@ -269,13 +269,15 @@ This example illustrates how to deploy an LLM using LitGPT.
&nbsp;

```bash
# 1) Download a pretrained model (alternatively, use your own finetuned model)
litgpt download --repo_id microsoft/phi-2
# Locate the checkpoint of your finetuned or pretrained model and call the `serve` command:
litgpt serve --checkpoint_dir path/to/your/checkpoint/microsoft/phi-2

# 2) Start the server
# Alternative: if you haven't finetuned, download any checkpoint to deploy it:
litgpt download --repo_id microsoft/phi-2
litgpt serve --checkpoint_dir checkpoints/microsoft/phi-2
```

Test the server in a separate terminal and integrate the model API into your AI product:
```python
# 3) Use the server (in a separate session)
import requests, json

# NOTE: the URL below assumes the default local address used by `litgpt serve`
response = requests.post("http://127.0.0.1:8000/predict", json={"prompt": "Hello, world!"})
print(response.json()["output"])
```
4 changes: 4 additions & 0 deletions config_hub/finetune/README.md
@@ -22,6 +22,10 @@ For more information, see the [Dealing with out-of-memory (OOM) errors](../../tu
| llama-2-7b/qlora.yaml | 7B | Alpaca 2k | 4 | 0.814 | 13.68 GB | 512 | 2 | bfloat16 | 45.68 min (A10G) |
| llama-2-7b/full.yaml | 7B | Alpaca 2k | 1 | 0.941 | 26.81 GB | 512 | 4 | bfloat16 | 1.78 min (4xA100) |
| | | | | | | | | | |
| llama-3-8b/lora.yaml | 8B | Alpaca 2k | 2 | 0.890 | 19.73 GB | 512 | 1 | bfloat16 | 14.80 min (A10G) |
| llama-3-8b/qlora.yaml | 8B | Alpaca 2k | 2 | 0.941 | 17.41 GB | 512 | 2 | bfloat16 | 22.34 min (A10G) |
| llama-3-8b/full.yaml | 8B | Alpaca 2k | 1 | 1.451 | 35.48 GB | 512 | 4 | bfloat16 | 2.14 min (4xA100) |
| | | | | | | | | | |
| mistral-7b/lora.yaml (v0.1) | 7B | Alpaca 2k | 4 | 0.796 | 20.65 GB | 512 | 2 | bfloat16 | 31.04 min (1xA10G) |
| mistral-7b/qlora.yaml (v0.1) | 7B | Alpaca 2k | 4 | 0.803 | 14.29 GB | 512 | 2 | bfloat16 | 44.69 min (1xA10G) |
| | | | | | | | | | |
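As a usage sketch, each row in the table above corresponds to a YAML recipe that can be passed to the CLI in a single command (the `--config` flag and the `config_hub/...` path are assumptions based on the repository layout):

```bash
# Finetune Llama 3 8B with the LoRA recipe from the table above
# (the --config flag and config path are assumptions based on the repo layout)
litgpt finetune --config config_hub/finetune/llama-3-8b/lora.yaml
```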
95 changes: 95 additions & 0 deletions config_hub/finetune/llama-3-8b/full.yaml
@@ -0,0 +1,95 @@

# The path to the base model's checkpoint directory to load for finetuning. (type: <class 'Path'>, default: checkpoints/stabilityai/stablelm-base-alpha-3b)
checkpoint_dir: checkpoints/meta-llama/Meta-Llama-3-8B

# Directory in which to save checkpoints and logs. (type: <class 'Path'>, default: out/finetune/full)
out_dir: out/finetune/full-llama-3-8b

# The precision to use for finetuning. Possible choices: "bf16-true", "bf16-mixed", "32-true". (type: Optional[str], default: null)
precision: bf16-true

# How many devices/GPUs to use (type: Union[int, str], default: 1)
devices: 4

# Path to a checkpoint directory to resume from in case training was interrupted, or ``True`` to resume
# from the latest checkpoint in ``out_dir``. (type: Union[bool, Path], default: False)
resume: false

# Data-related arguments. If not provided, the default is ``litgpt.data.Alpaca``.
data:
class_path: litgpt.data.Alpaca2k
init_args:
mask_prompt: false
prompt_style: alpaca
ignore_index: -100
seed: 42
num_workers: 4

# Training-related arguments. See ``litgpt.args.TrainArgs`` for details
train:

# Number of optimizer steps between saving checkpoints (type: Optional[int], default: 1000)
save_interval: 200

# Number of iterations between logging calls (type: int, default: 1)
log_interval: 1

# Number of samples between optimizer steps across data-parallel ranks (type: int, default: 64)
global_batch_size: 64

# Number of samples per data-parallel rank (type: int, default: 1)
micro_batch_size: 4

# Number of iterations with learning rate warmup active (type: int, default: 100)
lr_warmup_steps: 25

# Number of epochs to train on (type: Optional[int], default: 5)
epochs: 1

# Total number of tokens to train on (type: Optional[int], default: null)
max_tokens:

# Limits the number of optimizer steps to run. (type: Optional[int], default: null)
max_steps:

# Limits the length of samples. Off by default (type: Optional[int], default: null)
max_seq_length: 512

# Whether to tie the embedding weights with the language modeling head weights. (type: Optional[bool], default: null)
tie_embeddings:

# (type: float, default: 0.003)
learning_rate: 0.0002

# (type: float, default: 0.02)
weight_decay: 0.1

# (type: float, default: 0.9)
beta1: 0.9

# (type: float, default: 0.95)
beta2: 0.95

# (type: Optional[float], default: null)
max_norm:

# (type: float, default: 6e-05)
min_lr: 6.0e-05

# Evaluation-related arguments. See ``litgpt.args.EvalArgs`` for details
eval:

# Number of optimizer steps between evaluation calls (type: int, default: 600)
interval: 25

# Number of tokens to generate (type: Optional[int], default: 100)
max_new_tokens: 100

# Number of iterations (type: int, default: 100)
max_iters: 100

# The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
logger_name: csv

# The random seed to use for reproducibility. (type: int, default: 1337)
seed: 1337
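
A brief, hedged usage note for the recipe above: it targets a 4-GPU run (`devices: 4`), and with LitGPT's YAML config support individual fields can typically be overridden at launch time without editing the file (the dotted-key syntax below is an assumption about the CLI's argument parsing):

```bash
# Launch the full-finetuning recipe, overriding one training field from the command line
# (the --config flag and dotted-key override are assumptions about the CLI)
litgpt finetune \
  --config config_hub/finetune/llama-3-8b/full.yaml \
  --train.max_seq_length 1024
```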
121 changes: 121 additions & 0 deletions config_hub/finetune/llama-3-8b/lora.yaml
@@ -0,0 +1,121 @@

# The path to the base model's checkpoint directory to load for finetuning. (type: <class 'Path'>, default: checkpoints/stabilityai/stablelm-base-alpha-3b)
checkpoint_dir: checkpoints/meta-llama/Meta-Llama-3-8B

# Directory in which to save checkpoints and logs. (type: <class 'Path'>, default: out/lora)
out_dir: out/finetune/lora-llama-3-8b

# The precision to use for finetuning. Possible choices: "bf16-true", "bf16-mixed", "32-true". (type: Optional[str], default: null)
precision: bf16-true

# If set, quantize the model with this algorithm. See ``tutorials/quantize.md`` for more information. (type: Optional[Literal['nf4', 'nf4-dq', 'fp4', 'fp4-dq', 'int8-training']], default: null)
quantize:

# How many devices/GPUs to use. (type: Union[int, str], default: 1)
devices: 1

# The LoRA rank. (type: int, default: 8)
lora_r: 32

# The LoRA alpha. (type: int, default: 16)
lora_alpha: 16

# The LoRA dropout value. (type: float, default: 0.05)
lora_dropout: 0.05

# Whether to apply LoRA to the query weights in attention. (type: bool, default: True)
lora_query: true

# Whether to apply LoRA to the key weights in attention. (type: bool, default: False)
lora_key: false

# Whether to apply LoRA to the value weights in attention. (type: bool, default: True)
lora_value: true

# Whether to apply LoRA to the output projection in the attention block. (type: bool, default: False)
lora_projection: false

# Whether to apply LoRA to the weights of the MLP in the attention block. (type: bool, default: False)
lora_mlp: false

# Whether to apply LoRA to output head in GPT. (type: bool, default: False)
lora_head: false

# Data-related arguments. If not provided, the default is ``litgpt.data.Alpaca``.
data:
class_path: litgpt.data.Alpaca2k
init_args:
mask_prompt: false
prompt_style: alpaca
ignore_index: -100
seed: 42
num_workers: 4

# Training-related arguments. See ``litgpt.args.TrainArgs`` for details
train:

# Number of optimizer steps between saving checkpoints (type: Optional[int], default: 1000)
save_interval: 200

# Number of iterations between logging calls (type: int, default: 1)
log_interval: 1

# Number of samples between optimizer steps across data-parallel ranks (type: int, default: 128)
global_batch_size: 8

# Number of samples per data-parallel rank (type: int, default: 4)
micro_batch_size: 1

# Number of iterations with learning rate warmup active (type: int, default: 100)
lr_warmup_steps: 10

# Number of epochs to train on (type: Optional[int], default: 5)
epochs: 2

# Total number of tokens to train on (type: Optional[int], default: null)
max_tokens:

# Limits the number of optimizer steps to run. (type: Optional[int], default: null)
max_steps:

# Limits the length of samples. Off by default (type: Optional[int], default: null)
max_seq_length: 512

# Whether to tie the embedding weights with the language modeling head weights. (type: Optional[bool], default: null)
tie_embeddings:

# (type: float, default: 0.0003)
learning_rate: 0.0002

# (type: float, default: 0.02)
weight_decay: 0.0

# (type: float, default: 0.9)
beta1: 0.9

# (type: float, default: 0.95)
beta2: 0.95

# (type: Optional[float], default: null)
max_norm:

# (type: float, default: 6e-05)
min_lr: 6.0e-05

# Evaluation-related arguments. See ``litgpt.args.EvalArgs`` for details
eval:

# Number of optimizer steps between evaluation calls (type: int, default: 100)
interval: 100

# Number of tokens to generate (type: Optional[int], default: 100)
max_new_tokens: 100

# Number of iterations (type: int, default: 100)
max_iters: 100

# The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
logger_name: csv

# The random seed to use for reproducibility. (type: int, default: 1337)
seed: 1337
