Merge branch 'main' into litgpt-eval
rasbt authored Apr 1, 2024
2 parents a881630 + 449eb29 commit 1ca218b
Showing 5 changed files with 17 additions and 7 deletions.
4 changes: 2 additions & 2 deletions litgpt/finetune/adapter.py
```diff
@@ -46,8 +46,8 @@ def setup(
     train: TrainArgs = TrainArgs(
         save_interval=1000,
         log_interval=1,
-        global_batch_size=128,
-        micro_batch_size=4,
+        global_batch_size=16,
+        micro_batch_size=1,
         lr_warmup_steps=100,
         epochs=5,
         learning_rate=1e-3,
```
4 changes: 2 additions & 2 deletions litgpt/finetune/adapter_v2.py
```diff
@@ -46,8 +46,8 @@ def setup(
     train: TrainArgs = TrainArgs(
         save_interval=1000,
         log_interval=1,
-        global_batch_size=128,
-        micro_batch_size=4,
+        global_batch_size=16,
+        micro_batch_size=1,
         lr_warmup_steps=100,
         epochs=5,
         learning_rate=1e-3,
```
2 changes: 1 addition & 1 deletion litgpt/finetune/full.py
```diff
@@ -44,7 +44,7 @@ def setup(
     train: TrainArgs = TrainArgs(
         save_interval=1000,
         log_interval=1,
-        global_batch_size=64,
+        global_batch_size=16,
         micro_batch_size=1,
         lr_warmup_steps=100,
         epochs=5,
```
4 changes: 2 additions & 2 deletions litgpt/finetune/lora.py
```diff
@@ -56,8 +56,8 @@ def setup(
     train: TrainArgs = TrainArgs(
         save_interval=1000,
         log_interval=1,
-        global_batch_size=128,
-        micro_batch_size=4,
+        global_batch_size=16,
+        micro_batch_size=1,
         lr_warmup_steps=100,
         epochs=5,
         learning_rate=3e-4,
```
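The four finetuning scripts change `global_batch_size` and `micro_batch_size` together, and the ratio between the two is what fixes the number of gradient-accumulation iterations per optimizer step. A minimal sketch of that relationship (a hypothetical helper for illustration, not LitGPT's actual implementation):

```python
def gradient_accumulation_iters(global_batch_size: int,
                                micro_batch_size: int,
                                devices: int = 1) -> int:
    """Number of micro-batch forward/backward passes accumulated
    per optimizer step, per device."""
    batch_size_per_device = global_batch_size // devices
    # The global batch must split evenly into micro-batches.
    assert batch_size_per_device % micro_batch_size == 0, (
        "global_batch_size must be divisible by devices * micro_batch_size"
    )
    return batch_size_per_device // micro_batch_size

# Old defaults: 128 // 4 = 32 accumulation iterations
print(gradient_accumulation_iters(128, 4))
# New defaults: 16 // 1 = 16 accumulation iterations,
# with a much smaller per-step memory footprint
print(gradient_accumulation_iters(16, 1))
```

The new defaults keep gradient accumulation in play while cutting the per-device micro-batch to 1, which lowers peak memory on a single GPU at the cost of a smaller effective batch.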
10 changes: 10 additions & 0 deletions tutorials/download_model_weights.md
````diff
@@ -154,6 +154,16 @@ unsloth/Mistral-7B-v0.2
 > [!TIP]
 > To sort the list above by model name after the `/`, use `litgpt download | sort -f -t'/' -k2`.
 
+
+> [!NOTE]
+> If you want to use a model variant that is not listed in the table above but has a similar architecture to one of the supported models, you can use it by passing the `--model_name` argument as shown below:
+> ```bash
+> litgpt download \
+>   --repo_id NousResearch/Hermes-2-Pro-Mistral-7B \
+>   --model_name Mistral-7B-v0.1
+> ```
 
 ### 2. Download Model Weights
````
