
Commit

simplify PR
rasbt committed Apr 25, 2024
1 parent ca29a08 commit 0f9f670
Showing 35 changed files with 494 additions and 27 deletions.
3 changes: 3 additions & 0 deletions config_hub/finetune/falcon-7b/lora.yaml
@@ -114,6 +114,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
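
For orientation: the new `eval.initial_validation` key presumably maps onto a field of litgpt's eval-arguments dataclass. A minimal sketch in Python, assuming the structure implied by the config comments above (the class name and field layout are assumptions, not copied from the litgpt source):

from dataclasses import dataclass

@dataclass
class EvalArgs:  # assumed name; other eval fields elided
    max_iters: int = 100              # "Number of iterations"
    initial_validation: bool = False  # run a full validation pass before training starts

The same three lines are appended to the eval section of every finetuning and pretraining config below, keeping the YAML default in sync with the code default of false.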
3 changes: 3 additions & 0 deletions config_hub/finetune/falcon-7b/qlora.yaml
@@ -116,6 +116,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/gemma-2b/full.yaml
@@ -85,6 +85,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/gemma-2b/lora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/gemma-2b/qlora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/gemma-7b/lora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/gemma-7b/qlora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/llama-2-7b/full.yaml
@@ -88,6 +88,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/llama-2-7b/lora.yaml
@@ -114,6 +114,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/llama-2-7b/qlora.yaml
@@ -116,6 +116,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/llama-3-8b/full.yaml
@@ -88,6 +88,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/llama-3-8b/lora.yaml
@@ -114,6 +114,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/llama-3-8b/qlora.yaml
@@ -116,6 +116,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/mistral-7b-v0.2/lora.yaml
@@ -114,6 +114,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/mistral-7b-v0.2/qlora.yaml
@@ -116,6 +116,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/mistral-7b/lora.yaml
@@ -114,6 +114,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/mistral-7b/qlora.yaml
@@ -116,6 +116,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/phi-2/full.yaml
@@ -88,6 +88,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/phi-2/lora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/phi-2/qlora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/stablelm-base-alpha-3b/full.yaml
@@ -85,6 +85,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/stablelm-base-alpha-3b/lora.yaml
@@ -114,6 +114,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/stablelm-base-alpha-3b/qlora.yaml
@@ -116,6 +116,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/tiny-llama/full.yaml
@@ -85,6 +85,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/tiny-llama/lora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/tiny-llama/qlora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/pretrain/debug.yaml
@@ -88,6 +88,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
 devices: auto
3 changes: 3 additions & 0 deletions config_hub/pretrain/tinyllama.yaml
@@ -88,6 +88,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
 devices: auto
3 changes: 3 additions & 0 deletions config_hub/pretrain/tinystories.yaml
@@ -104,6 +104,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
 devices: auto
12 changes: 6 additions & 6 deletions litgpt/finetune/adapter.py
@@ -220,7 +220,12 @@ def fit(
         f" {model.max_seq_length} and context length is {model.config.block_size}"
     )
 
-    validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+    if eval.initial_validation:
+        val_loss = validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=len(val_dataloader)))
+        val_loss = f"{val_loss:.3f}"
+    else:
+        validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+        val_loss = "n/a"
 
     train_iterator = CycleIterator(train_dataloader)
     throughput = ThroughputMonitor(fabric, window_size=50)
@@ -232,11 +237,6 @@ def fit(
     iter_num = 0
     total_lengths = 0
     total_t0 = time.perf_counter()
-    if eval.inital_validation:
-        fabric.print("Validating ...")
-        val_loss = validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=len(val_dataloader)))
-    else:
-        val_loss = "n/a"
 
     while step_count < max_steps and train_iterator.epoch < train.epochs:
        iter_num += 1
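
The six added lines appear verbatim in adapter.py, adapter_v2.py, full.py, and lora.py. A hypothetical follow-up could hoist the block into one shared helper; a minimal sketch, assuming each script's own validate() is in scope (the helper name is invented and is not part of this commit):

import dataclasses

def run_initial_validation(fabric, model, val_dataloader, eval_cfg):
    # Mirrors the block added in this commit: either one full pass over the
    # validation set, or a cheap two-iteration sanity check with no loss reported.
    if eval_cfg.initial_validation:
        val_loss = validate(
            fabric, model, val_dataloader,
            dataclasses.replace(eval_cfg, max_iters=len(val_dataloader)),
        )
        return f"{val_loss:.3f}"  # formatted once, ready for the progress logs
    validate(fabric, model, val_dataloader, dataclasses.replace(eval_cfg, max_iters=2))  # sanity check
    return "n/a"

Each fit() would then reduce the block to a single line, e.g. val_loss = run_initial_validation(fabric, model, val_dataloader, eval).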
12 changes: 6 additions & 6 deletions litgpt/finetune/adapter_v2.py
@@ -220,7 +220,12 @@ def fit(
         f" {model.max_seq_length} and context length is {model.config.block_size}"
     )
 
-    validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+    if eval.initial_validation:
+        val_loss = validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=len(val_dataloader)))
+        val_loss = f"{val_loss:.3f}"
+    else:
+        validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+        val_loss = "n/a"
 
     train_iterator = CycleIterator(train_dataloader)
     throughput = ThroughputMonitor(fabric, window_size=50)
@@ -232,11 +237,6 @@ def fit(
     iter_num = 0
     total_lengths = 0
     total_t0 = time.perf_counter()
-    if eval.inital_validation:
-        fabric.print("Validating ...")
-        val_loss = validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=len(val_dataloader)))
-    else:
-        val_loss = "n/a"
 
     while step_count < max_steps and train_iterator.epoch < train.epochs:
        iter_num += 1
13 changes: 7 additions & 6 deletions litgpt/finetune/full.py
@@ -194,7 +194,13 @@ def fit(
         f" {model.max_seq_length} and context length is {model.config.block_size}"
     )
 
-    validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+    if eval.initial_validation:
+        val_loss = validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=len(val_dataloader)))
+        val_loss = f"{val_loss:.3f}"
+    else:
+        validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+        val_loss = "n/a"
+
     initial_iter = state["iter_num"]
     max_steps = train.max_steps or float("inf")
     train_iterator = CycleIterator(train_dataloader)
@@ -216,11 +222,6 @@ def fit(
         fabric.device
     )
     fabric.barrier()
-    if eval.inital_validation:
-        fabric.print("Validating ...")
-        val_loss = validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=len(val_dataloader)))
-    else:
-        val_loss = "n/a"
 
     while state["step_count"] < max_steps and train_iterator.epoch < train.epochs:
         state["iter_num"] += 1
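
One caveat in the new branch: max_iters=len(val_dataloader) assumes the validation dataloader reports a length, and an iterable-style dataset without __len__ would raise TypeError here. A guarded variant (hypothetical, not part of this commit) could fall back to the configured eval.max_iters:

import dataclasses

def full_validation_args(eval_cfg, val_dataloader):
    # Cover the whole validation set when its size is known; otherwise keep
    # the configured eval.max_iters. The fallback is an assumption, not litgpt code.
    try:
        return dataclasses.replace(eval_cfg, max_iters=len(val_dataloader))
    except TypeError:  # e.g. a dataloader over an IterableDataset without __len__
        return eval_cfg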
12 changes: 6 additions & 6 deletions litgpt/finetune/lora.py
@@ -251,7 +251,12 @@ def fit(
         f" {model.max_seq_length} and context length is {model.config.block_size}"
     )
 
-    validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+    if eval.initial_validation:
+        val_loss = validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=len(val_dataloader)))
+        val_loss = f"{val_loss:.3f}"
+    else:
+        validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+        val_loss = "n/a"
 
     train_iterator = CycleIterator(train_dataloader)
     throughput = ThroughputMonitor(fabric, window_size=50)
@@ -263,11 +268,6 @@ def fit(
     iter_num = 0
     total_lengths = 0
     total_t0 = time.perf_counter()
-    if eval.inital_validation:
-        fabric.print("Validating ...")
-        val_loss = validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=len(val_dataloader)))
-    else:
-        val_loss = "n/a"
 
     while step_count < max_steps and train_iterator.epoch < train.epochs:
        iter_num += 1