diff --git a/config_hub/finetune/falcon-7b/lora.yaml b/config_hub/finetune/falcon-7b/lora.yaml
index eab0954182..c45b0fed94 100644
--- a/config_hub/finetune/falcon-7b/lora.yaml
+++ b/config_hub/finetune/falcon-7b/lora.yaml
@@ -114,6 +114,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/falcon-7b/qlora.yaml b/config_hub/finetune/falcon-7b/qlora.yaml
index dfc5377bd8..33ab9d9fc3 100644
--- a/config_hub/finetune/falcon-7b/qlora.yaml
+++ b/config_hub/finetune/falcon-7b/qlora.yaml
@@ -116,6 +116,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/gemma-2b/full.yaml b/config_hub/finetune/gemma-2b/full.yaml
index 77f20658ca..879f1afee9 100644
--- a/config_hub/finetune/gemma-2b/full.yaml
+++ b/config_hub/finetune/gemma-2b/full.yaml
@@ -85,6 +85,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/gemma-2b/lora.yaml b/config_hub/finetune/gemma-2b/lora.yaml
index c9f912a47c..91af82800d 100644
--- a/config_hub/finetune/gemma-2b/lora.yaml
+++ b/config_hub/finetune/gemma-2b/lora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/gemma-2b/qlora.yaml b/config_hub/finetune/gemma-2b/qlora.yaml
index dc15fe90d3..159ae2cc86 100644
--- a/config_hub/finetune/gemma-2b/qlora.yaml
+++ b/config_hub/finetune/gemma-2b/qlora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/gemma-7b/lora.yaml b/config_hub/finetune/gemma-7b/lora.yaml
index d7d56f5b5c..59120c5d0b 100644
--- a/config_hub/finetune/gemma-7b/lora.yaml
+++ b/config_hub/finetune/gemma-7b/lora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/gemma-7b/qlora.yaml b/config_hub/finetune/gemma-7b/qlora.yaml
index 7d4a2c634c..556fba0cf5 100644
--- a/config_hub/finetune/gemma-7b/qlora.yaml
+++ b/config_hub/finetune/gemma-7b/qlora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/llama-2-7b/full.yaml b/config_hub/finetune/llama-2-7b/full.yaml
index 10e439b2de..99de788c74 100644
--- a/config_hub/finetune/llama-2-7b/full.yaml
+++ b/config_hub/finetune/llama-2-7b/full.yaml
@@ -88,6 +88,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/llama-2-7b/lora.yaml b/config_hub/finetune/llama-2-7b/lora.yaml
index 91f326757a..594b2f924d 100644
--- a/config_hub/finetune/llama-2-7b/lora.yaml
+++ b/config_hub/finetune/llama-2-7b/lora.yaml
@@ -114,6 +114,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/llama-2-7b/qlora.yaml b/config_hub/finetune/llama-2-7b/qlora.yaml
index a3b7cb8dde..106b9422f4 100644
--- a/config_hub/finetune/llama-2-7b/qlora.yaml
+++ b/config_hub/finetune/llama-2-7b/qlora.yaml
@@ -116,6 +116,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/llama-3-8b/full.yaml b/config_hub/finetune/llama-3-8b/full.yaml
index 11aebcb155..e06d037710 100644
--- a/config_hub/finetune/llama-3-8b/full.yaml
+++ b/config_hub/finetune/llama-3-8b/full.yaml
@@ -88,6 +88,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/llama-3-8b/lora.yaml b/config_hub/finetune/llama-3-8b/lora.yaml
index 700a3b62f4..1d874a0690 100644
--- a/config_hub/finetune/llama-3-8b/lora.yaml
+++ b/config_hub/finetune/llama-3-8b/lora.yaml
@@ -114,6 +114,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/llama-3-8b/qlora.yaml b/config_hub/finetune/llama-3-8b/qlora.yaml
index 1da95eaac5..33a0fc98be 100644
--- a/config_hub/finetune/llama-3-8b/qlora.yaml
+++ b/config_hub/finetune/llama-3-8b/qlora.yaml
@@ -116,6 +116,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/mistral-7b-v0.2/lora.yaml b/config_hub/finetune/mistral-7b-v0.2/lora.yaml
index aad8f7c986..f56e34c525 100644
--- a/config_hub/finetune/mistral-7b-v0.2/lora.yaml
+++ b/config_hub/finetune/mistral-7b-v0.2/lora.yaml
@@ -114,6 +114,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/mistral-7b-v0.2/qlora.yaml b/config_hub/finetune/mistral-7b-v0.2/qlora.yaml
index e2f5c3aafc..b648b24d72 100644
--- a/config_hub/finetune/mistral-7b-v0.2/qlora.yaml
+++ b/config_hub/finetune/mistral-7b-v0.2/qlora.yaml
@@ -116,6 +116,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/mistral-7b/lora.yaml b/config_hub/finetune/mistral-7b/lora.yaml
index adfed6b08d..e991ec424e 100644
--- a/config_hub/finetune/mistral-7b/lora.yaml
+++ b/config_hub/finetune/mistral-7b/lora.yaml
@@ -114,6 +114,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/mistral-7b/qlora.yaml b/config_hub/finetune/mistral-7b/qlora.yaml
index 7972048f46..e43b745bb8 100644
--- a/config_hub/finetune/mistral-7b/qlora.yaml
+++ b/config_hub/finetune/mistral-7b/qlora.yaml
@@ -116,6 +116,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/phi-2/full.yaml b/config_hub/finetune/phi-2/full.yaml
index 65040a393e..5b302a48ac 100644
--- a/config_hub/finetune/phi-2/full.yaml
+++ b/config_hub/finetune/phi-2/full.yaml
@@ -88,6 +88,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/phi-2/lora.yaml b/config_hub/finetune/phi-2/lora.yaml
index a3f348c8b2..2571bc02d0 100644
--- a/config_hub/finetune/phi-2/lora.yaml
+++ b/config_hub/finetune/phi-2/lora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/phi-2/qlora.yaml b/config_hub/finetune/phi-2/qlora.yaml
index aa2c36d40a..d48d910939 100644
--- a/config_hub/finetune/phi-2/qlora.yaml
+++ b/config_hub/finetune/phi-2/qlora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/stablelm-base-alpha-3b/full.yaml b/config_hub/finetune/stablelm-base-alpha-3b/full.yaml
index bd68af8714..c196fcc017 100644
--- a/config_hub/finetune/stablelm-base-alpha-3b/full.yaml
+++ b/config_hub/finetune/stablelm-base-alpha-3b/full.yaml
@@ -85,6 +85,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/stablelm-base-alpha-3b/lora.yaml b/config_hub/finetune/stablelm-base-alpha-3b/lora.yaml
index e674cc8419..6e52ea2175 100644
--- a/config_hub/finetune/stablelm-base-alpha-3b/lora.yaml
+++ b/config_hub/finetune/stablelm-base-alpha-3b/lora.yaml
@@ -114,6 +114,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/stablelm-base-alpha-3b/qlora.yaml b/config_hub/finetune/stablelm-base-alpha-3b/qlora.yaml
index 27b579cbd8..ebd2f098eb 100644
--- a/config_hub/finetune/stablelm-base-alpha-3b/qlora.yaml
+++ b/config_hub/finetune/stablelm-base-alpha-3b/qlora.yaml
@@ -116,6 +116,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/tiny-llama/full.yaml b/config_hub/finetune/tiny-llama/full.yaml
index 4bc09e460b..fe1d1ef99d 100644
--- a/config_hub/finetune/tiny-llama/full.yaml
+++ b/config_hub/finetune/tiny-llama/full.yaml
@@ -85,6 +85,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/tiny-llama/lora.yaml b/config_hub/finetune/tiny-llama/lora.yaml
index 4991900954..c42ff28ff3 100644
--- a/config_hub/finetune/tiny-llama/lora.yaml
+++ b/config_hub/finetune/tiny-llama/lora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/finetune/tiny-llama/qlora.yaml b/config_hub/finetune/tiny-llama/qlora.yaml
index 1e8cf20b8a..7e80e4d0ca 100644
--- a/config_hub/finetune/tiny-llama/qlora.yaml
+++ b/config_hub/finetune/tiny-llama/qlora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
 
diff --git a/config_hub/pretrain/debug.yaml b/config_hub/pretrain/debug.yaml
index bbe2fee2cc..e89dda3cc9 100644
--- a/config_hub/pretrain/debug.yaml
+++ b/config_hub/pretrain/debug.yaml
@@ -88,6 +88,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
 devices: auto
 
diff --git a/config_hub/pretrain/tinyllama.yaml b/config_hub/pretrain/tinyllama.yaml
index a47bd946f3..e2418a5b17 100644
--- a/config_hub/pretrain/tinyllama.yaml
+++ b/config_hub/pretrain/tinyllama.yaml
@@ -88,6 +88,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
 devices: auto
 
diff --git a/config_hub/pretrain/tinystories.yaml b/config_hub/pretrain/tinystories.yaml
index 8ef1232862..8ed53a09d7 100644
--- a/config_hub/pretrain/tinystories.yaml
+++ b/config_hub/pretrain/tinystories.yaml
@@ -104,6 +104,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100
 
+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
 devices: auto
 
diff --git a/litgpt/args.py b/litgpt/args.py
index b227ffe3f6..7e277fe9e6 100644
--- a/litgpt/args.py
+++ b/litgpt/args.py
@@ -79,3 +79,5 @@ class EvalArgs:
     """Number of tokens to generate"""
     max_iters: int = 100
     """Number of iterations"""
+    initial_validation: bool = False
+    """Whether to evaluate on the validation set at the beginning of the training"""
diff --git a/litgpt/finetune/adapter.py b/litgpt/finetune/adapter.py
index be21af318d..313d0ea8e7 100644
--- a/litgpt/finetune/adapter.py
+++ b/litgpt/finetune/adapter.py
@@ -220,7 +220,12 @@ def fit(
             f" {model.max_seq_length} and context length is {model.config.block_size}"
         )
 
-    validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+    if eval.initial_validation:
+        val_loss = validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=len(val_dataloader)))
+        val_loss = f"{val_loss:.3f}"
+    else:
+        validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+        val_loss = "n/a"
 
     train_iterator = CycleIterator(train_dataloader)
     throughput = ThroughputMonitor(fabric, window_size=50)
@@ -232,7 +237,6 @@
     iter_num = 0
     total_lengths = 0
     total_t0 = time.perf_counter()
-    val_loss = "n/a"
 
     while step_count < max_steps and train_iterator.epoch < train.epochs:
         iter_num += 1
diff --git a/litgpt/finetune/adapter_v2.py b/litgpt/finetune/adapter_v2.py
index f354decfd0..39b2a2d0e2 100644
--- a/litgpt/finetune/adapter_v2.py
+++ b/litgpt/finetune/adapter_v2.py
@@ -220,7 +220,12 @@ def fit(
             f" {model.max_seq_length} and context length is {model.config.block_size}"
         )
 
-    validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+    if eval.initial_validation:
+        val_loss = validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=len(val_dataloader)))
+        val_loss = f"{val_loss:.3f}"
+    else:
+        validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+        val_loss = "n/a"
 
     train_iterator = CycleIterator(train_dataloader)
     throughput = ThroughputMonitor(fabric, window_size=50)
@@ -232,7 +237,6 @@
     iter_num = 0
     total_lengths = 0
     total_t0 = time.perf_counter()
-    val_loss = "n/a"
 
     while step_count < max_steps and train_iterator.epoch < train.epochs:
         iter_num += 1
diff --git a/litgpt/finetune/full.py b/litgpt/finetune/full.py
index 23de9b622c..01db855189 100644
--- a/litgpt/finetune/full.py
+++ b/litgpt/finetune/full.py
@@ -194,7 +194,13 @@ def fit(
             f" {model.max_seq_length} and context length is {model.config.block_size}"
         )
 
-    validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+    if eval.initial_validation:
+        val_loss = validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=len(val_dataloader)))
+        val_loss = f"{val_loss:.3f}"
+    else:
+        validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+        val_loss = "n/a"
+
     initial_iter = state["iter_num"]
     max_steps = train.max_steps or float("inf")
     train_iterator = CycleIterator(train_dataloader)
@@ -216,7 +222,6 @@
         fabric.device
     )
     fabric.barrier()
-    val_loss = "n/a"
 
     while state["step_count"] < max_steps and train_iterator.epoch < train.epochs:
         state["iter_num"] += 1
diff --git a/litgpt/finetune/lora.py b/litgpt/finetune/lora.py
index 39e805befe..ae48bbc8fe 100644
--- a/litgpt/finetune/lora.py
+++ b/litgpt/finetune/lora.py
@@ -251,7 +251,12 @@ def fit(
             f" {model.max_seq_length} and context length is {model.config.block_size}"
         )
 
-    validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+    if eval.initial_validation:
+        val_loss = validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=len(val_dataloader)))
+        val_loss = f"{val_loss:.3f}"
+    else:
+        validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+        val_loss = "n/a"
 
     train_iterator = CycleIterator(train_dataloader)
     throughput = ThroughputMonitor(fabric, window_size=50)
@@ -263,7 +268,6 @@
     iter_num = 0
     total_lengths = 0
     total_t0 = time.perf_counter()
-    val_loss = "n/a"
 
     while step_count < max_steps and train_iterator.epoch < train.epochs:
         iter_num += 1
diff --git a/litgpt/pretrain.py b/litgpt/pretrain.py
index 3a763116a0..d5014dc022 100644
--- a/litgpt/pretrain.py
+++ b/litgpt/pretrain.py
@@ -228,7 +228,13 @@
     model = state["model"]
     optimizer = state["optimizer"]
 
-    validate(fabric, model, val_dataloader, max_iters=2)  # sanity check
+    if eval.initial_validation:
+        val_loss = validate(fabric, model, val_dataloader, max_iters=eval.max_iters)
+        val_loss = f"{val_loss:.3f}"
+    else:
+        validate(fabric, model, val_dataloader, max_iters=2)  # sanity check
+        val_loss = "n/a"
+
     throughput = ThroughputMonitor(fabric, window_size=5)
 
     with torch.device("meta"):
@@ -252,7 +258,6 @@
     )
     fabric.barrier()
     total_t0 = time.perf_counter()
-    val_loss = "n/a"
 
     warmup_iters = train.warmup_iters(devices, max_iters, train_dataloader)
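
What the new flag changes in practice: with `initial_validation: true`, `fit()` runs a full pass over the validation set before the first optimizer step, so the first reported `val_loss` is a real value; with the default `false`, only the two-batch sanity check runs and `val_loss` stays "n/a" until the first scheduled evaluation. A minimal sketch of setting the flag programmatically follows; `EvalArgs` and its fields are taken from `litgpt/args.py` as patched above, and the numeric values mirror the `config_hub` defaults.

# Sketch: equivalent to flipping `initial_validation: false` to `true`
# under the `eval:` section of any of the config_hub YAML files above.
from litgpt.args import EvalArgs

eval_args = EvalArgs(
    interval=100,             # optimizer steps between evaluation calls
    max_new_tokens=100,       # tokens to generate during evaluation
    max_iters=100,            # validation batches per evaluation run
    initial_validation=True,  # full validation pass before training starts
)

With litgpt's jsonargparse-based CLI, the dotted override `--eval.initial_validation true` should have the same effect; that flag spelling is inferred from the dataclass field rather than shown in the diff.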