Print initial validation loss + final validation loss #1228

Merged
11 commits, merged Apr 29, 2024
3 changes: 3 additions & 0 deletions config_hub/finetune/falcon-7b/lora.yaml
@@ -114,6 +114,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/falcon-7b/qlora.yaml
@@ -116,6 +116,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/gemma-2b/full.yaml
@@ -85,6 +85,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/gemma-2b/lora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/gemma-2b/qlora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/gemma-7b/lora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/gemma-7b/qlora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/llama-2-7b/full.yaml
@@ -88,6 +88,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/llama-2-7b/lora.yaml
@@ -114,6 +114,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/llama-2-7b/qlora.yaml
@@ -116,6 +116,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/llama-3-8b/full.yaml
@@ -88,6 +88,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/llama-3-8b/lora.yaml
@@ -114,6 +114,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/llama-3-8b/qlora.yaml
@@ -116,6 +116,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/mistral-7b-v0.2/lora.yaml
@@ -114,6 +114,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/mistral-7b-v0.2/qlora.yaml
@@ -116,6 +116,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/mistral-7b/lora.yaml
@@ -114,6 +114,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/mistral-7b/qlora.yaml
@@ -116,6 +116,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/phi-2/full.yaml
@@ -88,6 +88,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/phi-2/lora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/phi-2/qlora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/stablelm-base-alpha-3b/full.yaml
@@ -85,6 +85,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/stablelm-base-alpha-3b/lora.yaml
@@ -114,6 +114,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/stablelm-base-alpha-3b/qlora.yaml
@@ -116,6 +116,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/tiny-llama/full.yaml
@@ -85,6 +85,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/tiny-llama/lora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/finetune/tiny-llama/qlora.yaml
@@ -115,6 +115,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # The name of the logger to send metrics to. (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
 logger_name: csv
3 changes: 3 additions & 0 deletions config_hub/pretrain/debug.yaml
@@ -88,6 +88,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
 devices: auto
3 changes: 3 additions & 0 deletions config_hub/pretrain/tinyllama.yaml
@@ -88,6 +88,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
 devices: auto
3 changes: 3 additions & 0 deletions config_hub/pretrain/tinystories.yaml
@@ -104,6 +104,9 @@ eval:
   # Number of iterations (type: int, default: 100)
   max_iters: 100

+  # Whether to evaluate on the validation set at the beginning of the training
+  initial_validation: false
+
 # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
 devices: auto
2 changes: 2 additions & 0 deletions litgpt/args.py
@@ -79,3 +79,5 @@ class EvalArgs:
     """Number of tokens to generate"""
     max_iters: int = 100
     """Number of iterations"""
+    initial_validation: bool = False
+    """Whether to evaluate on the validation set at the beginning of the training"""
8 changes: 6 additions & 2 deletions litgpt/finetune/adapter.py
@@ -220,7 +220,12 @@ def fit(
         f" {model.max_seq_length} and context length is {model.config.block_size}"
     )

-    validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+    if eval.initial_validation:
+        val_loss = validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=len(val_dataloader)))
+        val_loss = f"{val_loss:.3f}"
+    else:
+        validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+        val_loss = "n/a"

     train_iterator = CycleIterator(train_dataloader)
     throughput = ThroughputMonitor(fabric, window_size=50)
@@ -232,7 +237,6 @@
     iter_num = 0
     total_lengths = 0
     total_t0 = time.perf_counter()
-    val_loss = "n/a"

     while step_count < max_steps and train_iterator.epoch < train.epochs:
         iter_num += 1
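Why `max_iters=len(val_dataloader)` yields a full validation pass: `validate` caps the number of batches it scores at `eval.max_iters`, so setting the cap to the dataloader's length covers every batch exactly once, while `max_iters=2` keeps the old cheap sanity check. A hedged sketch of that loop shape (illustrative only; the real `validate` in these modules also handles sample generation and logging):

```python
import torch


@torch.no_grad()
def validate_sketch(model, val_dataloader, max_iters: int) -> torch.Tensor:
    model.eval()
    losses = []
    for i, batch in enumerate(val_dataloader):
        if i >= max_iters:  # max_iters=2 -> cheap sanity check;
            break           # max_iters=len(val_dataloader) -> full set
        logits = model(batch["input_ids"])
        loss = torch.nn.functional.cross_entropy(
            logits.reshape(-1, logits.size(-1)), batch["labels"].reshape(-1)
        )
        losses.append(loss)
    model.train()
    return torch.stack(losses).mean()
```

The same gating block is repeated verbatim in `adapter_v2.py`, `full.py`, and `lora.py` below.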
8 changes: 6 additions & 2 deletions litgpt/finetune/adapter_v2.py
@@ -220,7 +220,12 @@ def fit(
         f" {model.max_seq_length} and context length is {model.config.block_size}"
     )

-    validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+    if eval.initial_validation:
+        val_loss = validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=len(val_dataloader)))
+        val_loss = f"{val_loss:.3f}"
+    else:
+        validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+        val_loss = "n/a"

     train_iterator = CycleIterator(train_dataloader)
     throughput = ThroughputMonitor(fabric, window_size=50)
@@ -232,7 +237,6 @@
     iter_num = 0
     total_lengths = 0
     total_t0 = time.perf_counter()
-    val_loss = "n/a"

     while step_count < max_steps and train_iterator.epoch < train.epochs:
         iter_num += 1
9 changes: 7 additions & 2 deletions litgpt/finetune/full.py
@@ -194,7 +194,13 @@ def fit(
         f" {model.max_seq_length} and context length is {model.config.block_size}"
     )

-    validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+    if eval.initial_validation:
+        val_loss = validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=len(val_dataloader)))
+        val_loss = f"{val_loss:.3f}"
+    else:
+        validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+        val_loss = "n/a"
+
     initial_iter = state["iter_num"]
     max_steps = train.max_steps or float("inf")
     train_iterator = CycleIterator(train_dataloader)
@@ -216,7 +222,6 @@
         fabric.device
     )
     fabric.barrier()
-    val_loss = "n/a"

     while state["step_count"] < max_steps and train_iterator.epoch < train.epochs:
         state["iter_num"] += 1
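A small detail shared by all four scripts: after this change `val_loss` always holds a string, either the formatted number or the literal `"n/a"`, so the logging in the training loop can print it without branching. The `f"{val_loss:.3f}"` step works because the mean loss comes back as a zero-dimensional tensor, and 0-dim torch tensors support float format specs:

```python
import torch

val_loss = torch.tensor(2.71828)  # stand-in for validate(...)'s return value
print(f"{val_loss:.3f}")          # -> 2.718 (0-dim tensors delegate
                                  #    __format__ to their Python scalar)
```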
8 changes: 6 additions & 2 deletions litgpt/finetune/lora.py
@@ -251,7 +251,12 @@ def fit(
         f" {model.max_seq_length} and context length is {model.config.block_size}"
     )

-    validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+    if eval.initial_validation:
+        val_loss = validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=len(val_dataloader)))
+        val_loss = f"{val_loss:.3f}"
+    else:
+        validate(fabric, model, val_dataloader, dataclasses.replace(eval, max_iters=2))  # sanity check
+        val_loss = "n/a"

     train_iterator = CycleIterator(train_dataloader)
     throughput = ThroughputMonitor(fabric, window_size=50)
@@ -263,7 +268,6 @@
     iter_num = 0
     total_lengths = 0
     total_t0 = time.perf_counter()
-    val_loss = "n/a"

     while step_count < max_steps and train_iterator.epoch < train.epochs:
         iter_num += 1