Update configs for 8b + 70b
Signed-off-by: Valerie Sarge <[email protected]>
vysarge committed Oct 28, 2024
1 parent: c5b39e8 · commit: 6a7cd2a
Showing 2 changed files with 13 additions and 7 deletions.
nemo/collections/llm/recipes/llama3_70b.py: 12 changes (8 additions, 4 deletions)
@@ -247,7 +247,7 @@ def finetune_recipe(
     num_nodes: int = 1,
     num_gpus_per_node: int = 8,
     peft_scheme: Optional[str] = 'lora',
-    seq_length: int = 4096,
+    seq_length: Optional[int] = None,
     packed_sequence: Optional[bool] = None,
     performance_mode: bool = False,
 ) -> run.Partial:
@@ -289,11 +289,15 @@ def finetune_recipe(
     if packed_sequence is None:
         packed_sequence = performance_mode
 
+    # For unpacked sequence, most samples in SQuAD dataset are shorter than 2K
+    if seq_length is None:
+        seq_length = 4096 if packed_sequence else 2048
+
     recipe = default_finetune_recipe(
         model(), "meta-llama/Meta-Llama-3-70B", dir, name, num_nodes, num_gpus_per_node, packed_sequence
     )
     if peft_scheme is None or peft_scheme.lower() == 'none':
-        assert (num_nodes >= 4) or performance_mode
+        assert num_nodes >= 4
         recipe.trainer.strategy.tensor_model_parallel_size = 8
         recipe.trainer.strategy.pipeline_model_parallel_size = 4
         recipe.optim.config.lr = 5e-6
@@ -305,8 +309,6 @@
 
         # some settings currently do not function correctly with LoRA
         recipe.model.config.cross_entropy_loss_fusion = False
-        recipe.trainer.strategy.ckpt_async_save = False
-        recipe.trainer.strategy.ddp = "megatron"
 
         recipe.trainer.strategy.tensor_model_parallel_size = 8
         recipe.optim.config.lr = 1e-4
@@ -354,6 +356,7 @@ def finetune_performance_optimizations(
     if peft_scheme is None or peft_scheme.lower() == 'none':
         recipe.trainer.strategy.tensor_model_parallel_size = 4
         recipe.trainer.strategy.pipeline_model_parallel_size = 4
+        recipe.trainer.strategy.virtual_pipeline_model_parallel_size = 5
         recipe.trainer.plugins.grad_reduce_in_fp32 = False
         recipe.trainer.strategy.ddp = run.Config(
             DistributedDataParallelConfig,
@@ -367,6 +370,7 @@
             run.Config(
                 MegatronCommOverlapCallback,
                 tp_comm_overlap=True,
+                defer_embedding_wgrad_compute=True,
             )
         )
     else:
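
A minimal usage sketch of the 70B recipe after these changes, assuming the module is importable as nemo.collections.llm.recipes.llama3_70b and that omitted arguments keep their defaults:

from nemo.collections.llm.recipes import llama3_70b

# LoRA finetune with defaults: packed_sequence stays False,
# so seq_length now resolves to 2048 instead of 4096.
lora = llama3_70b.finetune_recipe(
    num_nodes=1,
    num_gpus_per_node=8,
    peft_scheme="lora",
)

# Full finetune now asserts num_nodes >= 4 even in performance mode;
# performance_mode=True implies packed_sequence=True, so seq_length resolves to 4096.
full = llama3_70b.finetune_recipe(
    num_nodes=4,
    num_gpus_per_node=8,
    peft_scheme=None,
    performance_mode=True,
)
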
nemo/collections/llm/recipes/llama3_8b.py: 8 changes (5 additions, 3 deletions)
@@ -235,7 +235,7 @@ def finetune_recipe(
     num_nodes: int = 1,
     num_gpus_per_node: int = 8,
     peft_scheme: Optional[str] = 'lora',
-    seq_length: int = 4096,
+    seq_length: Optional[int] = None,
     packed_sequence: Optional[bool] = None,
     performance_mode: bool = False,
 ) -> run.Partial:
@@ -277,6 +277,10 @@
     if packed_sequence is None:
         packed_sequence = performance_mode
 
+    # For unpacked sequence, most samples in SQuAD dataset are shorter than 2K
+    if seq_length is None:
+        seq_length = 4096 if packed_sequence else 2048
+
     recipe = default_finetune_recipe(
         model(), "meta-llama/Meta-Llama-3-8B", dir, name, num_nodes, num_gpus_per_node, packed_sequence
     )
@@ -291,8 +295,6 @@
 
         # some settings currently do not function correctly with LoRA
        recipe.model.config.cross_entropy_loss_fusion = False
-        recipe.trainer.strategy.ckpt_async_save = False
-        recipe.trainer.strategy.ddp = "megatron"
 
         recipe.optim.config.lr = 1e-4
     else:
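
For reference, a standalone sketch of the default-resolution logic the two recipes now share; the function name below is illustrative only and is not part of the NeMo API.

from typing import Optional, Tuple

def resolve_finetune_defaults(
    seq_length: Optional[int] = None,
    packed_sequence: Optional[bool] = None,
    performance_mode: bool = False,
) -> Tuple[int, bool]:
    # packed_sequence follows performance_mode unless set explicitly.
    if packed_sequence is None:
        packed_sequence = performance_mode
    # For unpacked sequences, most SQuAD samples are shorter than 2K tokens,
    # so the recipes now default to 2048; packed sequences keep the 4096 default.
    if seq_length is None:
        seq_length = 4096 if packed_sequence else 2048
    return seq_length, packed_sequence

assert resolve_finetune_defaults() == (2048, False)
assert resolve_finetune_defaults(performance_mode=True) == (4096, True)
assert resolve_finetune_defaults(seq_length=8192) == (8192, False)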
