Huvu/t5 nemo2.0 nemoci 3b11b (#11388)
* Update t5_11b.py

Signed-off-by: Huy Vu <[email protected]>

* Update t5_3b.py

Signed-off-by: Huy Vu <[email protected]>

* Update t5_3b.py

Signed-off-by: Huy Vu <[email protected]>

* Update t5_3b.py

Signed-off-by: Huy Vu <[email protected]>

---------

Signed-off-by: Huy Vu <[email protected]>
huvunvidia authored Nov 27, 2024
1 parent eeb7b93 commit a6b08a6
Showing 2 changed files with 22 additions and 14 deletions.
18 changes: 11 additions & 7 deletions nemo/collections/llm/recipes/t5_11b.py
@@ -175,15 +175,17 @@ def pretrain_recipe(
     guide in the `examples/llm/pretrain/` directory.
     """
 
-    opt_config = OptimizerConfig(
+    opt_config = run.Config(
+        OptimizerConfig,
         optimizer='adam',
         lr=0.0001,
         use_distributed_optimizer=True,
         bf16=True,
         weight_decay=0.01,
     )
 
-    lr_scheduler = WarmupAnnealingScheduler(
+    lr_scheduler = run.Config(
+        WarmupAnnealingScheduler,
         warmup_steps=None,
         warmup_ratio=0.01,
         max_steps=1000000,
@@ -202,7 +204,7 @@ def pretrain_recipe(
             MockDataModule, seq_length=512, seq_length_dec=128, global_batch_size=1920, micro_batch_size=24
         ),
         log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)),
-        optim=MegatronOptimizerModule(config=opt_config, lr_scheduler=lr_scheduler),
+        optim=run.Config(MegatronOptimizerModule, config=opt_config, lr_scheduler=lr_scheduler),
         resume=default_resume(),
     )
 
@@ -248,15 +250,17 @@ def finetune_recipe(
     on fine-tuning LLMs with NeMo, see the fine-tuning guide in the
     `examples/llm/finetune/` directory.
     """
-    opt_config = OptimizerConfig(
+    opt_config = run.Config(
+        OptimizerConfig,
         optimizer='adam',
-        lr=1e-4,
+        lr=0.0001,
         use_distributed_optimizer=True,
         bf16=True,
         weight_decay=0.01,
     )
 
-    lr_scheduler = WarmupAnnealingScheduler(
+    lr_scheduler = run.Config(
+        WarmupAnnealingScheduler,
         warmup_steps=50,
         max_steps=2000,
         min_lr=0.00001,
@@ -273,7 +277,7 @@ def finetune_recipe(
             SquadDataModule, seq_length=512, seq_length_dec=128, global_batch_size=128, micro_batch_size=1
         ),
         log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)),
-        optim=MegatronOptimizerModule(config=opt_config, lr_scheduler=lr_scheduler),
+        optim=run.Config(MegatronOptimizerModule, config=opt_config, lr_scheduler=lr_scheduler),
         resume=nemo_resume(checkpoint_path),
     )
 
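The core of the change is the same in both recipes: the optimizer config, LR scheduler, and optimizer module are now declared through NeMo-Run's run.Config wrapper instead of being instantiated eagerly, so the recipe stays a lazily built, serializable configuration. A minimal sketch of that pattern follows; the imports and the `import nemo_run as run` alias are assumptions about how the recipe modules are set up, not part of this diff.

    # Sketch only: the wrapper pattern introduced by this commit (assumed imports).
    import nemo_run as run
    from megatron.core.optimizer import OptimizerConfig
    from nemo.lightning.pytorch.optim import MegatronOptimizerModule, WarmupAnnealingScheduler

    # Before: OptimizerConfig(...) built the object at recipe-definition time.
    # After: run.Config(OptimizerConfig, ...) only records the target class and its
    # kwargs, so the recipe can be serialized, overridden, and built later.
    opt_config = run.Config(
        OptimizerConfig,
        optimizer='adam',
        lr=0.0001,
        use_distributed_optimizer=True,
        bf16=True,
        weight_decay=0.01,
    )

    lr_scheduler = run.Config(
        WarmupAnnealingScheduler,
        warmup_steps=50,
        max_steps=2000,
        min_lr=0.00001,
    )

    # The optimizer module is wrapped the same way before being passed to the recipe.
    optim = run.Config(MegatronOptimizerModule, config=opt_config, lr_scheduler=lr_scheduler)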
18 changes: 11 additions & 7 deletions nemo/collections/llm/recipes/t5_3b.py
@@ -175,15 +175,17 @@ def pretrain_recipe(
     guide in the `examples/llm/pretrain/` directory.
     """
 
-    opt_config = OptimizerConfig(
+    opt_config = run.Config(
+        OptimizerConfig,
         optimizer='adam',
         lr=0.0001,
         use_distributed_optimizer=True,
         bf16=True,
         weight_decay=0.01,
     )
 
-    lr_scheduler = WarmupAnnealingScheduler(
+    lr_scheduler = run.Config(
+        WarmupAnnealingScheduler,
         warmup_steps=None,
         warmup_ratio=0.01,
         max_steps=1000000,
@@ -202,7 +204,7 @@ def pretrain_recipe(
             MockDataModule, seq_length=512, seq_length_dec=128, global_batch_size=1920, micro_batch_size=24
         ),
         log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)),
-        optim=MegatronOptimizerModule(config=opt_config, lr_scheduler=lr_scheduler),
+        optim=run.Config(MegatronOptimizerModule, config=opt_config, lr_scheduler=lr_scheduler),
         resume=default_resume(),
     )
 
@@ -248,15 +250,17 @@ def finetune_recipe(
     on fine-tuning LLMs with NeMo, see the fine-tuning guide in the
     `examples/llm/finetune/` directory.
     """
-    opt_config = OptimizerConfig(
+    opt_config = run.Config(
+        OptimizerConfig,
         optimizer='adam',
-        lr=1e-4,
+        lr=0.0001,
         use_distributed_optimizer=True,
         bf16=True,
         weight_decay=0.01,
     )
 
-    lr_scheduler = WarmupAnnealingScheduler(
+    lr_scheduler = run.Config(
+        WarmupAnnealingScheduler,
         warmup_steps=50,
         max_steps=2000,
         min_lr=0.00001,
@@ -273,7 +277,7 @@ def finetune_recipe(
             SquadDataModule, seq_length=512, seq_length_dec=128, global_batch_size=128, micro_batch_size=1
         ),
         log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)),
-        optim=MegatronOptimizerModule(config=opt_config, lr_scheduler=lr_scheduler),
+        optim=run.Config(MegatronOptimizerModule, config=opt_config, lr_scheduler=lr_scheduler),
         resume=nemo_resume(checkpoint_path),
     )
 
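Downstream, a recipe composed entirely of run.Config / run.Partial objects can be handed to NeMo-Run for execution; a hedged usage sketch is below. The executor choice and the name/dir values are illustrative placeholders, not taken from this commit.

    # Illustrative only: launching one of the patched recipes with NeMo-Run.
    import nemo_run as run
    from nemo.collections.llm.recipes import t5_3b

    # pretrain_recipe is the factory touched by this diff; name/dir are placeholders.
    recipe = t5_3b.pretrain_recipe(name="t5_3b_pretrain", dir="/checkpoints/t5_3b")

    # Because every component (optimizer, scheduler, data module, ...) is a run.Config,
    # the recipe serializes cleanly and is only materialized inside the launched job.
    run.run(recipe, executor=run.LocalExecutor())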
