Huvu/t5 nemo2.0 nemoci (#11291)
* workable code

* debugging

* adding debugging code

* adding debugging code

* adding debugging code

* restore /huggingface/auto_tokenizer.py

* Apply isort and black reformatting

Signed-off-by: huvunvidia <[email protected]>

---------

Signed-off-by: huvunvidia <[email protected]>
Co-authored-by: Huy Vu2 <[email protected]>
Co-authored-by: huvunvidia <[email protected]>
3 people authored Nov 20, 2024
1 parent 341580e commit cae18e6
Showing 4 changed files with 18 additions and 4 deletions.
9 changes: 9 additions & 0 deletions nemo/collections/llm/__init__.py

@@ -115,10 +115,15 @@
     gpt_forward_step,
 )
 from nemo.collections.llm.quantization import Quantizer, get_calib_data_iter
+from nemo.collections.llm.t5.data import FineTuningDataModule as T5FineTuningDataModule
+from nemo.collections.llm.t5.data import MockDataModule as T5MockDataModule
+from nemo.collections.llm.t5.data import PreTrainingDataModule as T5PreTrainingDataModule
+from nemo.collections.llm.t5.data import SquadDataModule as T5SquadDataModule
 from nemo.collections.llm.t5.model import T5Config, T5Model, t5_data_step, t5_forward_step
 
 __all__ = [
     "MockDataModule",
+    "T5MockDataModule",
     "GPTModel",
     "GPTConfig",
     "gpt_data_step",
@@ -198,6 +203,10 @@
     "PreTrainingDataModule",
     "FineTuningDataModule",
     "SquadDataModule",
+    "T5PreTrainingDataModule",
+    "T5FineTuningDataModule",
+    "T5SquadDataModule",
+    "T5MockDataModule",
     "DollyDataModule",
     "tokenizer",
     "mock",
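With these re-exports in place, the T5 data modules are importable straight from nemo.collections.llm under their T5-prefixed aliases. A minimal sketch of using the new alias (the constructor arguments simply mirror the values the t5_220m recipe passes to MockDataModule; nothing beyond that is implied by this commit):

```python
# Sketch: use the newly re-exported T5 mock data module from the top-level llm package.
from nemo.collections.llm import T5MockDataModule

# Argument values mirror the t5_220m recipe's MockDataModule configuration.
data = T5MockDataModule(
    seq_length=512,        # encoder sequence length
    seq_length_dec=128,    # decoder sequence length
    global_batch_size=512,
    micro_batch_size=1,
)
```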
8 changes: 5 additions & 3 deletions nemo/collections/llm/recipes/t5_220m.py

@@ -175,15 +175,17 @@ def pretrain_recipe(
         guide in the `examples/llm/pretrain/` directory.
     """
 
-    opt_config = OptimizerConfig(
+    opt_config = run.Config(
+        OptimizerConfig,
         optimizer='adam',
         lr=0.0001,
         use_distributed_optimizer=True,
         bf16=True,
         weight_decay=0.01,
     )
 
-    lr_scheduler = WarmupAnnealingScheduler(
+    lr_scheduler = run.Config(
+        WarmupAnnealingScheduler,
         warmup_steps=None,
         warmup_ratio=0.01,
         max_steps=1000000,
@@ -200,7 +202,7 @@ def pretrain_recipe(
         ),
         data=run.Config(MockDataModule, seq_length=512, seq_length_dec=128, global_batch_size=512, micro_batch_size=1),
         log=default_log(dir=dir, name=name, tensorboard_logger=tensorboard_logger(name=name)),
-        optim=MegatronOptimizerModule(config=opt_config, lr_scheduler=lr_scheduler),
+        optim=run.Config(MegatronOptimizerModule, config=opt_config, lr_scheduler=lr_scheduler),
         resume=default_resume(),
     )
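The substance of this change is that the optimizer, scheduler, and MegatronOptimizerModule are now described declaratively as run.Config objects instead of being instantiated when the recipe is built, so NeMo-Run can serialize the recipe and let users override fields before launch. A rough sketch of how that might look in practice (the recipe arguments and the override/launch calls follow common nemo_run usage and are assumptions, not part of this commit):

```python
# Sketch, assuming typical nemo_run usage; only pretrain_recipe itself comes from this commit.
import nemo_run as run

from nemo.collections.llm.recipes import t5_220m

# Recipe arguments are assumed from the usual NeMo 2.0 recipe signature.
recipe = t5_220m.pretrain_recipe(name="t5_220m_pretrain", num_nodes=1, num_gpus_per_node=8)

# Because optim is now a run.Config, its fields can be overridden before anything
# is instantiated (attribute names follow the diff above).
recipe.optim.config.lr = 1e-4
recipe.optim.lr_scheduler.max_steps = 100_000

run.run(recipe, executor=run.LocalExecutor())
```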
3 changes: 2 additions & 1 deletion nemo/collections/llm/t5/data/__init__.py

@@ -1,5 +1,6 @@
 from nemo.collections.llm.t5.data.fine_tuning import FineTuningDataModule
+from nemo.collections.llm.t5.data.mock import MockDataModule
 from nemo.collections.llm.t5.data.pre_training import PreTrainingDataModule
 from nemo.collections.llm.t5.data.squad import SquadDataModule
 
-__all__ = ["FineTuningDataModule", "PreTrainingDataModule", "SquadDataModule"]
+__all__ = ["FineTuningDataModule", "PreTrainingDataModule", "SquadDataModule", "MockDataModule"]
2 changes: 2 additions & 0 deletions nemo/collections/llm/t5/model/t5.py

@@ -152,6 +152,8 @@ class T5Config(TransformerConfig, io.IOMixin):
     max_position_embeddings: int = 512
     rotary_percent: float = 1.0
     seq_len_interpolation_factor: Optional[float] = None
+    seq_length: int = 512
+    seq_length_dec: int = 128
     encoder_pipeline_model_parallel_size: int = 0
     attention_softmax_in_fp32: float = False
     bias_activation_fusion: bool = True
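T5Config now carries the encoder and decoder sequence lengths (seq_length, seq_length_dec) as first-class fields, with defaults matching the mock data used by the recipe. A minimal sketch of keeping the model config and data module in agreement (the architecture fields shown are illustrative 220M-ish values, and the constructors follow the general NeMo 2.0 pattern rather than anything added in this commit):

```python
# Sketch, not from this commit: pair the new T5Config sequence-length fields
# with a matching data module. num_layers/hidden_size/num_attention_heads are
# illustrative values only.
from nemo.collections.llm import T5Config, T5MockDataModule

config = T5Config(
    num_layers=12,
    hidden_size=768,
    num_attention_heads=12,
    seq_length=512,       # encoder sequence length (new field)
    seq_length_dec=128,   # decoder sequence length (new field)
)

# The data module should use the same encoder/decoder lengths as the config.
data = T5MockDataModule(seq_length=512, seq_length_dec=128, global_batch_size=512, micro_batch_size=1)
```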
