[NeMo-UX] Adding recipes (NVIDIA#9720) (NVIDIA#9851)
* Adding recipe proposal
* Adding more recipes
* Apply isort and black reformatting
* Remove api.py inside llm.gpt.model
* Adding resume to FineTuneRecipe
* Fix spelling error
* Fix spelling error
* Fix spelling error
* Apply isort and black reformatting
* Adding resume to PreTrainRecipe
* Update recipe proposal to use sdk.Partial
* Apply isort and black reformatting
* Update __init__
* Update __init__
* Fix return type
* Fix bug in factory
* Rename recipe folder to 'models'
* Fixes
* Apply isort and black reformatting
* Bug fixes
* Rename models --> configs
* Apply isort and black reformatting
* Rename configs --> recipes
* Apply isort and black reformatting
* Address comments

Signed-off-by: ashors1 <[email protected]>
Signed-off-by: artbataev <[email protected]>
Signed-off-by: marcromeyn <[email protected]>
Signed-off-by: ashors1 <[email protected]>
Signed-off-by: Hemil Desai <[email protected]>
Signed-off-by: hemildesai <[email protected]>
Co-authored-by: Marc Romeyn <[email protected]>
Co-authored-by: artbataev <[email protected]>
Co-authored-by: marcromeyn <[email protected]>
Co-authored-by: ashors1 <[email protected]>
Co-authored-by: ashors1 <[email protected]>
Co-authored-by: Hemil Desai <[email protected]>
Co-authored-by: hemildesai <[email protected]>
Co-authored-by: Anna Shors <[email protected]>
Signed-off-by: Boxiang Wang <[email protected]>
Showing 13 changed files with 320 additions and 162 deletions.
One of the changed files was deleted and its diff did not load; per the commit message above, this is api.py inside llm.gpt.model. The four new files that did load are reproduced below.
nemo/collections/llm/recipes/__init__.py (new file, +13 lines):

```python
from nemo.collections.llm.recipes import llama2_7b, llama3_8b, llama3_8b_16k, llama3_8b_64k, mistral
from nemo.collections.llm.recipes.log.default import default_log
from nemo.collections.llm.recipes.optim import adam

__all__ = [
    "llama3_8b",
    "llama3_8b_16k",
    "llama3_8b_64k",
    "llama2_7b",
    "mistral",
    "adam",
    "default_log",
]
```
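Because the package re-exports each recipe module, downstream code can reach a recipe with a single import; a minimal sketch, assuming a NeMo build that includes this commit:

```python
# Minimal usage sketch: the recipe modules re-exported above are
# attributes of the recipes package itself.
from nemo.collections.llm import recipes

print(recipes.__all__)         # the export list defined in this file
print(recipes.llama2_7b.NAME)  # "llama2_7b" (see the recipe file below)
```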
nemo/collections/llm/recipes/llama2_7b.py (new file, +61 lines):

```python
import pytorch_lightning as pl

from nemo import lightning as nl
from nemo.collections.llm.api import finetune, pretrain
from nemo.collections.llm.gpt.data.api import squad
from nemo.collections.llm.gpt.model.llama import Llama2Config7B, LlamaModel
from nemo.collections.llm.peft.api import gpt_lora
from nemo.collections.llm.recipes.log.default import default_log
from nemo.collections.llm.recipes.optim.adam import adam_with_cosine_annealing
from nemo.collections.llm.utils import Partial, factory

NAME = "llama2_7b"


@factory(name=NAME)
def model() -> pl.LightningModule:
    return LlamaModel(Llama2Config7B())


@factory(name=NAME)
def trainer(devices=8) -> nl.Trainer:
    strategy = nl.MegatronStrategy(tensor_model_parallel_size=2)

    return nl.Trainer(
        devices=devices,
        max_steps=100,
        accelerator="gpu",
        strategy=strategy,
        plugins=nl.MegatronMixedPrecision(precision="bf16-mixed"),
    )


@factory(name=NAME + "_hf")
def hf_resume() -> nl.AutoResume:
    return nl.AutoResume(import_path="hf://meta-llama/Llama-2-7b-hf")


@factory(name=NAME, for_task="llm.pretrain")
def pretrain_recipe() -> Partial:
    return Partial(
        pretrain,
        model=model,
        trainer=trainer,
        data=squad,
        log=default_log,
        optim=adam_with_cosine_annealing,
    )


@factory(name=NAME, for_task="llm.finetune")
def finetune_recipe() -> Partial:
    return Partial(
        finetune,
        model=model,
        trainer=trainer,
        data=squad,
        log=default_log,
        optim=adam_with_cosine_annealing,
        peft=gpt_lora,
        resume=hf_resume,
    )
```
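Each recipe is a Partial that binds the factory functions above to a task; conceptually, materializing it amounts to calling the task with each factory's product. A hedged sketch, for illustration only — the actual Partial/factory resolution lives in nemo.collections.llm.utils, which this diff does not show:

```python
# Sketch, assuming a NeMo build with this commit installed. The recipe is
# registered under name "llama2_7b" for the llm.pretrain/llm.finetune tasks.
from nemo.collections.llm.recipes import llama2_7b

recipe = llama2_7b.finetune_recipe()  # a Partial wrapping finetune(...)

# Conceptually (not literally), the Partial binds arguments like:
#   finetune(model=model(), trainer=trainer(), data=squad(),
#            log=default_log(), optim=adam_with_cosine_annealing(),
#            peft=gpt_lora(), resume=hf_resume())
```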
nemo/collections/llm/recipes/llama3_8b.py (new file, +61 lines):

```python
import pytorch_lightning as pl

from nemo import lightning as nl
from nemo.collections.llm.api import finetune, pretrain
from nemo.collections.llm.gpt.data.api import squad
from nemo.collections.llm.gpt.model.llama import Llama3Config8B, LlamaModel
from nemo.collections.llm.peft.api import gpt_lora
from nemo.collections.llm.recipes.log.default import default_log
from nemo.collections.llm.recipes.optim.adam import adam_with_cosine_annealing
from nemo.collections.llm.utils import Partial, factory

NAME = "llama3_8b"


@factory(name=NAME)
def model() -> pl.LightningModule:
    return LlamaModel(Llama3Config8B(seq_length=16384))


@factory(name=NAME)
def trainer(devices=8) -> nl.Trainer:
    strategy = nl.MegatronStrategy(tensor_model_parallel_size=2)

    return nl.Trainer(
        devices=devices,
        max_steps=100,
        accelerator="gpu",
        strategy=strategy,
        plugins=nl.MegatronMixedPrecision(precision="bf16-mixed"),
    )


@factory(name=NAME + "_hf")
def hf_resume() -> nl.AutoResume:
    return nl.AutoResume(import_path="hf://meta-llama/Meta-Llama-3-8B")


@factory(name=NAME, for_task="llm.pretrain")
def pretrain_recipe() -> Partial:
    return Partial(
        pretrain,
        model=model,
        trainer=trainer,
        data=squad,
        log=default_log,
        optim=adam_with_cosine_annealing,
    )


@factory(name=NAME, for_task="llm.finetune")
def finetune_recipe() -> Partial:
    return Partial(
        finetune,
        model=model,
        trainer=trainer,
        data=squad,
        log=default_log,
        optim=adam_with_cosine_annealing,
        peft=gpt_lora,
        resume=hf_resume,
    )
```
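Because trainer() exposes devices as a parameter, the default of 8 GPUs can be overridden when the factory is called directly; a minimal sketch:

```python
# Sketch: override the device count of the llama3_8b trainer factory.
# With tensor_model_parallel_size=2, devices should stay a multiple of 2.
from nemo.collections.llm.recipes import llama3_8b

trainer = llama3_8b.trainer(devices=2)  # 2 GPUs: TP=2, data-parallel size 1
```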
nemo/collections/llm/recipes/llama3_8b_16k.py (new file, +45 lines):

```python
import pytorch_lightning as pl

from nemo import lightning as nl
from nemo.collections.llm.api import pretrain
from nemo.collections.llm.gpt.data.api import squad
from nemo.collections.llm.gpt.model.llama import Llama3Config8B, LlamaModel
from nemo.collections.llm.recipes.log.default import default_log
from nemo.collections.llm.recipes.optim.adam import adam_with_cosine_annealing
from nemo.collections.llm.utils import Partial, factory

NAME = "llama3_8b_16k"


@factory(name=NAME)
def model() -> pl.LightningModule:
    return LlamaModel(Llama3Config8B(seq_length=16384))


@factory(name=NAME)
def trainer(devices=8) -> nl.Trainer:
    strategy = nl.MegatronStrategy(
        tensor_model_parallel_size=4,
        context_parallel_size=2,
        sequence_parallel=True,
    )

    return nl.Trainer(
        devices=devices,
        max_steps=100,
        accelerator="gpu",
        strategy=strategy,
        plugins=nl.MegatronMixedPrecision(precision="bf16-mixed"),
    )


@factory(name=NAME, for_task="llm.pretrain")
def pretrain_recipe() -> Partial:
    return Partial(
        pretrain,
        model=model,
        trainer=trainer,
        data=squad,
        log=default_log,
        optim=adam_with_cosine_annealing,
    )
```
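Relative to the llama3_8b recipe above, the 16k variant widens the parallelism layout: tensor parallelism grows from 2 to 4, context parallelism of 2 is added, and sequence parallelism is enabled, so a single model replica spans all 8 default devices. A quick arithmetic check of that layout:

```python
# Sanity check on the 16k layout: one model replica spans TP * CP GPUs.
tp, cp, devices = 4, 2, 8
assert devices % (tp * cp) == 0
print(devices // (tp * cp))  # data-parallel size = 1 with the default 8 GPUs
```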