From e4d1dbffa209ed3ca49a5df7bd0a64a0fad394fa Mon Sep 17 00:00:00 2001
From: Hemil Desai
Date: Wed, 17 Jul 2024 19:03:31 -0700
Subject: [PATCH] Bug fixes

---
 nemo/collections/llm/models/llama2_7b.py     | 11 +++++------
 nemo/collections/llm/models/llama3_8b.py     | 11 +++++------
 nemo/collections/llm/models/llama3_8b_16k.py |  7 +++----
 nemo/collections/llm/models/llama3_8b_64k.py |  7 +++----
 nemo/collections/llm/models/mistral.py       | 11 +++++------
 5 files changed, 21 insertions(+), 26 deletions(-)

diff --git a/nemo/collections/llm/models/llama2_7b.py b/nemo/collections/llm/models/llama2_7b.py
index f8254b1c74b90..c944ca26803dc 100644
--- a/nemo/collections/llm/models/llama2_7b.py
+++ b/nemo/collections/llm/models/llama2_7b.py
@@ -1,4 +1,3 @@
-import nemo_sdk as sdk
 import pytorch_lightning as pl
 
 from nemo import lightning as nl
@@ -8,7 +7,7 @@
 from nemo.collections.llm.models.log.default import default_log
 from nemo.collections.llm.models.optim.adam import adam_with_cosine_annealing
 from nemo.collections.llm.peft.api import gpt_lora
-from nemo.collections.llm.utils import factory
+from nemo.collections.llm.utils import Partial, factory
 
 NAME = "llama2_7b"
 
@@ -37,8 +36,8 @@ def hf_resume() -> nl.AutoResume:
 
 
 @factory(name=NAME, for_task="llm.pretrain")
-def pretrain_recipe() -> sdk.Partial:
-    return sdk.Partial(
+def pretrain_recipe() -> Partial:
+    return Partial(
         pretrain,
         model=model,
         trainer=trainer,
@@ -49,8 +48,8 @@ def pretrain_recipe() -> sdk.Partial:
 
 
 @factory(name=NAME, for_task="llm.finetune")
-def finetune_recipe() -> sdk.Partial:
-    return sdk.Partial(
+def finetune_recipe() -> Partial:
+    return Partial(
         finetune,
         model=model,
         trainer=trainer,
diff --git a/nemo/collections/llm/models/llama3_8b.py b/nemo/collections/llm/models/llama3_8b.py
index 25f7150c597d7..51db7aed55689 100644
--- a/nemo/collections/llm/models/llama3_8b.py
+++ b/nemo/collections/llm/models/llama3_8b.py
@@ -1,4 +1,3 @@
-import nemo_sdk as sdk
 import pytorch_lightning as pl
 
 from nemo import lightning as nl
@@ -8,7 +7,7 @@
 from nemo.collections.llm.models.log.default import default_log
 from nemo.collections.llm.models.optim.adam import adam_with_cosine_annealing
 from nemo.collections.llm.peft.api import gpt_lora
-from nemo.collections.llm.utils import factory
+from nemo.collections.llm.utils import Partial, factory
 
 NAME = "llama3_8b"
 
@@ -37,8 +36,8 @@ def hf_resume() -> nl.AutoResume:
 
 
 @factory(name=NAME, for_task="llm.pretrain")
-def pretrain_recipe() -> sdk.Partial:
-    return sdk.Partial(
+def pretrain_recipe() -> Partial:
+    return Partial(
         pretrain,
         model=model,
         trainer=trainer,
@@ -49,8 +48,8 @@ def pretrain_recipe() -> sdk.Partial:
 
 
 @factory(name=NAME, for_task="llm.finetune")
-def finetune_recipe() -> sdk.Partial:
-    return sdk.Partial(
+def finetune_recipe() -> Partial:
+    return Partial(
         finetune,
         model=model,
         trainer=trainer,
diff --git a/nemo/collections/llm/models/llama3_8b_16k.py b/nemo/collections/llm/models/llama3_8b_16k.py
index 0db71a72c5b10..38d2fe2ebd1b3 100644
--- a/nemo/collections/llm/models/llama3_8b_16k.py
+++ b/nemo/collections/llm/models/llama3_8b_16k.py
@@ -1,4 +1,3 @@
-import nemo_sdk as sdk
 import pytorch_lightning as pl
 
 from nemo import lightning as nl
@@ -7,7 +6,7 @@
 from nemo.collections.llm.gpt.model.llama import Llama3Config8B, LlamaModel
 from nemo.collections.llm.models.log.default import default_log
 from nemo.collections.llm.models.optim.adam import adam_with_cosine_annealing
-from nemo.collections.llm.utils import factory
+from nemo.collections.llm.utils import Partial, factory
 
 NAME = "llama3_8b_16k"
 
@@ -36,8 +35,8 @@ def trainer(devices=8) -> nl.Trainer:
 
 
 @factory(name=NAME, for_task="llm.pretrain")
-def pretrain_recipe() -> sdk.Partial:
-    return sdk.Partial(
+def pretrain_recipe() -> Partial:
+    return Partial(
         pretrain,
         model=model,
         trainer=trainer,
diff --git a/nemo/collections/llm/models/llama3_8b_64k.py b/nemo/collections/llm/models/llama3_8b_64k.py
index d43454591339a..d8946acb64a04 100644
--- a/nemo/collections/llm/models/llama3_8b_64k.py
+++ b/nemo/collections/llm/models/llama3_8b_64k.py
@@ -1,4 +1,3 @@
-import nemo_sdk as sdk
 import pytorch_lightning as pl
 
 from nemo import lightning as nl
@@ -7,7 +6,7 @@
 from nemo.collections.llm.gpt.model.llama import Llama3Config8B, LlamaModel
 from nemo.collections.llm.models.log.default import default_log
 from nemo.collections.llm.models.optim.adam import adam_with_cosine_annealing
-from nemo.collections.llm.utils import factory
+from nemo.collections.llm.utils import Partial, factory
 
 NAME = "llama3_8b_64k"
 
@@ -36,8 +35,8 @@ def trainer(devices=8) -> nl.Trainer:
 
 
 @factory(name=NAME, for_task="llm.pretrain")
-def pretrain_recipe() -> sdk.Partial:
-    return sdk.Partial(
+def pretrain_recipe() -> Partial:
+    return Partial(
         pretrain,
         model=model,
         trainer=trainer,
diff --git a/nemo/collections/llm/models/mistral.py b/nemo/collections/llm/models/mistral.py
index c11fd67b3992f..b9c200e73bf20 100644
--- a/nemo/collections/llm/models/mistral.py
+++ b/nemo/collections/llm/models/mistral.py
@@ -1,4 +1,3 @@
-import nemo_sdk as sdk
 import pytorch_lightning as pl
 
 from nemo import lightning as nl
@@ -8,7 +7,7 @@
 from nemo.collections.llm.models.log.default import default_log
 from nemo.collections.llm.models.optim.adam import adam_with_cosine_annealing
 from nemo.collections.llm.peft.api import gpt_lora
-from nemo.collections.llm.utils import factory
+from nemo.collections.llm.utils import Partial, factory
 
 NAME = "mistral"
 
@@ -37,8 +36,8 @@ def hf_resume() -> nl.AutoResume:
 
 
 @factory(name=NAME, for_task="llm.pretrain")
-def pretrain_recipe() -> sdk.Partial:
-    return sdk.Partial(
+def pretrain_recipe() -> Partial:
+    return Partial(
         pretrain,
         model=model,
         trainer=trainer,
@@ -49,8 +48,8 @@ def pretrain_recipe() -> sdk.Partial:
 
 
 @factory(name=NAME, for_task="llm.finetune")
-def finetune_recipe() -> sdk.Partial:
-    return sdk.Partial(
+def finetune_recipe() -> Partial:
+    return Partial(
         finetune,
         model=model,
         trainer=trainer,