From 4731c758c5336dcaa2510cf2fdf01feb3da10714 Mon Sep 17 00:00:00 2001
From: akoumpa
Date: Wed, 30 Oct 2024 02:16:25 +0000
Subject: [PATCH] Apply isort and black reformatting

Signed-off-by: akoumpa
---
 nemo/collections/llm/gpt/model/base.py      |  1 +
 nemo/collections/llm/gpt/model/chatglm.py   |  5 ++---
 nemo/collections/llm/gpt/model/llama.py     | 11 ++++++++---
 nemo/collections/llm/gpt/model/mistral.py   |  7 ++++++-
 nemo/collections/llm/gpt/model/mixtral.py   |  7 ++++++-
 nemo/collections/llm/gpt/model/nemotron.py  |  4 +++-
 nemo/collections/llm/gpt/model/qwen2.py     |  5 ++++-
 nemo/collections/llm/gpt/model/starcoder.py |  6 +++---
 nemo/lightning/io/api.py                    |  1 +
 nemo/lightning/io/connector.py              |  3 ++-
 10 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/nemo/collections/llm/gpt/model/base.py b/nemo/collections/llm/gpt/model/base.py
index 39318bf2ff7e..ecba13d86ec6 100644
--- a/nemo/collections/llm/gpt/model/base.py
+++ b/nemo/collections/llm/gpt/model/base.py
@@ -134,6 +134,7 @@ def torch_dtype_from_mcore_config(config: TransformerConfig):
     else:
         return torch.float
 
+
 @dataclass
 class GPTConfig(TransformerConfig, io.IOMixin):
     # From megatron.core.models.gpt.gpt_model.GPTModel
diff --git a/nemo/collections/llm/gpt/model/chatglm.py b/nemo/collections/llm/gpt/model/chatglm.py
index 61fa70d5116e..f9ad8fc6010c 100644
--- a/nemo/collections/llm/gpt/model/chatglm.py
+++ b/nemo/collections/llm/gpt/model/chatglm.py
@@ -198,7 +198,6 @@ def config(self) -> "AutoConfig":
         )
 
 
-
 @io.state_transform(
     source_key="embedding.word_embeddings.weight",
     target_key="transformer.embedding.word_embeddings.weight",
@@ -206,7 +205,7 @@ def config(self) -> "AutoConfig":
 def _export_embedding(ctx: io.TransformCTX, embedding):
     megatron_config = ctx.target.config
     # prune padding.
-    return embedding[:megatron_config.vocab_size, :]
+    return embedding[: megatron_config.vocab_size, :]
 
 
 @io.state_transform(
@@ -216,7 +215,7 @@ def _export_embedding(ctx: io.TransformCTX, embedding):
 def _export_head(ctx: io.TransformCTX, embedding):
     megatron_config = ctx.target.config
     # prune padding.
-    return embedding[:megatron_config.vocab_size, :]
+    return embedding[: megatron_config.vocab_size, :]
 
 
 @io.state_transform(
diff --git a/nemo/collections/llm/gpt/model/llama.py b/nemo/collections/llm/gpt/model/llama.py
index 79d688352457..7b235d59ee89 100644
--- a/nemo/collections/llm/gpt/model/llama.py
+++ b/nemo/collections/llm/gpt/model/llama.py
@@ -322,7 +322,12 @@ def convert_state(self, source, target):
             "decoder.final_layernorm.weight": "model.norm.weight",
         }
 
-        return io.apply_transforms(source, target, mapping=mapping, transforms=[_export_qkv, _export_linear_fc1, _export_embedding, _export_head])
+        return io.apply_transforms(
+            source,
+            target,
+            mapping=mapping,
+            transforms=[_export_qkv, _export_linear_fc1, _export_embedding, _export_head],
+        )
 
     @property
     def tokenizer(self):
@@ -431,7 +436,7 @@ def _export_qkv(ctx: io.TransformCTX, linear_qkv):
 def _export_embedding(ctx: io.TransformCTX, embedding):
     megatron_config = ctx.target.config
     # prune padding.
-    return embedding[:megatron_config.vocab_size, :]
+    return embedding[: megatron_config.vocab_size, :]
 
 
 @io.state_transform(
@@ -441,7 +446,7 @@ def _export_embedding(ctx: io.TransformCTX, embedding):
 def _export_head(ctx: io.TransformCTX, embedding):
     megatron_config = ctx.target.config
     # prune padding.
-    return embedding[:megatron_config.vocab_size, :]
+    return embedding[: megatron_config.vocab_size, :]
 
 
 @io.state_transform(
diff --git a/nemo/collections/llm/gpt/model/mistral.py b/nemo/collections/llm/gpt/model/mistral.py
index 3875d7bee1dc..a71042e2ba6f 100644
--- a/nemo/collections/llm/gpt/model/mistral.py
+++ b/nemo/collections/llm/gpt/model/mistral.py
@@ -218,7 +218,12 @@ def convert_state(self, source, target):
             "decoder.final_layernorm.weight": "model.norm.weight",
         }
 
-        return io.apply_transforms(source, target, mapping=mapping, transforms=[_export_qkv, _export_linear_fc1, _export_embedding, _export_head])
+        return io.apply_transforms(
+            source,
+            target,
+            mapping=mapping,
+            transforms=[_export_qkv, _export_linear_fc1, _export_embedding, _export_head],
+        )
 
     @property
     def tokenizer(self):
diff --git a/nemo/collections/llm/gpt/model/mixtral.py b/nemo/collections/llm/gpt/model/mixtral.py
index eb936584e03a..29361c38fda5 100644
--- a/nemo/collections/llm/gpt/model/mixtral.py
+++ b/nemo/collections/llm/gpt/model/mixtral.py
@@ -301,7 +301,12 @@ def convert_state(self, source, target):
             "decoder.final_layernorm.weight": "model.norm.weight",
         }
 
-        return io.apply_transforms(source, target, mapping=mapping, transforms=[_export_qkv, _export_moe_w1_w3, _export_embedding, _export_head])
+        return io.apply_transforms(
+            source,
+            target,
+            mapping=mapping,
+            transforms=[_export_qkv, _export_moe_w1_w3, _export_embedding, _export_head],
+        )
 
     @property
     def tokenizer(self):
diff --git a/nemo/collections/llm/gpt/model/nemotron.py b/nemo/collections/llm/gpt/model/nemotron.py
index bed627c10649..8fdc5f8f0f00 100644
--- a/nemo/collections/llm/gpt/model/nemotron.py
+++ b/nemo/collections/llm/gpt/model/nemotron.py
@@ -242,7 +242,9 @@ def convert_state(self, source, target):
             "decoder.final_layernorm.bias": "model.norm.bias",
         }
 
-        return io.apply_transforms(source, target, mapping=mapping, transforms=[_export_qkv, _export_embedding, _export_head])
+        return io.apply_transforms(
+            source, target, mapping=mapping, transforms=[_export_qkv, _export_embedding, _export_head]
+        )
 
     @property
     def tokenizer(self):
diff --git a/nemo/collections/llm/gpt/model/qwen2.py b/nemo/collections/llm/gpt/model/qwen2.py
index 984844d8643f..4cf0292d1a6a 100644
--- a/nemo/collections/llm/gpt/model/qwen2.py
+++ b/nemo/collections/llm/gpt/model/qwen2.py
@@ -201,7 +201,10 @@ def convert_state(self, source, target):
         }
 
         return io.apply_transforms(
-            source, target, mapping=mapping, transforms=[_export_qkv, _export_qkv_bias, _export_linear_fc1, _export_embedding, _export_head]
+            source,
+            target,
+            mapping=mapping,
+            transforms=[_export_qkv, _export_qkv_bias, _export_linear_fc1, _export_embedding, _export_head],
         )
 
     @property
diff --git a/nemo/collections/llm/gpt/model/starcoder.py b/nemo/collections/llm/gpt/model/starcoder.py
index 5e461618d75b..b3e7b25f705b 100644
--- a/nemo/collections/llm/gpt/model/starcoder.py
+++ b/nemo/collections/llm/gpt/model/starcoder.py
@@ -16,9 +16,9 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Annotated, Callable, Optional
 
+import torch
 import torch.nn.functional as F
 from torch import nn
-import torch
 
 from nemo.collections.llm.gpt.model.base import GPTConfig, GPTModel, torch_dtype_from_mcore_config
 from nemo.collections.llm.utils import Config
@@ -233,7 +233,7 @@ def config(self) -> "HFStarcoderConfig":
 def _export_embedding(ctx: io.TransformCTX, embedding):
     megatron_config = ctx.target.config
     # prune padding.
-    return embedding[:megatron_config.vocab_size, :]
+    return embedding[: megatron_config.vocab_size, :]
 
 
 @io.state_transform(
@@ -243,4 +243,4 @@ def _export_embedding(ctx: io.TransformCTX, embedding):
 def _export_head(ctx: io.TransformCTX, embedding):
     megatron_config = ctx.target.config
     # prune padding.
-    return embedding[:megatron_config.vocab_size, :]
+    return embedding[: megatron_config.vocab_size, :]
diff --git a/nemo/lightning/io/api.py b/nemo/lightning/io/api.py
index a5fb7daa7fc4..be9372f2e79b 100644
--- a/nemo/lightning/io/api.py
+++ b/nemo/lightning/io/api.py
@@ -63,6 +63,7 @@ def load_context(path: Path, subpath: Optional[str] = None, build: bool = True):
         path = path / 'context'
     return load(path, output_type=TrainerContext, subpath=subpath, build=build)
 
+
 def model_importer(target: Type[ConnectorMixin], ext: str) -> Callable[[Type[ConnT]], Type[ConnT]]:
     """
     Registers an importer for a model with a specified file extension and an optional default path.
diff --git a/nemo/lightning/io/connector.py b/nemo/lightning/io/connector.py
index 5706f1b4859b..fd7b814fe730 100644
--- a/nemo/lightning/io/connector.py
+++ b/nemo/lightning/io/connector.py
@@ -228,7 +228,8 @@ def nemo_load(
 
         model = load_context(path).model
         _trainer = trainer or Trainer(
-            devices=1, accelerator="cpu" if cpu else "gpu",
+            devices=1,
+            accelerator="cpu" if cpu else "gpu",
             strategy=MegatronStrategy(ddp="pytorch", setup_optimizers=False),
         )
 
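
Most of the hunks above are black's slice-spacing rule: when a slice bound is a non-trivial expression such as megatron_config.vocab_size, black treats the colon like a binary operator and pads it, so embedding[:megatron_config.vocab_size, :] becomes embedding[: megatron_config.vocab_size, :]. The rest comes from the same two tools: isort moves the plain import torch ahead of import torch.nn.functional as F in starcoder.py, and black splits the long io.apply_transforms(...) and Trainer(...) calls across lines. The sketch below reproduces the slice rule with black's public format_str API and the default Mode(); it is an illustration only, not the project's own formatting configuration, which may differ in settings such as line length.

# Sketch: reproduce the slice-spacing change seen throughout the patch.
# Assumes the `black` package is installed; names mirror the diff above.
import black

src = "x = embedding[:megatron_config.vocab_size, :]\n"
print(black.format_str(src, mode=black.Mode()))
# -> x = embedding[: megatron_config.vocab_size, :]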