From 4731c758c5336dcaa2510cf2fdf01feb3da10714 Mon Sep 17 00:00:00 2001
From: akoumpa
Date: Wed, 30 Oct 2024 02:16:25 +0000
Subject: [PATCH] Apply isort and black reformatting

Signed-off-by: akoumpa
---
 nemo/collections/llm/gpt/model/base.py      |  1 +
 nemo/collections/llm/gpt/model/chatglm.py   |  5 ++---
 nemo/collections/llm/gpt/model/llama.py     | 11 ++++++++---
 nemo/collections/llm/gpt/model/mistral.py   |  7 ++++++-
 nemo/collections/llm/gpt/model/mixtral.py   |  7 ++++++-
 nemo/collections/llm/gpt/model/nemotron.py  |  4 +++-
 nemo/collections/llm/gpt/model/qwen2.py     |  5 ++++-
 nemo/collections/llm/gpt/model/starcoder.py |  6 +++---
 nemo/lightning/io/api.py                    |  1 +
 nemo/lightning/io/connector.py              |  3 ++-
 10 files changed, 36 insertions(+), 14 deletions(-)

diff --git a/nemo/collections/llm/gpt/model/base.py b/nemo/collections/llm/gpt/model/base.py
index 39318bf2ff7e..ecba13d86ec6 100644
--- a/nemo/collections/llm/gpt/model/base.py
+++ b/nemo/collections/llm/gpt/model/base.py
@@ -134,6 +134,7 @@ def torch_dtype_from_mcore_config(config: TransformerConfig):
     else:
         return torch.float
 
+
 @dataclass
 class GPTConfig(TransformerConfig, io.IOMixin):
     # From megatron.core.models.gpt.gpt_model.GPTModel
diff --git a/nemo/collections/llm/gpt/model/chatglm.py b/nemo/collections/llm/gpt/model/chatglm.py
index 61fa70d5116e..f9ad8fc6010c 100644
--- a/nemo/collections/llm/gpt/model/chatglm.py
+++ b/nemo/collections/llm/gpt/model/chatglm.py
@@ -198,7 +198,6 @@ def config(self) -> "AutoConfig":
         )
 
 
-
 @io.state_transform(
     source_key="embedding.word_embeddings.weight",
     target_key="transformer.embedding.word_embeddings.weight",
@@ -206,7 +205,7 @@ def config(self) -> "AutoConfig":
 def _export_embedding(ctx: io.TransformCTX, embedding):
     megatron_config = ctx.target.config
     # prune padding.
-    return embedding[:megatron_config.vocab_size, :]
+    return embedding[: megatron_config.vocab_size, :]
 
 
 @io.state_transform(
@@ -216,7 +215,7 @@ def _export_embedding(ctx: io.TransformCTX, embedding):
 def _export_head(ctx: io.TransformCTX, embedding):
     megatron_config = ctx.target.config
     # prune padding.
-    return embedding[:megatron_config.vocab_size, :]
+    return embedding[: megatron_config.vocab_size, :]
 
 
 @io.state_transform(
diff --git a/nemo/collections/llm/gpt/model/llama.py b/nemo/collections/llm/gpt/model/llama.py
index 79d688352457..7b235d59ee89 100644
--- a/nemo/collections/llm/gpt/model/llama.py
+++ b/nemo/collections/llm/gpt/model/llama.py
@@ -322,7 +322,12 @@ def convert_state(self, source, target):
             "decoder.final_layernorm.weight": "model.norm.weight",
         }
 
-        return io.apply_transforms(source, target, mapping=mapping, transforms=[_export_qkv, _export_linear_fc1, _export_embedding, _export_head])
+        return io.apply_transforms(
+            source,
+            target,
+            mapping=mapping,
+            transforms=[_export_qkv, _export_linear_fc1, _export_embedding, _export_head],
+        )
 
     @property
     def tokenizer(self):
@@ -431,7 +436,7 @@ def _export_qkv(ctx: io.TransformCTX, linear_qkv):
 def _export_embedding(ctx: io.TransformCTX, embedding):
     megatron_config = ctx.target.config
     # prune padding.
-    return embedding[:megatron_config.vocab_size, :]
+    return embedding[: megatron_config.vocab_size, :]
 
 
 @io.state_transform(
@@ -441,7 +446,7 @@ def _export_embedding(ctx: io.TransformCTX, embedding):
 def _export_head(ctx: io.TransformCTX, embedding):
     megatron_config = ctx.target.config
     # prune padding.
-    return embedding[:megatron_config.vocab_size, :]
+    return embedding[: megatron_config.vocab_size, :]
 
 
 @io.state_transform(
diff --git a/nemo/collections/llm/gpt/model/mistral.py b/nemo/collections/llm/gpt/model/mistral.py
index 3875d7bee1dc..a71042e2ba6f 100644
--- a/nemo/collections/llm/gpt/model/mistral.py
+++ b/nemo/collections/llm/gpt/model/mistral.py
@@ -218,7 +218,12 @@ def convert_state(self, source, target):
             "decoder.final_layernorm.weight": "model.norm.weight",
         }
 
-        return io.apply_transforms(source, target, mapping=mapping, transforms=[_export_qkv, _export_linear_fc1, _export_embedding, _export_head])
+        return io.apply_transforms(
+            source,
+            target,
+            mapping=mapping,
+            transforms=[_export_qkv, _export_linear_fc1, _export_embedding, _export_head],
+        )
 
     @property
     def tokenizer(self):
diff --git a/nemo/collections/llm/gpt/model/mixtral.py b/nemo/collections/llm/gpt/model/mixtral.py
index eb936584e03a..29361c38fda5 100644
--- a/nemo/collections/llm/gpt/model/mixtral.py
+++ b/nemo/collections/llm/gpt/model/mixtral.py
@@ -301,7 +301,12 @@ def convert_state(self, source, target):
             "decoder.final_layernorm.weight": "model.norm.weight",
         }
 
-        return io.apply_transforms(source, target, mapping=mapping, transforms=[_export_qkv, _export_moe_w1_w3, _export_embedding, _export_head])
+        return io.apply_transforms(
+            source,
+            target,
+            mapping=mapping,
+            transforms=[_export_qkv, _export_moe_w1_w3, _export_embedding, _export_head],
+        )
 
     @property
     def tokenizer(self):
diff --git a/nemo/collections/llm/gpt/model/nemotron.py b/nemo/collections/llm/gpt/model/nemotron.py
index bed627c10649..8fdc5f8f0f00 100644
--- a/nemo/collections/llm/gpt/model/nemotron.py
+++ b/nemo/collections/llm/gpt/model/nemotron.py
@@ -242,7 +242,9 @@ def convert_state(self, source, target):
             "decoder.final_layernorm.bias": "model.norm.bias",
         }
 
-        return io.apply_transforms(source, target, mapping=mapping, transforms=[_export_qkv, _export_embedding, _export_head])
+        return io.apply_transforms(
+            source, target, mapping=mapping, transforms=[_export_qkv, _export_embedding, _export_head]
+        )
 
     @property
     def tokenizer(self):
diff --git a/nemo/collections/llm/gpt/model/qwen2.py b/nemo/collections/llm/gpt/model/qwen2.py
index 984844d8643f..4cf0292d1a6a 100644
--- a/nemo/collections/llm/gpt/model/qwen2.py
+++ b/nemo/collections/llm/gpt/model/qwen2.py
@@ -201,7 +201,10 @@ def convert_state(self, source, target):
         }
 
         return io.apply_transforms(
-            source, target, mapping=mapping, transforms=[_export_qkv, _export_qkv_bias, _export_linear_fc1, _export_embedding, _export_head]
+            source,
+            target,
+            mapping=mapping,
+            transforms=[_export_qkv, _export_qkv_bias, _export_linear_fc1, _export_embedding, _export_head],
         )
 
     @property
diff --git a/nemo/collections/llm/gpt/model/starcoder.py b/nemo/collections/llm/gpt/model/starcoder.py
index 5e461618d75b..b3e7b25f705b 100644
--- a/nemo/collections/llm/gpt/model/starcoder.py
+++ b/nemo/collections/llm/gpt/model/starcoder.py
@@ -16,9 +16,9 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Annotated, Callable, Optional
 
+import torch
 import torch.nn.functional as F
 from torch import nn
-import torch
 
 from nemo.collections.llm.gpt.model.base import GPTConfig, GPTModel, torch_dtype_from_mcore_config
 from nemo.collections.llm.utils import Config
@@ -233,7 +233,7 @@ def config(self) -> "HFStarcoderConfig":
 def _export_embedding(ctx: io.TransformCTX, embedding):
     megatron_config = ctx.target.config
     # prune padding.
-    return embedding[:megatron_config.vocab_size, :]
+    return embedding[: megatron_config.vocab_size, :]
 
 
 @io.state_transform(
@@ -243,4 +243,4 @@ def _export_embedding(ctx: io.TransformCTX, embedding):
 def _export_head(ctx: io.TransformCTX, embedding):
     megatron_config = ctx.target.config
     # prune padding.
-    return embedding[:megatron_config.vocab_size, :]
+    return embedding[: megatron_config.vocab_size, :]
diff --git a/nemo/lightning/io/api.py b/nemo/lightning/io/api.py
index a5fb7daa7fc4..be9372f2e79b 100644
--- a/nemo/lightning/io/api.py
+++ b/nemo/lightning/io/api.py
@@ -63,6 +63,7 @@ def load_context(path: Path, subpath: Optional[str] = None, build: bool = True):
         path = path / 'context'
     return load(path, output_type=TrainerContext, subpath=subpath, build=build)
 
+
 def model_importer(target: Type[ConnectorMixin], ext: str) -> Callable[[Type[ConnT]], Type[ConnT]]:
     """
     Registers an importer for a model with a specified file extension and an optional default path.
diff --git a/nemo/lightning/io/connector.py b/nemo/lightning/io/connector.py
index 5706f1b4859b..fd7b814fe730 100644
--- a/nemo/lightning/io/connector.py
+++ b/nemo/lightning/io/connector.py
@@ -228,7 +228,8 @@ def nemo_load(
 
         model = load_context(path).model
         _trainer = trainer or Trainer(
-            devices=1, accelerator="cpu" if cpu else "gpu",
+            devices=1,
+            accelerator="cpu" if cpu else "gpu",
             strategy=MegatronStrategy(ddp="pytorch", setup_optimizers=False),
         )
 
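
Most of the hunks above are black's slice-spacing rule: when a slice bound is a non-trivial expression such as megatron_config.vocab_size, black treats the colon like a binary operator and pads it, so embedding[:megatron_config.vocab_size, :] becomes embedding[: megatron_config.vocab_size, :]. The rest comes from the same two tools: isort moves the plain import torch ahead of import torch.nn.functional as F in starcoder.py, and black splits the long io.apply_transforms(...) and Trainer(...) calls across lines. The sketch below reproduces the slice rule with black's public format_str API and the default Mode(); it is an illustration only, not the project's own formatting configuration, which may differ in settings such as line length.

# Sketch: reproduce the slice-spacing change seen throughout the patch.
# Assumes the `black` package is installed; names mirror the diff above.
import black

src = "x = embedding[:megatron_config.vocab_size, :]\n"
print(black.format_str(src, mode=black.Mode()))
# -> x = embedding[: megatron_config.vocab_size, :]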