From 73913d245dbfa1e348045f99c64a36e3a98a6742 Mon Sep 17 00:00:00 2001
From: akoumpa <akoumpa@users.noreply.github.com>
Date: Wed, 23 Oct 2024 20:45:03 +0000
Subject: [PATCH] Apply isort and black reformatting

Signed-off-by: akoumpa <akoumpa@users.noreply.github.com>
---
 examples/llm/sft/hf.py                        | 25 +++++++++++++------
 examples/llm/sft/lora.py                      | 10 +++++---
 nemo/collections/llm/__init__.py              |  2 +-
 nemo/collections/llm/gpt/data/__init__.py     |  4 +--
 nemo/collections/llm/gpt/data/hf_dataset.py   | 23 ++++++++---------
 .../gpt/model/hf_auto_model_for_causal_lm.py  |  3 ++-
 nemo/collections/llm/peft/lora.py             |  5 ++--
 7 files changed, 42 insertions(+), 30 deletions(-)

diff --git a/examples/llm/sft/hf.py b/examples/llm/sft/hf.py
index 59a20e5e530fb..e3b51d2a9257c 100644
--- a/examples/llm/sft/hf.py
+++ b/examples/llm/sft/hf.py
@@ -45,8 +45,10 @@ def _create_dataloader(self, dataset, **kwargs) -> DataLoader:
             **kwargs1,
         )
 
+
 def mk_hf_dataset(tokenizer):
-    EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
+    EOS_TOKEN = tokenizer.eos_token  # Must add EOS_TOKEN
+
     def formatting_prompts_func(examples):
         alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 
@@ -59,20 +61,25 @@ def formatting_prompts_func(examples):
     ### Response:
     {}"""
         instruction = examples["context"]
-        input       = examples["question"]
-        output      = examples["answers"]['text']
+        input = examples["question"]
+        output = examples["answers"]['text']
         if isinstance(output, list):
             output = output[0]
         text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
         ans = tokenizer(text)
         tokens = ans['input_ids']
-        return {'tokens': tokens, 'labels': tokens[1:] + [tokens[-1]], }
+        return {
+            'tokens': tokens,
+            'labels': tokens[1:] + [tokens[-1]],
+        }
 
     from datasets import load_dataset
-    dataset = load_dataset("rajpurkar/squad", split = "train")
-    dataset = dataset.map(formatting_prompts_func, batched = False, batch_size = 2)
+
+    dataset = load_dataset("rajpurkar/squad", split="train")
+    dataset = dataset.map(formatting_prompts_func, batched=False, batch_size=2)
     return dataset
 
+
 def squad(tokenizer) -> pl.LightningDataModule:
     return SquadDataModuleWithMbs(
         tokenizer=tokenizer,
@@ -83,13 +90,16 @@ def squad(tokenizer) -> pl.LightningDataModule:
         sanity_check_dist_workers=False,
     )
 
+
 class HfAutoModelPeft(llm.HfAutoModel):
     def configure_model(self):
         super().configure_model()
         self.model.eval()
         from lora import apply_lora_to_model
+
         apply_lora_to_model(self.model)
 
+
 if __name__ == '__main__':
     import argparse
 
@@ -119,8 +129,7 @@ def configure_model(self):
     llm.api.finetune(
         model=HfAutoModelPeft(args.model),
         data=llm.HfDatasetDataModule(
-            mk_hf_dataset(tokenizer.tokenizer),
-            pad_token_id=tokenizer.tokenizer.eos_token_id
+            mk_hf_dataset(tokenizer.tokenizer), pad_token_id=tokenizer.tokenizer.eos_token_id
         ),
         trainer=nl.Trainer(
             devices=args.devices,
diff --git a/examples/llm/sft/lora.py b/examples/llm/sft/lora.py
index 92dace0d73c81..0d590ed8d95b7 100644
--- a/examples/llm/sft/lora.py
+++ b/examples/llm/sft/lora.py
@@ -1,7 +1,9 @@
-import torch.nn as nn
-import torch
 import math
 
+import torch
+import torch.nn as nn
+
+
 class LoraLinear(nn.Module):
     def __init__(self, orig_linear, r=8, lora_alpha=32, lora_dropout=0.1):
         super(LoraLinear, self).__init__()
@@ -31,6 +33,7 @@ def forward(self, x):
         lora_res = lora_res @ self.lora_b.t()
         return res + lora_res * self.scale
 
+
 # Helper funcs
 def get_parent_module(model, module_name):
     print('get_parent_module module_name= ' + str(module_name))
@@ -40,6 +43,7 @@ def get_parent_module(model, module_name):
         parent = getattr(parent, name)
     return parent
 
+
 def apply_lora_to_model(model, r=8, lora_alpha=32, lora_dropout=0.1):
     for name, module in model.named_modules():
         if isinstance(module, nn.Linear) and '_proj' in name:
@@ -47,4 +51,4 @@ def apply_lora_to_model(model, r=8, lora_alpha=32, lora_dropout=0.1):
             target_attr = name.split('.')[-1]
             orig_lin = getattr(parent_module, target_attr)
             lora_linear = LoraLinear(orig_lin, r, lora_alpha, lora_dropout)
-            setattr(parent_module, target_attr, lora_linear)
\ No newline at end of file
+            setattr(parent_module, target_attr, lora_linear)
diff --git a/nemo/collections/llm/__init__.py b/nemo/collections/llm/__init__.py
index ec3c0252f6555..6dde88079567a 100644
--- a/nemo/collections/llm/__init__.py
+++ b/nemo/collections/llm/__init__.py
@@ -21,10 +21,10 @@
 from nemo.collections.llm.gpt.data import (
     DollyDataModule,
     FineTuningDataModule,
+    HfDatasetDataModule,
     MockDataModule,
     PreTrainingDataModule,
     SquadDataModule,
-    HfDatasetDataModule,
 )
 from nemo.collections.llm.gpt.data.api import dolly, mock, squad
 from nemo.collections.llm.gpt.model import (
diff --git a/nemo/collections/llm/gpt/data/__init__.py b/nemo/collections/llm/gpt/data/__init__.py
index 4d3d21fb654b2..f4e97d91e5cd5 100644
--- a/nemo/collections/llm/gpt/data/__init__.py
+++ b/nemo/collections/llm/gpt/data/__init__.py
@@ -14,10 +14,10 @@
 
 from nemo.collections.llm.gpt.data.dolly import DollyDataModule
 from nemo.collections.llm.gpt.data.fine_tuning import FineTuningDataModule
+from nemo.collections.llm.gpt.data.hf_dataset import HfDatasetDataModule
 from nemo.collections.llm.gpt.data.mock import MockDataModule
 from nemo.collections.llm.gpt.data.pre_training import PreTrainingDataModule
 from nemo.collections.llm.gpt.data.squad import SquadDataModule
-from nemo.collections.llm.gpt.data.hf_dataset import HfDatasetDataModule
 
 __all__ = [
     "FineTuningDataModule",
@@ -25,5 +25,5 @@
     "DollyDataModule",
     "MockDataModule",
     "PreTrainingDataModule",
-    "HfDatasetDataModule"
+    "HfDatasetDataModule",
 ]
diff --git a/nemo/collections/llm/gpt/data/hf_dataset.py b/nemo/collections/llm/gpt/data/hf_dataset.py
index 083f5fbc52352..7e70a970913e6 100644
--- a/nemo/collections/llm/gpt/data/hf_dataset.py
+++ b/nemo/collections/llm/gpt/data/hf_dataset.py
@@ -21,14 +21,14 @@ class HfDatasetDataModule(pl.LightningDataModule):
     def __init__(
         self,
         dataset,
-        num_workers = 2,
-        pin_memory = True,
-        persistent_workers = True,
-        micro_batch_size = 2,
-        global_batch_size = 2,
-        pad_token_id = 0,
-        use_mcore_sampler = False,
-        mcore_dataloader_type = 'cyclic',
+        num_workers=2,
+        pin_memory=True,
+        persistent_workers=True,
+        micro_batch_size=2,
+        global_batch_size=2,
+        pad_token_id=0,
+        use_mcore_sampler=False,
+        mcore_dataloader_type='cyclic',
     ) -> None:
         super().__init__()
         assert pad_token_id is not None
@@ -56,10 +56,8 @@ def extract_key_from_dicts(batch, key):
 
         def pad_within_micro(batch, pad_token_id):
             max_len = max(map(len, batch))
-            return [
-                item + [pad_token_id] * (max_len - len(item))
-                for item in batch
-            ]
+            return [item + [pad_token_id] * (max_len - len(item)) for item in batch]
+
         return {
             key: batchify(
                 torch.LongTensor(
@@ -103,4 +101,3 @@ def train_dataloader(self, collate_fn=None):
             rank=rank,
             world_size=world_size,
         )
-
diff --git a/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py b/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py
index a9f53ea4158f9..f29756dc05a7a 100644
--- a/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py
+++ b/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py
@@ -18,8 +18,8 @@
 from transformers import AutoModelForCausalLM
 
 from nemo.collections.common.tokenizers.huggingface.auto_tokenizer import AutoTokenizer
-from nemo.lightning import io
 from nemo.collections.llm import fn
+from nemo.lightning import io
 
 
 def _extract_non_bias_params(model):
@@ -66,6 +66,7 @@ def configure_model(self):
             self.model = AutoModelForCausalLM.from_pretrained(self.model_name, torch_dtype='auto')
         else:
             from transformers import AutoConfig
+
             config = AutoConfig.from_pretained(self.model_name)
             self.model = AutoModelForCausalLM.from_config(config)
         self.model.train()
diff --git a/nemo/collections/llm/peft/lora.py b/nemo/collections/llm/peft/lora.py
index 18db9b164d6aa..dfc91eb764234 100644
--- a/nemo/collections/llm/peft/lora.py
+++ b/nemo/collections/llm/peft/lora.py
@@ -12,12 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import re
-import torch
 import math
+import re
 from dataclasses import dataclass, field
 from typing import List, Literal
 
+import torch
 from megatron.core import parallel_state
 from megatron.core.tensor_parallel import ColumnParallelLinear, RowParallelLinear
 from torch import nn
@@ -107,6 +107,7 @@ def forward(self, x):
             lora_res = self.dropout(lora_res)
         return res + lora_res
 
+
 @dataclass
 class LoRA(PEFT):
     """