From 851a10e55363f727f961880d99a3e016c7bbfd09 Mon Sep 17 00:00:00 2001
From: akoumpa
Date: Wed, 23 Oct 2024 21:57:00 +0000
Subject: [PATCH] Apply isort and black reformatting

Signed-off-by: akoumpa
---
 examples/llm/peft/hf.py                            | 22 +++++++++++++---------
 .../gpt/model/hf_auto_model_for_causal_lm.py       |  2 +-
 nemo/collections/llm/peft/lora.py                  | 13 +++++++++----
 .../recipes/hf_auto_model_for_causal_lm.py         |  1 -
 4 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/examples/llm/peft/hf.py b/examples/llm/peft/hf.py
index 414477ee534e..c6dbbf90bf29 100644
--- a/examples/llm/peft/hf.py
+++ b/examples/llm/peft/hf.py
@@ -19,7 +19,8 @@
 
 
 def mk_hf_dataset(tokenizer):
-    EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
+    EOS_TOKEN = tokenizer.eos_token  # Must add EOS_TOKEN
+
     def formatting_prompts_func(examples):
         alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 
@@ -32,18 +33,22 @@ def formatting_prompts_func(examples):
 ### Response:
 {}"""
         instruction = examples["context"]
-        input = examples["question"] 
-        output = examples["answers"]['text'] 
+        input = examples["question"]
+        output = examples["answers"]['text']
         if isinstance(output, list):
             output = output[0]
         text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
         ans = tokenizer(text)
         tokens = ans['input_ids']
-        return {'tokens': tokens, 'labels': tokens[1:] + [tokens[-1]], }
+        return {
+            'tokens': tokens,
+            'labels': tokens[1:] + [tokens[-1]],
+        }
 
     from datasets import load_dataset
-    dataset = load_dataset("rajpurkar/squad", split = "train")
-    dataset = dataset.map(formatting_prompts_func, batched = False, batch_size = 2)
+
+    dataset = load_dataset("rajpurkar/squad", split="train")
+    dataset = dataset.map(formatting_prompts_func, batched=False, batch_size=2)
     return dataset
 
 
@@ -76,8 +81,7 @@
     llm.api.finetune(
         model=llm.HfAutoModelForCausalLM(args.model),
         data=llm.HfDatasetDataModule(
-            mk_hf_dataset(tokenizer.tokenizer),
-            pad_token_id=tokenizer.tokenizer.eos_token_id
+            mk_hf_dataset(tokenizer.tokenizer), pad_token_id=tokenizer.tokenizer.eos_token_id
         ),
         trainer=nl.Trainer(
             devices=args.devices,
@@ -98,4 +102,4 @@
             target_modules=['*_proj'],
             dim=32,
         ),
-    )
\ No newline at end of file
+    )
diff --git a/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py b/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py
index d36910c4b52d..f29756dc05a7 100644
--- a/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py
+++ b/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py
@@ -16,9 +16,9 @@
 import torch
 import torch.nn.functional as F
 from transformers import AutoModelForCausalLM
-from nemo.collections.llm import fn
 
 from nemo.collections.common.tokenizers.huggingface.auto_tokenizer import AutoTokenizer
+from nemo.collections.llm import fn
 from nemo.lightning import io
 
 
diff --git a/nemo/collections/llm/peft/lora.py b/nemo/collections/llm/peft/lora.py
index fd4ef254c3ed..ecebf696a42c 100644
--- a/nemo/collections/llm/peft/lora.py
+++ b/nemo/collections/llm/peft/lora.py
@@ -12,12 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import re
-import torch
 import math
+import re
 from dataclasses import dataclass, field
 from typing import List, Literal
 
+import torch
 from megatron.core import parallel_state
 from megatron.core.tensor_parallel import ColumnParallelLinear, RowParallelLinear
 from torch import nn
@@ -72,7 +72,9 @@ def forward(self, x):
 
 
 class LinearAdapter(nn.Module):
-    def __init__(self, orig_linear, dim=8, alpha=32, dropout=0.1, dropout_position='post', lora_A_init_method='xavier'):
+    def __init__(
+        self, orig_linear, dim=8, alpha=32, dropout=0.1, dropout_position='post', lora_A_init_method='xavier'
+    ):
         super(LinearAdapter, self).__init__()
         assert isinstance(orig_linear, nn.Linear)
 
@@ -111,6 +113,7 @@ def forward(self, x):
             lora_res = self.dropout(lora_res)
         return res + lora_res
 
+
 @dataclass
 class LoRA(PEFT):
     """
@@ -212,7 +215,9 @@ def wildcard_match(pattern, key):
                 in_features = m.input_size
                 out_features = m.output_size
             elif isinstance(m, nn.Linear):
-                return LinearAdapter(m, dim=self.dim, alpha=self.alpha, dropout=self.dropout, lora_A_init_method=self.lora_A_init_method)
+                return LinearAdapter(
+                    m, dim=self.dim, alpha=self.alpha, dropout=self.dropout, lora_A_init_method=self.lora_A_init_method
+                )
             else:
                 raise NotImplementedError(f"Layer type is unrecognized for LoRA: {type(m)}")
 
diff --git a/nemo/collections/llm/recipes/hf_auto_model_for_causal_lm.py b/nemo/collections/llm/recipes/hf_auto_model_for_causal_lm.py
index b554bc7051cb..f3ac1d6975bc 100644
--- a/nemo/collections/llm/recipes/hf_auto_model_for_causal_lm.py
+++ b/nemo/collections/llm/recipes/hf_auto_model_for_causal_lm.py
@@ -168,7 +168,6 @@ def pretrain_recipe(
     )
 
 
-
 @run.cli.factory(target=finetune, name=NAME)
 def finetune_recipe(
     dir: Optional[str] = None,
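
Note for reviewers: the LinearAdapter whose __init__ and call site are re-wrapped
above is NeMo's LoRA wrapper around a plain nn.Linear (its forward returns
res + lora_res, as visible in the hunk context). For reference, here is a
minimal, self-contained sketch of that pattern — illustrative only, not the
NeMo implementation; the class name TinyLoRALinear, the alpha / dim scaling,
and the zero-init of the second projection are assumptions of this sketch:

    import torch
    from torch import nn


    class TinyLoRALinear(nn.Module):
        """Frozen base linear plus trainable low-rank update:
        y = W x + (alpha / dim) * B(A(x))  (scaling convention assumed)."""

        def __init__(self, orig_linear: nn.Linear, dim: int = 8, alpha: int = 32):
            super().__init__()
            assert isinstance(orig_linear, nn.Linear)
            self.orig = orig_linear
            # Freeze the wrapped layer; only the adapter weights train.
            for p in self.orig.parameters():
                p.requires_grad_(False)
            self.lora_a = nn.Linear(orig_linear.in_features, dim, bias=False)
            self.lora_b = nn.Linear(dim, orig_linear.out_features, bias=False)
            # Zero-init B so the adapter starts as an exact no-op.
            nn.init.zeros_(self.lora_b.weight)
            self.scale = alpha / dim

        def forward(self, x):
            return self.orig(x) + self.scale * self.lora_b(self.lora_a(x))


    if __name__ == "__main__":
        layer = TinyLoRALinear(nn.Linear(16, 16), dim=4, alpha=8)
        print(layer(torch.randn(2, 16)).shape)  # torch.Size([2, 16])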