From 2ce32432738a528c1719721225a8bb68ec92b703 Mon Sep 17 00:00:00 2001
From: Alexandros Koumparoulis <153118171+akoumpa@users.noreply.github.com>
Date: Tue, 26 Nov 2024 06:28:29 -0800
Subject: [PATCH] capitalize HF as HF instead of Hf (#11384)

Signed-off-by: Alexandros Koumparoulis
Signed-off-by: Alexandros Koumparoulis <153118171+akoumpa@users.noreply.github.com>
---
 examples/llm/peft/hf.py                        |  6 +++---
 examples/llm/sft/hf.py                         |  2 +-
 nemo/collections/llm/__init__.py               |  6 +++---
 nemo/collections/llm/gpt/data/__init__.py      |  4 ++--
 nemo/collections/llm/gpt/data/api.py           |  4 ++--
 nemo/collections/llm/gpt/data/hf_dataset.py    |  4 ++--
 nemo/collections/llm/gpt/model/__init__.py     |  4 ++--
 .../gpt/model/hf_auto_model_for_causal_lm.py   |  4 ++--
 .../recipes/hf_auto_model_for_causal_lm.py     | 20 +++++++++----------
 .../pytorch/strategies/megatron_strategy.py    |  2 +-
 10 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/examples/llm/peft/hf.py b/examples/llm/peft/hf.py
index 5b24c22ab79d..357dc5a7bd17 100644
--- a/examples/llm/peft/hf.py
+++ b/examples/llm/peft/hf.py
@@ -76,11 +76,11 @@ def formatting_prompts_func(examples):
     # See: https://github.com/Lightning-AI/pytorch-lightning/blob/8ad3e29816a63d8ce5c00ac104b14729a4176f4f/src/lightning/pytorch/plugins/precision/fsdp.py#L81
     grad_clip = None
     use_dist_samp = False
-    tokenizer = llm.HfAutoModelForCausalLM.configure_tokenizer(args.model)
+    tokenizer = llm.HFAutoModelForCausalLM.configure_tokenizer(args.model)
 
     llm.api.finetune(
-        model=llm.HfAutoModelForCausalLM(args.model),
-        data=llm.HfDatasetDataModule(
+        model=llm.HFAutoModelForCausalLM(args.model),
+        data=llm.HFDatasetDataModule(
             mk_hf_dataset(tokenizer.tokenizer), pad_token_id=tokenizer.tokenizer.eos_token_id
         ),
         trainer=nl.Trainer(
diff --git a/examples/llm/sft/hf.py b/examples/llm/sft/hf.py
index 1d282312b130..ce79e136a1c2 100755
--- a/examples/llm/sft/hf.py
+++ b/examples/llm/sft/hf.py
@@ -84,7 +84,7 @@ def squad(tokenizer) -> pl.LightningDataModule:
 
         from nemo.lightning.pytorch.accelerate.transformer_engine import te_accelerate
 
-    model = llm.HfAutoModelForCausalLM(model_name=args.model, model_accelerator=model_accelerator)
+    model = llm.HFAutoModelForCausalLM(model_name=args.model, model_accelerator=model_accelerator)
     tokenizer = model.tokenizer
 
     llm.api.finetune(
diff --git a/nemo/collections/llm/__init__.py b/nemo/collections/llm/__init__.py
index c36da39b43c7..f17128cdb36d 100644
--- a/nemo/collections/llm/__init__.py
+++ b/nemo/collections/llm/__init__.py
@@ -22,7 +22,7 @@
     AlpacaDataModule,
     DollyDataModule,
     FineTuningDataModule,
-    HfDatasetDataModule,
+    HFDatasetDataModule,
     MockDataModule,
     PreTrainingDataModule,
     SquadDataModule,
@@ -64,7 +64,7 @@
     GPTConfig126M,
     GPTConfig175B,
     GPTModel,
-    HfAutoModelForCausalLM,
+    HFAutoModelForCausalLM,
     Llama2Config7B,
     Llama2Config13B,
     Llama2Config70B,
@@ -218,7 +218,7 @@
     "dolly",
     "peft",
     "hf_dataset",
-    "HfAutoModelForCausalLM",
+    "HFAutoModelForCausalLM",
 ]
 
 
diff --git a/nemo/collections/llm/gpt/data/__init__.py b/nemo/collections/llm/gpt/data/__init__.py
index b42c350bcaba..c8690fd0668f 100644
--- a/nemo/collections/llm/gpt/data/__init__.py
+++ b/nemo/collections/llm/gpt/data/__init__.py
@@ -15,7 +15,7 @@
 from nemo.collections.llm.gpt.data.alpaca import AlpacaDataModule
 from nemo.collections.llm.gpt.data.dolly import DollyDataModule
 from nemo.collections.llm.gpt.data.fine_tuning import FineTuningDataModule
-from nemo.collections.llm.gpt.data.hf_dataset import HfDatasetDataModule
+from nemo.collections.llm.gpt.data.hf_dataset import HFDatasetDataModule
 from nemo.collections.llm.gpt.data.mock import MockDataModule
 from nemo.collections.llm.gpt.data.pre_training import PreTrainingDataModule, build_pretraining_datamodule
 from nemo.collections.llm.gpt.data.squad import SquadDataModule
@@ -28,5 +28,5 @@
     "MockDataModule",
     "PreTrainingDataModule",
     "build_pretraining_datamodule",
-    "HfDatasetDataModule",
+    "HFDatasetDataModule",
 ]
diff --git a/nemo/collections/llm/gpt/data/api.py b/nemo/collections/llm/gpt/data/api.py
index 2ebb30e781d1..374bee83b8b2 100644
--- a/nemo/collections/llm/gpt/data/api.py
+++ b/nemo/collections/llm/gpt/data/api.py
@@ -16,7 +16,7 @@
 import nemo_run as run
 
 from nemo.collections.llm.gpt.data.dolly import DollyDataModule
-from nemo.collections.llm.gpt.data.hf_dataset import HfDatasetDataModule
+from nemo.collections.llm.gpt.data.hf_dataset import HFDatasetDataModule
 from nemo.collections.llm.gpt.data.mock import MockDataModule
 from nemo.collections.llm.gpt.data.squad import SquadDataModule
 
@@ -42,7 +42,7 @@ def dolly() -> pl.LightningDataModule:
 @run.cli.factory
 @run.autoconvert
 def hf_dataset(dataset: str) -> pl.LightningDataModule:
-    return HfDatasetDataModule(dataset=dataset, global_batch_size=16, micro_batch_size=2)
+    return HFDatasetDataModule(dataset=dataset, global_batch_size=16, micro_batch_size=2)
 
 
 __all__ = ["mock", "squad", "dolly", "hf_dataset"]
diff --git a/nemo/collections/llm/gpt/data/hf_dataset.py b/nemo/collections/llm/gpt/data/hf_dataset.py
index 46562b6e72c8..0f45ecf265b7 100644
--- a/nemo/collections/llm/gpt/data/hf_dataset.py
+++ b/nemo/collections/llm/gpt/data/hf_dataset.py
@@ -18,7 +18,7 @@
 from nemo.lightning.pytorch.plugins import MegatronDataSampler
 
 
-class HfDatasetDataModule(pl.LightningDataModule):
+class HFDatasetDataModule(pl.LightningDataModule):
     def __init__(
         self,
         dataset,
@@ -88,7 +88,7 @@ def train_dataloader(self, collate_fn=None):
         from nemo.lightning.data import add_megatron_sampler
 
         if collate_fn is None:
-            collate_fn = lambda x: HfDatasetDataModule.collate_fn(x, pad_token_id=self.pad_token_id)
+            collate_fn = lambda x: HFDatasetDataModule.collate_fn(x, pad_token_id=self.pad_token_id)
 
         return DataLoader(
             self.dataset,
diff --git a/nemo/collections/llm/gpt/model/__init__.py b/nemo/collections/llm/gpt/model/__init__.py
index 9f186ebba90f..4e9448eaef2c 100644
--- a/nemo/collections/llm/gpt/model/__init__.py
+++ b/nemo/collections/llm/gpt/model/__init__.py
@@ -45,7 +45,7 @@
     Gemma2Config27B,
     Gemma2Model,
 )
-from nemo.collections.llm.gpt.model.hf_auto_model_for_causal_lm import HfAutoModelForCausalLM
+from nemo.collections.llm.gpt.model.hf_auto_model_for_causal_lm import HFAutoModelForCausalLM
 from nemo.collections.llm.gpt.model.llama import (
     CodeLlamaConfig7B,
     CodeLlamaConfig13B,
@@ -191,5 +191,5 @@
     "transformer_engine_layer_spec",
     "transformer_engine_full_layer_spec",
     "local_layer_spec",
-    "HfAutoModelForCausalLM",
+    "HFAutoModelForCausalLM",
 ]
diff --git a/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py b/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py
index 26e4604adc43..8f4595bd6cee 100644
--- a/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py
+++ b/nemo/collections/llm/gpt/model/hf_auto_model_for_causal_lm.py
@@ -31,7 +31,7 @@ def masked_cross_entropy(logits, targets, mask=None):
         return F.cross_entropy(logits, targets)
 
 
-class HfAutoModelForCausalLM(pl.LightningModule, io.IOMixin, fn.FNMixin):
+class HFAutoModelForCausalLM(pl.LightningModule, io.IOMixin, fn.FNMixin):
     def __init__(
         self,
         model_name='gpt2',
@@ -57,7 +57,7 @@ def __init__(
     @property
     def tokenizer(self):
         if self._tokenizer is None:
-            self._tokenizer = HfAutoModelForCausalLM.configure_tokenizer(self.model_name, self.trust_remote_code)
+            self._tokenizer = HFAutoModelForCausalLM.configure_tokenizer(self.model_name, self.trust_remote_code)
         return self._tokenizer
 
     @tokenizer.setter
diff --git a/nemo/collections/llm/recipes/hf_auto_model_for_causal_lm.py b/nemo/collections/llm/recipes/hf_auto_model_for_causal_lm.py
index d93b167b45b6..5d2bea23686c 100644
--- a/nemo/collections/llm/recipes/hf_auto_model_for_causal_lm.py
+++ b/nemo/collections/llm/recipes/hf_auto_model_for_causal_lm.py
@@ -23,7 +23,7 @@
 from nemo import lightning as nl
 from nemo.collections.llm.api import finetune, pretrain
 from nemo.collections.llm.gpt.data.mock import MockDataModule
-from nemo.collections.llm.gpt.model.hf_auto_model_for_causal_lm import HfAutoModelForCausalLM
+from nemo.collections.llm.gpt.model.hf_auto_model_for_causal_lm import HFAutoModelForCausalLM
 from nemo.collections.llm.peft.lora import LoRA
 from nemo.collections.llm.recipes.log.default import default_log, default_resume, tensorboard_logger
 from nemo.collections.llm.recipes.optim.adam import pytorch_adam_with_cosine_annealing
@@ -35,23 +35,23 @@
 @run.cli.factory(name=NAME)
 def model(model_name, load_pretrained_weights) -> run.Config[pl.LightningModule]:
     """
-    Factory function to create HfAutoModelForCausalLM model configurations.
+    Factory function to create HFAutoModelForCausalLM model configurations.
 
     Args:
         model_name (str): Model id on HF.
 
     Returns:
-        run.Config[pl.LightningModule]: Configuration for the HfAutoModelForCausalLM.
+        run.Config[pl.LightningModule]: Configuration for the HFAutoModelForCausalLM.
 
     Examples:
         CLI usage:
-            $ nemo llm pretrain --factory 'HfAutoModelForCausalLM(model_name="mistralai/Mistral-Nemo-Instruct-2407")'
+            $ nemo llm pretrain --factory 'HFAutoModelForCausalLM(model_name="mistralai/Mistral-Nemo-Instruct-2407")'
 
         Python API usage:
             >>> model_config = model(model_name="mistralai/Mistral-Nemo-Instruct-2407")
            >>> print(model_config)
     """
-    return run.Config(HfAutoModelForCausalLM, model_name=model_name, load_pretrained_weights=load_pretrained_weights)
+    return run.Config(HFAutoModelForCausalLM, model_name=model_name, load_pretrained_weights=load_pretrained_weights)
 
 
 def trainer(
@@ -69,7 +69,7 @@
     gradient_clip_val: float = 1.0,
 ) -> run.Config[nl.Trainer]:
     """
-    Configure the NeMo Lightning Trainer for HfAutoModelForCausalLM.
+    Configure the NeMo Lightning Trainer for HFAutoModelForCausalLM.
 
     This function sets up the distributed training strategy and other training parameters.
 
@@ -91,7 +91,7 @@
 
     Examples:
         CLI usage:
-            $ nemo llm pretrain trainer=HfAutoModelForCausalLM ...
+            $ nemo llm pretrain trainer=HFAutoModelForCausalLM ...
 
         Python API usage:
             >>> trainer_config = trainer(num_nodes=2, num_gpus_per_node=8)
@@ -131,7 +131,7 @@
     model_name: str = '',
 ) -> run.Partial:
     """
-    Create a pre-training recipe for a HfAutoModelForCausalLM model.
+    Create a pre-training recipe for a HFAutoModelForCausalLM model.
 
     This function sets up a complete configuration for pre-training, including
     model, trainer, data, logging, optimization, and resumption settings.
@@ -148,7 +148,7 @@ def pretrain_recipe(
 
     Examples:
         CLI usage:
-            $ nemo llm pretrain --factory 'HfAutoModelForCausalLM(model_name="mistralai/Mistral-Nemo-Instruct-2407")'
+            $ nemo llm pretrain --factory 'HFAutoModelForCausalLM(model_name="mistralai/Mistral-Nemo-Instruct-2407")'
 
         Python API usage:
             >>> recipe = pretrain_recipe(name="auto_pretrain", num_nodes=2, model_name="mistralai/Mistral-Nemo-Instruct-2407")
@@ -179,7 +179,7 @@ def finetune_recipe(
     model_name: str = '',
 ) -> run.Partial:
     """
-    Create a fine-tuning recipe for a HfAutoModelForCausalLM model.
+    Create a fine-tuning recipe for a HFAutoModelForCausalLM model.
 
     This function sets up a complete configuration for fine-tuning, including
     model, trainer, data, logging, optimization, and resumption settings.
diff --git a/nemo/lightning/pytorch/strategies/megatron_strategy.py b/nemo/lightning/pytorch/strategies/megatron_strategy.py
index 2b9caf24bce6..45dcf24c0890 100644
--- a/nemo/lightning/pytorch/strategies/megatron_strategy.py
+++ b/nemo/lightning/pytorch/strategies/megatron_strategy.py
@@ -278,7 +278,7 @@ def connect(self, model: pl.LightningModule) -> None:
         """Attaches a model to strategy."""
         super().connect(model)
 
-        assert not 'is_hf_model' in model.__dict__, "Cannot use HfAutoModelForCausalLM with MegatronParallel"
+        assert not 'is_hf_model' in model.__dict__, "Cannot use HFAutoModelForCausalLM with MegatronParallel"
 
         dtype_config = getattr(self._precision_plugin, "dtype_config", None)
         if self.pipeline_dtype is None and dtype_config: