Merge branch 'main' into huiyingl/llm.generate_fixes
Signed-off-by: HuiyingLi <[email protected]>
HuiyingLi committed Oct 22, 2024
2 parents 302008e + 47f2446 commit a4cb8c6
Showing 67 changed files with 6,009 additions and 535 deletions.
11 changes: 5 additions & 6 deletions .github/workflows/cicd-main.yml
@@ -3887,24 +3887,23 @@ jobs:
rm -rf tests/collections/llm/gpt_pretrain_results
rm -rf tests/collections/llm/gpt_index_mappings
OPTIONAL_L2_NeMo_2_GPT_DDP_Param_Parity_check:
L2_NeMo_2_GPT_DDP_Param_Parity_check:
needs: [cicd-test-container-setup]
uses: ./.github/workflows/_test_template.yml
if: contains(fromJSON(needs.cicd-test-container-setup.outputs.test_to_run), 'OPTIONAL_L2_NeMo_2_GPT_DDP_Param_Parity_check') || needs.cicd-test-container-setup.outputs.all == 'true'
with:
RUNNER: self-hosted-azure
SCRIPT: |
python tests/lightning/test_ddp_parity_checker.py \
TORCHDYNAMO_DISABLE=1 python tests/lightning/test_ddp_parity_checker.py \
--vocab-path=/home/TestData/nlp/megatron_gpt/data/gpt/vocab.json \
--merges-path=/home/TestData/nlp/megatron_gpt/data/gpt/merges.txt \
--data-path=/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document
AFTER_SCRIPT: |
rm -rf tests/collections/llm/gpt_pretrain_results
rm -rf tests/collections/llm/gpt_index_mappings
IS_OPTIONAL: true

L2_NeMo_2_SSM_Pretraining:
needs: [cicd-test-container-setup]
uses: ./.github/workflows/_test_template.yml
@@ -4423,7 +4422,7 @@ jobs:
- Speech_Checkpoints_tests
- L2_Stable_Diffusion_Training
- L2_NeMo_2_GPT_Pretraining_no_transformer_engine
#- OPTIONAL_L2_NeMo_2_GPT_DDP_Param_Parity_check
- L2_NeMo_2_GPT_DDP_Param_Parity_check
- L2_NeMo_2_HF_MODEL_IMPORT
- L2_NeMo_2_SSM_Pretraining
- L2_NeMo_2_SSM_Finetuning
@@ -4587,4 +4586,4 @@ jobs:
- name: "Pipeline not successful, set exit code to 1"
if: ${{ always() && steps.pipeline-conclusion.outputs.SUCCESS == 'false' }}
run: exit 1
run: exit 1
2 changes: 1 addition & 1 deletion Dockerfile.ci
@@ -53,7 +53,7 @@ RUN pip install nemo_run@git+https://github.com/NVIDIA/NeMo-Run.git@${NEMO_RUN_T
# Install NeMo requirements
ARG TE_TAG=7d576ed25266a17a7b651f2c12e8498f67e0baea
ARG MODELOPT_VERSION=0.17.0
ARG MCORE_TAG=0d89fc4c0d4394f915fffff11212d6957652337f
ARG MCORE_TAG=db7d37b54ef96e35f7afc56e29fffb60f5c957b9

ARG APEX_TAG=810ffae374a2b9cb4b5c5e28eaeca7d7998fca0c
RUN \
21 changes: 0 additions & 21 deletions docs/source/performance/performance_long_sequence.md
@@ -7,27 +7,6 @@
- Container: [NeMo24.03.01.framework](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nemo/tags)
- System: DGX-H100

<style>
table {
border-collapse: collapse;
}
th {
border: 1px solid;
padding: 5px;
text-align: center; /* Center-align all header cells */
}
td {
border: 1px solid;
padding: 5px;
}
th.top-border {
border-top: 2px solid;
}
td.speedup {
font-weight: bold;
}
</style>


<table>
<thead>
11 changes: 10 additions & 1 deletion nemo/collections/common/tokenizers/huggingface/auto_tokenizer.py
@@ -13,7 +13,7 @@
# limitations under the License.

from collections import OrderedDict
from typing import Optional
from typing import List, Optional

from transformers import AutoTokenizer as AUTOTOKENIZER

@@ -43,6 +43,7 @@ def __init__(
sep_token: Optional[str] = None,
cls_token: Optional[str] = None,
unk_token: Optional[str] = None,
additional_special_tokens: Optional[List] = [],
use_fast: Optional[bool] = False,
trust_remote_code: Optional[bool] = False,
):
@@ -60,6 +61,7 @@ def __init__(
sep_token: token used for separating sequences
cls_token: class token. Usually equal to bos_token
unk_token: token to use for unknown tokens
additional_special_tokens: list of additional special tokens besides the standard ones (bos, eos, pad, etc.), e.g. the T5 sentinel tokens (<extra_id_0>, <extra_id_1>, etc.)
use_fast: whether to use fast HuggingFace tokenizer
"""
try:
@@ -124,10 +126,17 @@ def __init__(
elif self.tokenizer.cls_token is None and self.tokenizer.bos_token:
special_tokens_dict["cls_token"] = self.tokenizer.bos_token

# add additional special tokens (not standard special tokens such as bos, eod, sep)
if additional_special_tokens is not None:
special_tokens_dict["additional_special_tokens"] = additional_special_tokens

new_tokens_in_vocab = []
for token in [mask_token, bos_token, eos_token, pad_token, sep_token, cls_token, unk_token]:
if token is not None and token not in self.tokenizer.get_vocab():
new_tokens_in_vocab.append(token)
for token in additional_special_tokens:
if token is not None and token not in self.tokenizer.get_vocab():
new_tokens_in_vocab.append(token)

if len(new_tokens_in_vocab) > 0:
"""
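As a minimal sketch of how the new additional_special_tokens argument might be used with this wrapper class: the sentinel token strings come from the docstring above, while the positional pretrained-model argument and the model name are assumptions for illustration only.

# Hypothetical usage sketch; model name and first positional argument are assumed.
from nemo.collections.common.tokenizers.huggingface.auto_tokenizer import AutoTokenizer

tokenizer = AutoTokenizer(
    "google/t5-v1_1-small",  # assumed pretrained model name, illustration only
    additional_special_tokens=["<extra_id_0>", "<extra_id_1>"],  # T5-style sentinel tokens
)
# Any of these tokens missing from the vocabulary are registered as special tokens.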
13 changes: 13 additions & 0 deletions nemo/collections/diffusion/encoders/__init__.py
@@ -0,0 +1,13 @@
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
199 changes: 199 additions & 0 deletions nemo/collections/diffusion/encoders/conditioner.py
@@ -0,0 +1,199 @@
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Union

import torch
import torch.nn as nn
from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5Tokenizer


class AbstractEmbModel(nn.Module):
def __init__(self, enable_lora_finetune=False, target_block=[], target_module=[]):
super().__init__()
self._is_trainable = None
self._ucg_rate = None
self._input_key = None

self.TARGET_BLOCK = target_block
self.TARGET_MODULE = target_module
if enable_lora_finetune:
self.lora_layers = []

@property
def is_trainable(self) -> bool:
return self._is_trainable

@property
def ucg_rate(self) -> Union[float, torch.Tensor]:
return self._ucg_rate

@property
def input_key(self) -> str:
return self._input_key

@is_trainable.setter
def is_trainable(self, value: bool):
self._is_trainable = value

@ucg_rate.setter
def ucg_rate(self, value: Union[float, torch.Tensor]):
self._ucg_rate = value

@input_key.setter
def input_key(self, value: str):
self._input_key = value

@is_trainable.deleter
def is_trainable(self):
del self._is_trainable

@ucg_rate.deleter
def ucg_rate(self):
del self._ucg_rate

@input_key.deleter
def input_key(self):
del self._input_key

def encode(self, *args, **kwargs):
raise NotImplementedError

def _enable_lora(self, lora_model):
for module_name, module in lora_model.named_modules():
if module.__class__.__name__ in self.TARGET_BLOCK:
tmp = {}
for sub_name, sub_module in module.named_modules():
if sub_module.__class__.__name__ in self.TARGET_MODULE:
if hasattr(sub_module, "input_size") and hasattr(
sub_module, "output_size"
): # for megatron ParallelLinear
lora = LoraWrapper(sub_module, sub_module.input_size, sub_module.output_size)
else: # for nn.Linear
lora = LoraWrapper(sub_module, sub_module.in_features, sub_module.out_features)
self.lora_layers.append(lora)
if sub_name not in tmp.keys():
tmp.update({sub_name: lora})
else:
print(f"Duplicate subnames are found in module {module_name}")
for sub_name, lora_layer in tmp.items():
lora_name = f'{sub_name}_lora'
module.add_module(lora_name, lora_layer)


class FrozenCLIPEmbedder(AbstractEmbModel):
"""Uses the CLIP transformer encoder for text (from Hugging Face)"""

LAYERS = ["last", "pooled", "hidden"]

def __init__(
self,
version="openai/clip-vit-large-patch14",
device="cuda",
max_length=77,
enable_lora_finetune=False,
layer="last",
layer_idx=None,
always_return_pooled=False,
dtype=torch.float,
):
super().__init__(enable_lora_finetune, target_block=["CLIPAttention", "CLIPMLP"], target_module=["Linear"])
self.tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
self.transformer = CLIPTextModel.from_pretrained(version, torch_dtype=dtype).to(device)
self.device = device
self.max_length = max_length
self.freeze()
if enable_lora_finetune:
self._enable_lora(self.transformer)
print(f"CLIP transformer encoder add {len(self.lora_layers)} lora layers.")

self.layer = layer
self.layer_idx = layer_idx
self.return_pooled = always_return_pooled
if layer == "hidden":
assert layer_idx is not None
assert 0 <= abs(layer_idx) <= 12

def freeze(self):
self.transformer = self.transformer.eval()
for param in self.parameters():
param.requires_grad = False

def forward(self, text, max_sequence_length=None):
batch_encoding = self.tokenizer(
text,
truncation=True,
max_length=max_sequence_length if max_sequence_length else self.max_length,
return_length=True,
return_overflowing_tokens=False,
padding="max_length",
return_tensors="pt",
)
tokens = batch_encoding["input_ids"].to(self.transformer.device, non_blocking=True)
outputs = self.transformer(input_ids=tokens, output_hidden_states=(self.layer == "hidden"))

if self.layer == "last":
z = outputs.last_hidden_state
elif self.layer == "pooled":
z = outputs.pooler_output[:, None, :]
else:
z = outputs.hidden_states[self.layer_idx]

# Pad the seq length to multiple of 8
seq_len = (z.shape[1] + 8 - 1) // 8 * 8
z = torch.nn.functional.pad(z, (0, 0, 0, seq_len - z.shape[1]), value=0.0)
if self.return_pooled:
return z, outputs.pooler_output
return z

def encode(self, text):
return self(text)


class FrozenT5Embedder(AbstractEmbModel):
def __init__(
self,
version="google/t5-v1_1-xxl",
max_length=512,
device="cuda",
dtype=torch.float,
):
super().__init__()
self.tokenizer = T5Tokenizer.from_pretrained("google/t5-v1_1-xxl", max_length=max_length)
self.transformer = T5EncoderModel.from_pretrained(version, torch_dtype=dtype).to(device)
self.max_length = max_length
self.freeze()
self.device = device
self.dtype = dtype

def freeze(self):
self.transformer = self.transformer.eval()
for param in self.parameters():
param.requires_grad = False

def forward(self, text, max_sequence_length=None):
batch_encoding = self.tokenizer(
text,
truncation=True,
max_length=max_sequence_length if max_sequence_length else self.max_length,
return_length=False,
return_overflowing_tokens=False,
padding="max_length",
return_tensors="pt",
)

tokens = batch_encoding["input_ids"].to(self.transformer.device, non_blocking=True)
outputs = self.transformer(input_ids=tokens, output_hidden_states=None)

return outputs.last_hidden_state
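
For orientation, a brief usage sketch of the two encoders added in this file, following the defaults defined above; the device/dtype choices and the prompt string are illustrative assumptions, not part of the diff.

import torch
from nemo.collections.diffusion.encoders.conditioner import FrozenCLIPEmbedder, FrozenT5Embedder

# CLIP conditioner: returns (embeddings, pooled output) when always_return_pooled=True.
clip = FrozenCLIPEmbedder(device="cuda", dtype=torch.bfloat16, always_return_pooled=True)
z, pooled = clip(["a photo of a cat"])  # z is zero-padded to a sequence length that is a multiple of 8

# T5 conditioner: returns the encoder's last hidden state.
t5 = FrozenT5Embedder(device="cuda", dtype=torch.bfloat16)
t5_emb = t5(["a photo of a cat"], max_sequence_length=256)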