Skip to content

Commit

Permalink
Add more recipes (#10957)
Browse files Browse the repository at this point in the history
* add recipes

Signed-off-by: Chen Cui <[email protected]>

* adjust finetuning recipe

Signed-off-by: Chen Cui <[email protected]>

* Apply isort and black reformatting

Signed-off-by: cuichenx <[email protected]>

---------

Signed-off-by: Chen Cui <[email protected]>
Signed-off-by: cuichenx <[email protected]>
Co-authored-by: cuichenx <[email protected]>
Signed-off-by: Chen Cui <[email protected]>
  • Loading branch information
cuichenx and cuichenx committed Oct 25, 2024
1 parent b4dd2bd commit 6a8cefc
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 6 deletions.
2 changes: 1 addition & 1 deletion nemo/collections/llm/gpt/model/baichuan.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def _import_qkv(ctx: io.TransformCTX, qkv_weights):
q = qkv_weights[0].squeeze().view(*new_q_tensor_shape)
k = qkv_weights[1].squeeze().view(*new_kv_tensor_shape)
v = qkv_weights[2].squeeze().view(*new_kv_tensor_shape)
- qkv_weights = torch.empty((0, head_size) + old_tensor_shape[1:])
+ qkv_weights = torch.empty((0, head_size) + old_tensor_shape[1:]).type_as(qkv_weights)
for i in range(num_query_groups):
qkv_weights = torch.cat((qkv_weights, q[i * heads_per_group : (i + 1) * heads_per_group, :, :]))
qkv_weights = torch.cat((qkv_weights, k[i : i + 1, :, :]))
Expand Down
4 changes: 2 additions & 2 deletions nemo/collections/llm/gpt/model/chatglm.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ def _import_qkv_weight(ctx: io.TransformCTX, hf_qkv_weights):
k = k.view(*new_kv_tensor_shape)
v = v.view(*new_kv_tensor_shape)

- qkv_weights = torch.empty((0, head_size, old_tensor_shape[1]))
+ qkv_weights = torch.empty((0, head_size, old_tensor_shape[1])).type_as(hf_qkv_weights)
for i in range(num_query_groups):
qkv_weights = torch.cat((qkv_weights, q[i * heads_per_group : (i + 1) * heads_per_group, :, :]))
qkv_weights = torch.cat((qkv_weights, k[i : i + 1, :, :]))
Expand Down Expand Up @@ -245,7 +245,7 @@ def _import_qkv_bias(ctx: io.TransformCTX, hf_qkv_bias):
q = q.view(*new_q_tensor_shape)
k = k.view(*new_kv_tensor_shape)
v = v.view(*new_kv_tensor_shape)
- qkv_bias = torch.empty((0, head_size))
+ qkv_bias = torch.empty((0, head_size)).type_as(hf_qkv_bias)
for i in range(num_query_groups):
qkv_bias = torch.cat((qkv_bias, q[i * heads_per_group : (i + 1) * heads_per_group, :]))
qkv_bias = torch.cat((qkv_bias, k[i : i + 1, :]))
Expand Down
8 changes: 8 additions & 0 deletions nemo/collections/llm/recipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@


from nemo.collections.llm.recipes import (
baichuan2_7b,
chatglm3_6b,
gemma_2b,
gemma_7b,
llama3_8b,
llama3_8b_16k,
llama3_8b_64k,
Expand All @@ -40,6 +44,10 @@
from nemo.collections.llm.recipes.optim import adam

__all__ = [
"baichuan2_7b",
"chatglm3_6b",
"gemma_2b",
"gemma_7b",
"llama3_8b",
"llama3_8b_16k",
"llama3_8b_64k",
Expand Down
8 changes: 5 additions & 3 deletions nemo/collections/llm/recipes/optim/adam.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ def distributed_fused_adam_with_cosine_annealing(
precision: str = "bf16-mixed", # or "16-mixed"
warmup_steps: int = 2000,
constant_steps: int = 0,
adam_beta1: float = 0.9,
adam_beta2: float = 0.95,
max_lr: float = 1e-4,
min_lr: Optional[float] = None,
clip_grad: float = 1.0,
Expand All @@ -37,14 +39,14 @@ def distributed_fused_adam_with_cosine_annealing(
weight_decay=0.1,
bf16=precision == "bf16-mixed",
fp16=precision == "16-mixed",
- adam_beta1=0.9,
- adam_beta2=0.95,
+ adam_beta1=adam_beta1,
+ adam_beta2=adam_beta2,
adam_eps=1e-5,
use_distributed_optimizer=True,
clip_grad=clip_grad,
)

- min_lr = min_lr or (0.1 * max_lr)
+ min_lr = min_lr if min_lr is not None else (0.1 * max_lr)
sched = run.Config(
CosineAnnealingScheduler,
warmup_steps=warmup_steps,
Expand Down

0 comments on commit 6a8cefc

Please sign in to comment.