Skip to content

Commit

Permalink
gemma configs
Browse files Browse the repository at this point in the history
  • Loading branch information
rasbt committed Mar 27, 2024
1 parent af8a39d commit 595ae45
Show file tree
Hide file tree
Showing 6 changed files with 257 additions and 10 deletions.
9 changes: 6 additions & 3 deletions config_hub/finetune/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@ For more information, see the [Dealing with out-of-memory (OOM) errors](../../tu
| falcon-7b/lora.yaml | 7B | Alpaca 2k | 4 | 0.945 | 16.69 GB | 512 | 2 | bfloat16 | 24.88 min (1xA10G) |
| falcon-7b/qlora.yaml | 7B | Alpaca 2k | 4 | 0.993 | 9.44 GB | 512 | 2 | bfloat16 | 50.76 min (1xA10G) |
| | | | | | | | | | |
| gemma-2b/lora.yaml | 2B | Alpaca 2k | 3 | 1.476 | 12.62 GB | 512 | 2 | bfloat16 | 18.31 min (1xA10G) |
| gemma-2b/qlora.yaml | 2B | Alpaca 2k | 3 | 1.626 | 11.51 GB | 512 | 2 | bfloat16 | 25.29 min (1xA10G) |
| gemma-2b/full.yaml | 2B | Alpaca 2k | 0.35 | 1.046 | 18.47 GB | 512 | 2 | bfloat16 | 16.79 min (2xA10G) |
| gemma-2b/lora.yaml | 2B | Alpaca 2k | 2 | 1.476 | 12.62 GB | 512 | 2 | bfloat16 | 9.29 min (1xA10G) |
| gemma-2b/qlora.yaml | 2B | Alpaca 2k | 2 | 0.981 | 11.59 GB | 512 | 2 | bfloat16 | 12.90 min (1xA10G) |
| gemma-2b/full.yaml | 2B | Alpaca 2k | 0.35 | 0.990 | 17.43 GB | 512 | 1 | bfloat16 | 13.61 min (4xA10G) |
| | | | | | | | | | |
| gemma-7b/lora.yaml | 7B | Alpaca 2k | 2 | 0.903 | 25.30 GB | 512 | 1 | bfloat16 | 11.47 min (1xA100) |
| gemma-7b/qlora.yaml | 7B | Alpaca 2k | 2 | 0.951 | 17.31 GB | 512 | 1 | bfloat16 | 23.46 min (1xA100) |
| | | | | | | | | | |
| llama-2-7b/lora.yaml | 7B | Alpaca 2k | 4 | 0.802 | 19.77 GB | 512 | 2 | bfloat16 | 32.75 min (A10G) |
| llama-2-7b/qlora.yaml | 7B | Alpaca 2k | 4 | 0.814 | 13.68 GB | 512 | 2 | bfloat16 | 45.68 min (A10G) |
Expand Down
8 changes: 4 additions & 4 deletions config_hub/finetune/gemma-2b/full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ out_dir: out/finetune/full-gemma-2b
precision: bf16-true

# How many devices/GPUs to use. (type: Union[int, str], default: 1)
devices: 1
devices: 4

# Data-related arguments. If not provided, the default is ``litgpt.data.Alpaca``.
data:
Expand All @@ -32,7 +32,7 @@ train:
log_interval: 1

# Number of samples between optimizer steps across data-parallel ranks (type: int, default: 128)
global_batch_size: 6
global_batch_size: 16

# Number of samples per data-parallel rank (type: int, default: 4)
micro_batch_size: 1
Expand All @@ -41,13 +41,13 @@ train:
lr_warmup_steps: 100

# Number of epochs to train on (type: Optional[int], default: 5)
epochs: 3
epochs: 1

# Total number of tokens to train on (type: Optional[int], default: null)
max_tokens:

# Limits the number of optimizer steps to run. (type: Optional[int], default: null)
max_steps:
max_steps: 50

# Limits the length of samples. Off by default (type: Optional[int], default: null)
max_seq_length: 512
Expand Down
4 changes: 2 additions & 2 deletions config_hub/finetune/gemma-2b/lora.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ quantize:
devices: 1

# The LoRA rank. (type: int, default: 8)
lora_r: 16
lora_r: 8

# The LoRA alpha. (type: int, default: 16)
lora_alpha: 16
Expand Down Expand Up @@ -71,7 +71,7 @@ train:
lr_warmup_steps: 200

# Number of epochs to train on (type: Optional[int], default: 5)
epochs: 4
epochs: 2

# Total number of tokens to train on (type: Optional[int], default: null)
max_tokens:
Expand Down
2 changes: 1 addition & 1 deletion config_hub/finetune/gemma-2b/qlora.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ train:
lr_warmup_steps: 200

# Number of epochs to train on (type: Optional[int], default: 5)
epochs: 4
epochs: 2

# Total number of tokens to train on (type: Optional[int], default: null)
max_tokens:
Expand Down
122 changes: 122 additions & 0 deletions config_hub/finetune/gemma-7b/lora.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@

# The path to the base model's checkpoint directory to load for finetuning. (type: <class 'Path'>, default: checkpoints/stabilityai/stablelm-base-alpha-3b)
checkpoint_dir: checkpoints/google/gemma-7b

# Directory in which to save checkpoints and logs. (type: <class 'Path'>, default: out/lora)
# Named after this recipe (plain LoRA) so its outputs do not land in the same
# directory as the QLoRA recipe for the same model.
out_dir: out/finetune/lora-gemma-7b

# The precision to use for finetuning. Possible choices: "bf16-true", "bf16-mixed", "32-true". (type: Optional[str], default: null)
precision: bf16-true

# If set, quantize the model with this algorithm. See ``tutorials/quantize.md`` for more information. (type: Optional[Literal['nf4', 'nf4-dq', 'fp4', 'fp4-dq', 'int8-training']], default: null)
# Explicit null: no quantization is configured for this recipe.
quantize: null

# How many devices/GPUs to use. (type: Union[int, str], default: 1)
devices: 1

# The LoRA rank. (type: int, default: 8)
lora_r: 16

# The LoRA alpha. (type: int, default: 16)
lora_alpha: 16

# The LoRA dropout value. (type: float, default: 0.05)
lora_dropout: 0.1

# Whether to apply LoRA to the query weights in attention. (type: bool, default: True)
lora_query: true

# Whether to apply LoRA to the key weights in attention. (type: bool, default: False)
lora_key: true

# Whether to apply LoRA to the value weights in attention. (type: bool, default: True)
lora_value: true

# Whether to apply LoRA to the output projection in the attention block. (type: bool, default: False)
lora_projection: true

# Whether to apply LoRA to the weights of the MLP in the attention block. (type: bool, default: False)
lora_mlp: true

# Whether to apply LoRA to output head in GPT. (type: bool, default: False)
lora_head: true

# Data-related arguments. If not provided, the default is ``litgpt.data.Alpaca``.
# ``class_path`` selects the data module; ``init_args`` holds its constructor
# arguments. They must be nested under ``data``: left flat, ``data`` is null
# and this ``seed`` clashes with the top-level ``seed`` key of the file.
data:
  class_path: litgpt.data.Alpaca2k
  init_args:
    mask_prompt: false
    val_split_fraction: 0.03847
    prompt_style: alpaca
    ignore_index: -100
    seed: 42
    num_workers: 4

# Training-related arguments. See ``litgpt.args.TrainArgs`` for details
# All keys below are children of ``train``; unset options are written as an
# explicit ``null`` rather than a bare empty value.
train:

  # Number of optimizer steps between saving checkpoints (type: Optional[int], default: 1000)
  save_interval: 800

  # Number of iterations between logging calls (type: int, default: 1)
  log_interval: 1

  # Number of samples between optimizer steps across data-parallel ranks (type: int, default: 128)
  global_batch_size: 6

  # Number of samples per data-parallel rank (type: int, default: 4)
  micro_batch_size: 1

  # Number of iterations with learning rate warmup active (type: int, default: 100)
  lr_warmup_steps: 200

  # Number of epochs to train on (type: Optional[int], default: 5)
  epochs: 2

  # Total number of tokens to train on (type: Optional[int], default: null)
  max_tokens: null

  # Limits the number of optimizer steps to run. (type: Optional[int], default: null)
  max_steps: null

  # Limits the length of samples. Off by default (type: Optional[int], default: null)
  max_seq_length: 512

  # Whether to tie the embedding weights with the language modeling head weights. (type: Optional[bool], default: null)
  tie_embeddings: null

  # (type: float, default: 0.0003)
  learning_rate: 0.0002

  # (type: float, default: 0.02)
  weight_decay: 0.0

  # (type: float, default: 0.9)
  beta1: 0.9

  # (type: float, default: 0.95)
  beta2: 0.95

  # (type: Optional[float], default: null)
  max_norm: null

  # (type: float, default: 6e-05)
  min_lr: 6.0e-05

# Evaluation-related arguments. See ``litgpt.args.EvalArgs`` for details
# The three keys below are children of ``eval``.
eval:

  # Number of optimizer steps between evaluation calls (type: int, default: 100)
  interval: 25

  # Number of tokens to generate (type: Optional[int], default: 100)
  max_new_tokens: 100

  # Number of iterations (type: int, default: 100)
  max_iters: 100

# Logger backend that receives the metrics.
# (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
logger_name: csv

# Random seed used for reproducibility.
# (type: int, default: 1337)
seed: 1337
122 changes: 122 additions & 0 deletions config_hub/finetune/gemma-7b/qlora.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@

# Checkpoint directory of the base model that is loaded for finetuning.
# (type: <class 'Path'>, default: checkpoints/stabilityai/stablelm-base-alpha-3b)
checkpoint_dir: checkpoints/google/gemma-7b

# Where checkpoints and logs are written.
# (type: <class 'Path'>, default: out/lora)
out_dir: out/finetune/qlora-gemma-7b

# Finetuning precision; one of "bf16-true", "bf16-mixed", "32-true".
# (type: Optional[str], default: null)
precision: bf16-true

# Quantization algorithm applied to the model, if any.
# See ``tutorials/quantize.md`` for more information.
# (type: Optional[Literal['nf4', 'nf4-dq', 'fp4', 'fp4-dq', 'int8-training']], default: null)
# NOTE(review): the value below carries a ``bnb.`` prefix that the choices
# listed in the type hint above do not show -- confirm against the accepted
# values of the finetuning script.
quantize: bnb.nf4

# Number of devices/GPUs to use.
# (type: Union[int, str], default: 1)
devices: 1

# LoRA rank.
# (type: int, default: 8)
lora_r: 16

# LoRA alpha.
# (type: int, default: 16)
lora_alpha: 16

# LoRA dropout value.
# (type: float, default: 0.05)
lora_dropout: 0.1

# Apply LoRA to the query weights in attention?
# (type: bool, default: True)
lora_query: true

# Apply LoRA to the key weights in attention?
# (type: bool, default: False)
lora_key: true

# Apply LoRA to the value weights in attention?
# (type: bool, default: True)
lora_value: true

# Apply LoRA to the output projection in the attention block?
# (type: bool, default: False)
lora_projection: true

# Apply LoRA to the weights of the MLP in the attention block?
# (type: bool, default: False)
lora_mlp: true

# Apply LoRA to the output head in GPT?
# (type: bool, default: False)
lora_head: true

# Data-related arguments. If not provided, the default is ``litgpt.data.Alpaca``.
# ``class_path`` selects the data module; ``init_args`` holds its constructor
# arguments. They must be nested under ``data``: left flat, ``data`` is null
# and this ``seed`` clashes with the top-level ``seed`` key of the file.
data:
  class_path: litgpt.data.Alpaca2k
  init_args:
    mask_prompt: false
    val_split_fraction: 0.03847
    prompt_style: alpaca
    ignore_index: -100
    seed: 42
    num_workers: 4

# Training-related arguments. See ``litgpt.args.TrainArgs`` for details
# All keys below are children of ``train``; unset options are written as an
# explicit ``null`` rather than a bare empty value.
train:

  # Number of optimizer steps between saving checkpoints (type: Optional[int], default: 1000)
  save_interval: 800

  # Number of iterations between logging calls (type: int, default: 1)
  log_interval: 1

  # Number of samples between optimizer steps across data-parallel ranks (type: int, default: 128)
  global_batch_size: 6

  # Number of samples per data-parallel rank (type: int, default: 4)
  micro_batch_size: 1

  # Number of iterations with learning rate warmup active (type: int, default: 100)
  lr_warmup_steps: 200

  # Number of epochs to train on (type: Optional[int], default: 5)
  epochs: 2

  # Total number of tokens to train on (type: Optional[int], default: null)
  max_tokens: null

  # Limits the number of optimizer steps to run. (type: Optional[int], default: null)
  max_steps: null

  # Limits the length of samples. Off by default (type: Optional[int], default: null)
  max_seq_length: 512

  # Whether to tie the embedding weights with the language modeling head weights. (type: Optional[bool], default: null)
  tie_embeddings: null

  # (type: float, default: 0.0003)
  learning_rate: 0.0002

  # (type: float, default: 0.02)
  weight_decay: 0.0

  # (type: float, default: 0.9)
  beta1: 0.9

  # (type: float, default: 0.95)
  beta2: 0.95

  # (type: Optional[float], default: null)
  max_norm: null

  # (type: float, default: 6e-05)
  min_lr: 6.0e-05

# Evaluation-related arguments. See ``litgpt.args.EvalArgs`` for details
# The three keys below are children of ``eval``.
eval:

  # Number of optimizer steps between evaluation calls (type: int, default: 100)
  interval: 25

  # Number of tokens to generate (type: Optional[int], default: 100)
  max_new_tokens: 100

  # Number of iterations (type: int, default: 100)
  max_iters: 100

# Logger backend that receives the metrics.
# (type: Literal['wandb', 'tensorboard', 'csv'], default: csv)
logger_name: csv

# Random seed used for reproducibility.
# (type: int, default: 1337)
seed: 1337

0 comments on commit 595ae45

Please sign in to comment.