Skip to content

Commit

Permalink
OptimizerArgs (#1409)
Browse files Browse the repository at this point in the history
Co-authored-by: Carlos Mocholí <[email protected]>
  • Loading branch information
rasbt and carmocca authored May 23, 2024
1 parent aa95635 commit 141c8bf
Show file tree
Hide file tree
Showing 42 changed files with 667 additions and 420 deletions.
30 changes: 18 additions & 12 deletions config_hub/finetune/falcon-7b/lora.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,18 +84,6 @@ train:
# Whether to tie the embedding weights with the language modeling head weights. (type: Optional[bool], default: null)
tie_embeddings:

# (type: float, default: 0.0003)
learning_rate: 0.0002

# (type: float, default: 0.02)
weight_decay: 0.0

# (type: float, default: 0.9)
beta1: 0.9

# (type: float, default: 0.95)
beta2: 0.95

# (type: Optional[float], default: null)
max_norm:

Expand All @@ -122,3 +110,21 @@ logger_name: csv

# The random seed to use for reproducibility. (type: int, default: 1337)
seed: 1337

# Optimizer-related arguments
optimizer:

class_path: torch.optim.AdamW

init_args:

# (type: float, default: 0.001)
lr: 0.0002

# (type: float, default: 0.01)
weight_decay: 0.0

# (type: tuple, default: (0.9,0.999))
betas:
- 0.9
- 0.95
30 changes: 18 additions & 12 deletions config_hub/finetune/falcon-7b/qlora.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,18 +86,6 @@ train:
# Whether to tie the embedding weights with the language modeling head weights (type: Optional[bool], default: null)
tie_embeddings:

# (type: float, default: 0.0003)
learning_rate: 0.0002

# (type: float, default: 0.02)
weight_decay: 0.0

# (type: float, default: 0.9)
beta1: 0.9

# (type: float, default: 0.95)
beta2: 0.95

# (type: Optional[float], default: null)
max_norm:

Expand All @@ -124,3 +112,21 @@ logger_name: csv

# The random seed to use for reproducibility. (type: int, default: 1337)
seed: 1337

# Optimizer-related arguments
optimizer:

class_path: torch.optim.AdamW

init_args:

# (type: float, default: 0.001)
lr: 0.0002

# (type: float, default: 0.01)
weight_decay: 0.0

# (type: tuple, default: (0.9,0.999))
betas:
- 0.9
- 0.95
30 changes: 18 additions & 12 deletions config_hub/finetune/gemma-2b/full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,6 @@ train:
# Whether to tie the embedding weights with the language modeling head weights. (type: Optional[bool], default: null)
tie_embeddings:

# (type: float, default: 0.0003)
learning_rate: 0.0002

# (type: float, default: 0.02)
weight_decay: 0.0

# (type: float, default: 0.9)
beta1: 0.9

# (type: float, default: 0.95)
beta2: 0.95

# (type: Optional[float], default: null)
max_norm:

Expand All @@ -93,3 +81,21 @@ logger_name: csv

# The random seed to use for reproducibility. (type: int, default: 1337)
seed: 1337

# Optimizer-related arguments
optimizer:

class_path: torch.optim.AdamW

init_args:

# (type: float, default: 0.001)
lr: 0.0002

# (type: float, default: 0.01)
weight_decay: 0.0

# (type: tuple, default: (0.9,0.999))
betas:
- 0.9
- 0.95
30 changes: 18 additions & 12 deletions config_hub/finetune/gemma-2b/lora.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,18 +85,6 @@ train:
# Whether to tie the embedding weights with the language modeling head weights. (type: Optional[bool], default: null)
tie_embeddings:

# (type: float, default: 0.0003)
learning_rate: 0.0002

# (type: float, default: 0.02)
weight_decay: 0.2

# (type: float, default: 0.9)
beta1: 0.9

# (type: float, default: 0.95)
beta2: 0.95

# (type: Optional[float], default: null)
max_norm:

Expand All @@ -123,3 +111,21 @@ logger_name: csv

# The random seed to use for reproducibility. (type: int, default: 1337)
seed: 1337

# Optimizer-related arguments
optimizer:

class_path: torch.optim.AdamW

init_args:

# (type: float, default: 0.001)
lr: 0.0002

# (type: float, default: 0.01)
weight_decay: 0.0

# (type: tuple, default: (0.9,0.999))
betas:
- 0.9
- 0.95
30 changes: 18 additions & 12 deletions config_hub/finetune/gemma-2b/qlora.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,18 +85,6 @@ train:
# Whether to tie the embedding weights with the language modeling head weights. (type: Optional[bool], default: null)
tie_embeddings:

# (type: float, default: 0.0003)
learning_rate: 0.0002

# (type: float, default: 0.02)
weight_decay: 0.0

# (type: float, default: 0.9)
beta1: 0.9

# (type: float, default: 0.95)
beta2: 0.95

# (type: Optional[float], default: null)
max_norm:

Expand All @@ -123,3 +111,21 @@ logger_name: csv

# The random seed to use for reproducibility. (type: int, default: 1337)
seed: 1337

# Optimizer-related arguments
optimizer:

class_path: torch.optim.AdamW

init_args:

# (type: float, default: 0.001)
lr: 0.0002

# (type: float, default: 0.01)
weight_decay: 0.0

# (type: tuple, default: (0.9,0.999))
betas:
- 0.9
- 0.95
30 changes: 18 additions & 12 deletions config_hub/finetune/gemma-7b/lora.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,18 +85,6 @@ train:
# Whether to tie the embedding weights with the language modeling head weights. (type: Optional[bool], default: null)
tie_embeddings:

# (type: float, default: 0.0003)
learning_rate: 0.0002

# (type: float, default: 0.02)
weight_decay: 0.0

# (type: float, default: 0.9)
beta1: 0.9

# (type: float, default: 0.95)
beta2: 0.95

# (type: Optional[float], default: null)
max_norm:

Expand All @@ -123,3 +111,21 @@ logger_name: csv

# The random seed to use for reproducibility. (type: int, default: 1337)
seed: 1337

# Optimizer-related arguments
optimizer:

class_path: torch.optim.AdamW

init_args:

# (type: float, default: 0.001)
lr: 0.0002

# (type: float, default: 0.01)
weight_decay: 0.0

# (type: tuple, default: (0.9,0.999))
betas:
- 0.9
- 0.95
30 changes: 18 additions & 12 deletions config_hub/finetune/gemma-7b/qlora.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -85,18 +85,6 @@ train:
# Whether to tie the embedding weights with the language modeling head weights. (type: Optional[bool], default: null)
tie_embeddings:

# (type: float, default: 0.0003)
learning_rate: 0.0002

# (type: float, default: 0.02)
weight_decay: 0.0

# (type: float, default: 0.9)
beta1: 0.9

# (type: float, default: 0.95)
beta2: 0.95

# (type: Optional[float], default: null)
max_norm:

Expand All @@ -123,3 +111,21 @@ logger_name: csv

# The random seed to use for reproducibility. (type: int, default: 1337)
seed: 1337

# Optimizer-related arguments
optimizer:

class_path: torch.optim.AdamW

init_args:

# (type: float, default: 0.001)
lr: 0.0002

# (type: float, default: 0.01)
weight_decay: 0.0

# (type: tuple, default: (0.9,0.999))
betas:
- 0.9
- 0.95
30 changes: 18 additions & 12 deletions config_hub/finetune/llama-2-7b/full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,18 +58,6 @@ train:
# Whether to tie the embedding weights with the language modeling head weights. (type: Optional[bool], default: null)
tie_embeddings:

# (type: float, default: 0.003)
learning_rate: 0.0002

# (type: float, default: 0.02)
weight_decay: 0.1

# (type: float, default: 0.9)
beta1: 0.9

# (type: float, default: 0.95)
beta2: 0.95

# (type: Optional[float], default: null)
max_norm:

Expand All @@ -96,3 +84,21 @@ logger_name: csv

# The random seed to use for reproducibility. (type: int, default: 1337)
seed: 1337

# Optimizer-related arguments
optimizer:

class_path: torch.optim.AdamW

init_args:

# (type: float, default: 0.001)
lr: 0.0002

# (type: float, default: 0.01)
weight_decay: 0.0

# (type: tuple, default: (0.9,0.999))
betas:
- 0.9
- 0.95
30 changes: 18 additions & 12 deletions config_hub/finetune/llama-2-7b/lora.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,18 +84,6 @@ train:
# Whether to tie the embedding weights with the language modeling head weights. (type: Optional[bool], default: null)
tie_embeddings:

# (type: float, default: 0.0003)
learning_rate: 0.0002

# (type: float, default: 0.02)
weight_decay: 0.0

# (type: float, default: 0.9)
beta1: 0.9

# (type: float, default: 0.95)
beta2: 0.95

# (type: Optional[float], default: null)
max_norm:

Expand All @@ -122,3 +110,21 @@ logger_name: csv

# The random seed to use for reproducibility. (type: int, default: 1337)
seed: 1337

# Optimizer-related arguments
optimizer:

class_path: torch.optim.AdamW

init_args:

# (type: float, default: 0.001)
lr: 0.0002

# (type: float, default: 0.01)
weight_decay: 0.0

# (type: tuple, default: (0.9,0.999))
betas:
- 0.9
- 0.95
Loading

0 comments on commit 141c8bf

Please sign in to comment.