diff --git a/config_hub/finetune/llama-2-7b/full-alpaca.yaml b/config_hub/finetune/llama-2-7b/full-alpaca.yaml
new file mode 100644
index 0000000000..0f28d5103b
--- /dev/null
+++ b/config_hub/finetune/llama-2-7b/full-alpaca.yaml
@@ -0,0 +1,38 @@
+precision: bf16-true
+devices: 4
+resume: false
+seed: 1337
+data:
+  class_path: lit_gpt.data.AlpacaGPT4
+  init_args:
+    mask_prompt: false
+    test_split_fraction: 0.03847
+    ignore_index: -1
+    seed: 42
+    num_workers: 4
+    download_dir: data/alpacagpt4
+    file_url: https://raw.githubusercontent.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM/main/data/alpaca_gpt4_data.json
+    file_name: alpacagpt4_data_cleaned_archive.json
+checkpoint_dir: checkpoints/meta-llama/Llama-2-7b-hf
+out_dir: out/finetune/full-llama2-7b-alpaca
+train:
+  save_interval: 200
+  log_interval: 1
+  global_batch_size: 64
+  micro_batch_size: 4
+  lr_warmup_steps: 25
+  epochs: 1
+  max_tokens: null
+  max_steps: null
+  max_seq_length: 512
+  tie_embeddings: null
+  learning_rate: 0.0002
+  weight_decay: 0.1
+  beta1: 0.9
+  beta2: 0.95
+  max_norm: null
+  min_lr: 6.0e-05
+eval:
+  interval: 100
+  max_new_tokens: 100
+  max_iters: 100
diff --git a/config_hub/finetune/llama-2-7b/full-deita.yaml b/config_hub/finetune/llama-2-7b/full-deita.yaml
new file mode 100644
index 0000000000..98ab44b46c
--- /dev/null
+++ b/config_hub/finetune/llama-2-7b/full-deita.yaml
@@ -0,0 +1,37 @@
+precision: bf16-true
+devices: 4
+resume: false
+seed: 1337
+data:
+  class_path: lit_gpt.data.Deita
+  init_args:
+    mask_prompt: false
+    ignore_index: -1
+    seed: 42
+    num_workers: 4
+    include_multiturn_conversations: false
+    download_dir: data/deita
+    repo_id: HuggingFaceH4/deita-10k-v0-sft
+checkpoint_dir: checkpoints/meta-llama/Llama-2-7b-hf
+out_dir: out/finetune/full-llama2-7b-deita
+train:
+  save_interval: 200
+  log_interval: 1
+  global_batch_size: 64
+  micro_batch_size: 4
+  lr_warmup_steps: 25
+  epochs: 3
+  max_tokens: null
+  max_steps: null
+  max_seq_length: 1024
+  tie_embeddings: null
+  learning_rate: 5.0e-05
+  weight_decay: 0.02
+  beta1: 0.9
+  beta2: 0.95
+  max_norm: null
+  min_lr: 6.0e-05
+eval:
+  interval: 100
+  max_new_tokens: 100
+  max_iters: 100
diff --git a/config_hub/finetune/tiny-llama/full-alpaca.yaml b/config_hub/finetune/tiny-llama/full-alpaca.yaml
new file mode 100644
index 0000000000..131ded7cf6
--- /dev/null
+++ b/config_hub/finetune/tiny-llama/full-alpaca.yaml
@@ -0,0 +1,38 @@
+precision: bf16-true
+devices: 1
+resume: false
+seed: 1337
+data:
+  class_path: lit_gpt.data.AlpacaGPT4
+  init_args:
+    mask_prompt: false
+    test_split_fraction: 0.03847
+    ignore_index: -1
+    seed: 42
+    num_workers: 4
+    download_dir: data/alpacagpt4
+    file_url: https://raw.githubusercontent.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM/main/data/alpaca_gpt4_data.json
+    file_name: alpacagpt4_data_cleaned_archive.json
+checkpoint_dir: checkpoints/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
+out_dir: out/finetune/full-tinyllama-alpacagpt4
+train:
+  save_interval: 200
+  log_interval: 1
+  global_batch_size: 64
+  micro_batch_size: 16
+  lr_warmup_steps: 100
+  epochs: 2
+  max_tokens: null
+  max_steps: null
+  max_seq_length: 512
+  tie_embeddings: null
+  learning_rate: 5.0e-05
+  weight_decay: 0.02
+  beta1: 0.9
+  beta2: 0.95
+  max_norm: null
+  min_lr: 6.0e-05
+eval:
+  interval: 100
+  max_new_tokens: 100
+  max_iters: 100
diff --git a/config_hub/finetune/tiny-llama/full-deita.yaml b/config_hub/finetune/tiny-llama/full-deita.yaml
new file mode 100644
index 0000000000..02c47520d5
--- /dev/null
+++ b/config_hub/finetune/tiny-llama/full-deita.yaml
@@ -0,0 +1,37 @@
+precision: bf16-true
+devices: 1
+resume: false
+seed: 1337
+data:
+  class_path: lit_gpt.data.Deita
+  init_args:
+    mask_prompt: false
+    ignore_index: -1
+    seed: 42
+    num_workers: 4
+    include_multiturn_conversations: false
+    download_dir: data/deita
+    repo_id: HuggingFaceH4/deita-10k-v0-sft
+checkpoint_dir: checkpoints/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
+out_dir: out/finetune/full-tinyllama-deita
+train:
+  save_interval: 200
+  log_interval: 1
+  global_batch_size: 64
+  micro_batch_size: 4
+  lr_warmup_steps: 100
+  epochs: 2
+  max_tokens: null
+  max_steps: null
+  max_seq_length: 1024
+  tie_embeddings: null
+  learning_rate: 5.0e-05
+  weight_decay: 0.02
+  beta1: 0.9
+  beta2: 0.95
+  max_norm: null
+  min_lr: 6.0e-05
+eval:
+  interval: 100
+  max_new_tokens: 100
+  max_iters: 100
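Each config nests the dataset under a `class_path`/`init_args` pair, the jsonargparse convention for instantiating a class from YAML. Below is a minimal sketch (not the project's actual CLI loader) of how such a file resolves into a data object; it assumes PyYAML and `lit_gpt` are installed and that the `init_args` keys map directly onto the data class constructor, which is what these configs imply.

```python
import importlib
import yaml

# Hypothetical standalone loader for illustration only: read one of the new
# configs and build the data module named by `class_path` with its `init_args`.
with open("config_hub/finetune/tiny-llama/full-alpaca.yaml") as f:
    cfg = yaml.safe_load(f)

module_name, class_name = cfg["data"]["class_path"].rsplit(".", 1)  # e.g. lit_gpt.data, AlpacaGPT4
data_cls = getattr(importlib.import_module(module_name), class_name)
data = data_cls(**cfg["data"].get("init_args", {}))

print(type(data).__name__, cfg["train"]["global_batch_size"], cfg["train"]["micro_batch_size"])
```

Note that the batch-size fields are related: with `global_batch_size: 64`, `micro_batch_size: 4`, and `devices: 4`, the effective per-device accumulation would be 64 / (4 × 4) = 4 steps, assuming gradient accumulation is derived from these values as in the existing finetuning scripts.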