From 2fe2c8b8c4cdc465ec409b85046ce20c7af80e0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Monk=20=28looking=20for=20PhD=20Fall=E2=80=9924=29?= Date: Tue, 30 Apr 2024 23:53:47 +0530 Subject: [PATCH 1/4] Create phi3-ft.yml --- examples/phi/phi3-ft.yml | 83 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 examples/phi/phi3-ft.yml diff --git a/examples/phi/phi3-ft.yml b/examples/phi/phi3-ft.yml new file mode 100644 index 0000000000..483335a906 --- /dev/null +++ b/examples/phi/phi3-ft.yml @@ -0,0 +1,83 @@ +base_model: microsoft/Phi-3-mini-4k-instruct +model_type: AutoModelForCausalLM +tokenizer_type: AutoTokenizer + +load_in_8bit: false +load_in_4bit: false +strict: false + +datasets: + - path: aaditya/alpaca_subset_1 + type: alpaca + +dataset_prepared_path: +val_set_size: 0 +output_dir: ./phi-sft-out + +sequence_len: 4000 +sample_packing: true +pad_to_sequence_len: true +trust_remote_code: true + +adapter: +lora_model_dir: +lora_r: +lora_alpha: +lora_dropout: +lora_target_linear: +lora_fan_in_fan_out: + +wandb_project: phi3 +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 2 +micro_batch_size: 12 +num_epochs: 2 +optimizer: adamw_torch +adam_beta2: 0.95 +adam_epsilon: 0.00001 +max_grad_norm: 1.0 +lr_scheduler: cosine +learning_rate: 0.000003 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: true + +gradient_checkpointing: true +gradient_checkpointing_kwargs: + use_reentrant: True +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +warmup_steps: 100 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.1 +fsdp: + - full_shard + - auto_wrap +fsdp_config: + fsdp_limit_all_gathers: true + fsdp_sync_module_states: true + fsdp_offload_params: true + fsdp_use_orig_params: false + fsdp_cpu_ram_efficient_loading: true + fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP + fsdp_transformer_layer_cls_to_wrap: Phi3DecoderLayer + fsdp_state_dict_type: FULL_STATE_DICT + fsdp_sharding_strategy: FULL_SHARD +resize_token_embeddings_to_32x: true +special_tokens: + pad_token: "<|endoftext|>" From e933d7a1342ac734aabd9972d88860e1872f4060 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Tue, 14 May 2024 08:30:31 -0400 Subject: [PATCH 2/4] rename to be fsdp specific and tweak settings a bit --- examples/phi/{phi3-ft.yml => phi3-ft-fsdp.yml} | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename examples/phi/{phi3-ft.yml => phi3-ft-fsdp.yml} (95%) diff --git a/examples/phi/phi3-ft.yml b/examples/phi/phi3-ft-fsdp.yml similarity index 95% rename from examples/phi/phi3-ft.yml rename to examples/phi/phi3-ft-fsdp.yml index 483335a906..d220e577d1 100644 --- a/examples/phi/phi3-ft.yml +++ b/examples/phi/phi3-ft-fsdp.yml @@ -7,14 +7,14 @@ load_in_4bit: false strict: false datasets: - - path: aaditya/alpaca_subset_1 + - path: mhenrichsen/alpaca_2k_test type: alpaca dataset_prepared_path: val_set_size: 0 output_dir: ./phi-sft-out -sequence_len: 4000 +sequence_len: 4096 sample_packing: true pad_to_sequence_len: true trust_remote_code: true @@ -51,7 +51,7 @@ tf32: true gradient_checkpointing: true gradient_checkpointing_kwargs: - use_reentrant: True + use_reentrant: true early_stopping_patience: resume_from_checkpoint: local_rank: From 9255462f8b5e8ec8230ac56ab4a05fb24041ac4d Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Tue, 14 May 2024 08:34:38 -0400 Subject: [PATCH 3/4] add phi-3 chat template --- src/axolotl/utils/chat_templates.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/axolotl/utils/chat_templates.py b/src/axolotl/utils/chat_templates.py index 01b1473568..a5431ea88b 100644 --- a/src/axolotl/utils/chat_templates.py +++ b/src/axolotl/utils/chat_templates.py @@ -25,6 +25,7 @@ def chat_templates(user_choice: str): "gemma": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", "cohere": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}", "llama3": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% else %}{{ eos_token }}{% endif %}", + "phi3": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}", } if user_choice in templates: From b288ad503094afa61a236333d41f4fe70e06d6e3 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Tue, 14 May 2024 09:18:59 -0400 Subject: [PATCH 4/4] naming --- src/axolotl/utils/chat_templates.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/axolotl/utils/chat_templates.py b/src/axolotl/utils/chat_templates.py index a5431ea88b..b74c172cfa 100644 --- a/src/axolotl/utils/chat_templates.py +++ b/src/axolotl/utils/chat_templates.py @@ -25,7 +25,7 @@ def chat_templates(user_choice: str): "gemma": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", "cohere": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}", "llama3": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% else %}{{ eos_token }}{% endif %}", - "phi3": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}", + "phi-3": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}", } if user_choice in templates: