From 79bba4fa35561a74d352e6d6364434e8f3d2d77e Mon Sep 17 00:00:00 2001
From: liym27 <33742067+liym27@users.noreply.github.com>
Date: Tue, 19 Nov 2024 21:44:36 +0800
Subject: [PATCH] update conf for sharding overlap in auto_parallel static
 (#9456)

---
 .../llama2/pretrain_config_llama2_13b/pretrain-llama2_13b.json | 2 +-
 .../llama2/pretrain_config_llama2_70b/pretrain-llama2_70b.json | 2 +-
 .../llama2/pretrain_config_llama2_7b/pretrain-llama2_7b.json   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_tipc/static/auto_parallel/llama2/pretrain_config_llama2_13b/pretrain-llama2_13b.json b/tests/test_tipc/static/auto_parallel/llama2/pretrain_config_llama2_13b/pretrain-llama2_13b.json
index ba77b76542dd..5df3186af3b5 100644
--- a/tests/test_tipc/static/auto_parallel/llama2/pretrain_config_llama2_13b/pretrain-llama2_13b.json
+++ b/tests/test_tipc/static/auto_parallel/llama2/pretrain_config_llama2_13b/pretrain-llama2_13b.json
@@ -10,7 +10,7 @@
     "pipeline_parallel_degree": 4,
     "sharding": "stage1",
     "data_parallel_config": "enable_allreduce_avg_in_gradinent_scale gradient_sync_after_accumulate",
-    "sharding_parallel_config": "enable_stage2_overlap",
+    "sharding_parallel_config": "enable_stage1_overlap",
     "tensor_parallel_config": "enable_mp_async_allreduce",
     "pipeline_parallel_config": "enable_send_recv_overlap enable_split_backward",
     "pipeline_schedule_mode": "VPP",
diff --git a/tests/test_tipc/static/auto_parallel/llama2/pretrain_config_llama2_70b/pretrain-llama2_70b.json b/tests/test_tipc/static/auto_parallel/llama2/pretrain_config_llama2_70b/pretrain-llama2_70b.json
index 431c38f35402..8ec06780d111 100644
--- a/tests/test_tipc/static/auto_parallel/llama2/pretrain_config_llama2_70b/pretrain-llama2_70b.json
+++ b/tests/test_tipc/static/auto_parallel/llama2/pretrain_config_llama2_70b/pretrain-llama2_70b.json
@@ -52,7 +52,7 @@
     "virtual_pp_degree": 5,
     "pipeline_schedule_mode": "VPP",
     "data_parallel_config": "enable_allreduce_avg_in_gradinent_scale gradient_sync_after_accumulate",
-    "sharding_parallel_config": "split_param enable_stage1_overlap",
+    "sharding_parallel_config": "enable_stage1_overlap",
     "tensor_parallel_config": "enable_mp_async_allreduce",
     "max_seq_length": 4096,
     "to_static": true,
diff --git a/tests/test_tipc/static/auto_parallel/llama2/pretrain_config_llama2_7b/pretrain-llama2_7b.json b/tests/test_tipc/static/auto_parallel/llama2/pretrain_config_llama2_7b/pretrain-llama2_7b.json
index 6b89e3fd1fe4..d29b077f9ac4 100644
--- a/tests/test_tipc/static/auto_parallel/llama2/pretrain_config_llama2_7b/pretrain-llama2_7b.json
+++ b/tests/test_tipc/static/auto_parallel/llama2/pretrain_config_llama2_7b/pretrain-llama2_7b.json
@@ -10,7 +10,7 @@
     "pipeline_parallel_degree": 1,
     "sharding": "stage1",
     "data_parallel_config": "enable_allreduce_avg_in_gradinent_scale gradient_sync_after_accumulate",
-    "sharding_parallel_config": "enable_stage2_overlap",
+    "sharding_parallel_config": "enable_stage1_overlap",
     "tensor_parallel_config": "enable_mp_async_allreduce",
     "pipeline_parallel_config": "",
    "virtual_pp_degree": 1,
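
For reference, after this patch all three configs set "sharding_parallel_config" to "enable_stage1_overlap" (the 70b config also drops "split_param"). A minimal sketch of how the affected block of the 7b config reads post-patch, abridged to the keys visible in the diff (the real file contains additional fields):

    {
        "pipeline_parallel_degree": 1,
        "sharding": "stage1",
        "data_parallel_config": "enable_allreduce_avg_in_gradinent_scale gradient_sync_after_accumulate",
        "sharding_parallel_config": "enable_stage1_overlap",
        "tensor_parallel_config": "enable_mp_async_allreduce",
        "pipeline_parallel_config": "",
        "virtual_pp_degree": 1
    }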