diff --git a/docs/mddocs/Quickstart/npu_quickstart.md b/docs/mddocs/Quickstart/npu_quickstart.md
index 9cdef90890a..ac964a024c5 100644
--- a/docs/mddocs/Quickstart/npu_quickstart.md
+++ b/docs/mddocs/Quickstart/npu_quickstart.md
@@ -88,6 +88,7 @@ For `ipex-llm` NPU support, please set the following environment variable with a
 
 - For **Intel Core™ Ultra Processors (Series 2) with processor number 2xxV (code name Lunar Lake)**:
 
   - For Intel Core™ Ultra 7 Processor 258V:
+    No runtime configuration required.
 
   - For Intel Core™ Ultra 5 Processor 228V & 226V:
@@ -97,7 +98,7 @@ For `ipex-llm` NPU support, please set the following environment variable with a
 
 - For **Intel Core™ Ultra Processors (Series 2) with processor number 2xxK (code name Arrow Lake)**:
   ```cmd
-  set IPEX_LLM_NPU_DISABLE_COMPILE_OPT=1
+  set IPEX_LLM_NPU_ARL=1
   ```
 
 - For **Intel Core™ Ultra Processors (Series 1) with processor number 1xxH (code name Meteor Lake)**:
diff --git a/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py b/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
index 2c145c536b7..5750540d04e 100644
--- a/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
+++ b/python/llm/src/ipex_llm/transformers/npu_models/convert_mp.py
@@ -37,6 +37,10 @@ def optimize_llm_pre(model: torch.nn.Module, qtype, mixed_precision,
         os.environ["IPEX_LLM_NPU_USE_LEVEL0"] = "0"
         os.environ["IPEX_LLM_NPU_DISABLE_COMPILE_OPT"] = "1"
 
+    if os.environ.get("IPEX_LLM_NPU_ARL", "0") == "1":
+        # For ARL support
+        os.environ["IPEX_LLM_NPU_DISABLE_COMPILE_OPT"] = "1"
+
     if model.config.model_type == "baichuan":
         # process NormHead module in Baichuan2 7B
         if hasattr(model, 'lm_head') and model.lm_head is not None:
@@ -144,7 +148,9 @@ def optimize_llm_pre(model: torch.nn.Module, qtype, mixed_precision,
                 # do not split mlp down_proj for Qwen2-7B & sym_int8
                 n_splits_down_proj = 1
             else:
-                n_splits_down_proj = 2 if model.config.intermediate_size == 18944 else 1
+                n_splits_down_proj = 2 if (model.config.intermediate_size == 18944 or
+                                           os.environ.get("IPEX_LLM_NPU_MTL", "0") == "1" or
+                                           os.environ.get("IPEX_LLM_NPU_ARL", "0") == "1") else 1
         else:
             invalidInputError(
                 model.config.hidden_size % quantization_group_size == 0 and
diff --git a/python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py b/python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py
index f429dd4ebe4..41c35b095e6 100644
--- a/python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py
+++ b/python/llm/src/ipex_llm/transformers/npu_pipeline_model/convert_pipeline.py
@@ -205,7 +205,9 @@ def convert_llm(model: torch.nn.Module,
             # do not split mlp down_proj for Qwen2-7B & sym_int8
             n_splits_down_proj = 1
         else:
-            n_splits_down_proj = 2 if model.config.intermediate_size == 18944 else 1
+            n_splits_down_proj = 2 if (model.config.intermediate_size == 18944 or
+                                       os.environ.get("IPEX_LLM_NPU_MTL", "0") == "1" or
+                                       os.environ.get("IPEX_LLM_NPU_ARL", "0") == "1") else 1
     else:
         n_splits_linear = model.config.hidden_size // group_size
         n_splits_down_proj = model.config.intermediate_size // group_size
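
Reviewer note: the ternary repeated in both Python hunks gates how many chunks the MLP `down_proj` weight is split into before NPU conversion — 2 for Qwen2-7B-sized MLPs (`intermediate_size == 18944`) or whenever the Meteor Lake / Arrow Lake flags are set, 1 otherwise. The snippet below is a minimal, self-contained sketch of that gating and of what a 2-way split along the intermediate dimension means; `pick_down_proj_splits` and `split_down_proj` are illustrative names, not part of the ipex-llm API, and the actual split is performed inside the NPU conversion code.

```python
import os
import torch

def pick_down_proj_splits(intermediate_size: int) -> int:
    # Mirrors the gating in this PR: split down_proj in two for
    # Qwen2-7B-sized MLPs, or whenever the MTL / ARL flags are set.
    return 2 if (intermediate_size == 18944 or
                 os.environ.get("IPEX_LLM_NPU_MTL", "0") == "1" or
                 os.environ.get("IPEX_LLM_NPU_ARL", "0") == "1") else 1

def split_down_proj(weight: torch.Tensor, n_splits: int) -> list:
    # down_proj maps intermediate_size -> hidden_size, so its weight is
    # [hidden_size, intermediate_size]; chunking along dim=1 yields
    # n_splits partial projections whose outputs sum to the original.
    return list(torch.chunk(weight, n_splits, dim=1))

if __name__ == "__main__":
    os.environ["IPEX_LLM_NPU_ARL"] = "1"
    w = torch.randn(3584, 18944)  # Qwen2-7B down_proj weight shape
    parts = split_down_proj(w, pick_down_proj_splits(w.shape[1]))
    print([tuple(p.shape) for p in parts])  # [(3584, 9472), (3584, 9472)]
```

Because the split is along the input (intermediate) dimension, each chunk produces a full-sized partial output and the partial results are simply summed, so toggling the split count per platform does not change model semantics — it only changes the size of the matmuls handed to the NPU compiler.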