Commit

fix qwen 14b fp6 abnormal output (#11583)
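This commit gates the Qwen2 MLP padding pre-optimization on the quantization type: ggml_convert_low_bit now forwards the qtype into _optimize_pre, which skips padding_mlp when the qtype is fp6, since the padding was producing abnormal output for Qwen 14B under fp6.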
cyita authored Jul 16, 2024
1 parent c279849 commit 99c2274
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions python/llm/src/ipex_llm/transformers/convert.py
@@ -667,7 +667,7 @@ def replace_with_low_bit_linear_for_module(model, qtype, module_name=None,
     return model
 
 
-def _optimize_pre(model):
+def _optimize_pre(model, qtype=None):
     try:
         from sentence_transformers.SentenceTransformer import SentenceTransformer
         if isinstance(model, SentenceTransformer):
@@ -743,8 +743,9 @@ def _optimize_pre(model):
         if should_apply_merge_qkv:
             from ipex_llm.transformers.models.qwen2 import merge_qkv
             model.apply(merge_qkv)
-        from ipex_llm.transformers.models.qwen2 import padding_mlp
-        model.apply(padding_mlp)
+        if qtype != ggml_tensor_qtype["fp6"]:
+            from ipex_llm.transformers.models.qwen2 import padding_mlp
+            model.apply(padding_mlp)
     if model.config.model_type == "qwen2_moe":
         from ipex_llm.transformers.models.qwen2_moe import merge_qkv
         model.apply(merge_qkv)
@@ -795,7 +796,7 @@ def ggml_convert_low_bit(model, qtype, optimize_model=True,
         return model
 
     if optimize_model:
-        model = _optimize_pre(model)
+        model = _optimize_pre(model, qtype)
 
     act_order = False
     if getattr(model, "quantization_method", None) == "gptq":
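For context, a hedged usage sketch (not part of the commit) of how the fixed path is reached: loading a Qwen 14B checkpoint with load_in_low_bit="fp6" goes through ggml_convert_low_bit, which after this change forwards the qtype into _optimize_pre so that padding_mlp is skipped, while merge_qkv and the other pre-optimizations still run regardless of qtype. The model id below is an assumption for illustration; load_in_low_bit, optimize_model, and trust_remote_code are existing ipex-llm from_pretrained arguments.

from ipex_llm.transformers import AutoModelForCausalLM

# Minimal sketch, assuming a Qwen 14B checkpoint id; "fp6" maps to
# ggml_tensor_qtype["fp6"], the value _optimize_pre now checks against.
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen1.5-14B-Chat",   # illustrative model id (assumption)
    load_in_low_bit="fp6",     # quantize weights to fp6
    optimize_model=True,       # triggers _optimize_pre(model, qtype)
    trust_remote_code=True,
)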
