
Commit

update common.py
Zhenzhong1 committed Mar 20, 2024
1 parent cc5124c · commit fde9724
Showing 2 changed files with 9 additions and 2 deletions.
neural_speed/convert/common.py — 8 changes: 7 additions & 1 deletion
@@ -234,7 +234,13 @@ def unpack_gptq_weight_8bits(qweight, scales, qzeros, q_config):
     zeros = zeros.to(torch.int8 if sym else torch.uint8)

     zeros = zeros + 1
-    zeros = zeros.reshape(scales.shape)
+    try:
+        zeros = zeros.reshape(scales.shape)
+    except:
+        # zeros and scales have different numbers of items.
+        # remove the 1s (due to the 0 + 1 in line 68)
+        zeros = zeros[zeros != 1]
+        zeros = zeros.reshape(scales.shape)

     if not sym and bits == 8:
         zeros = (zeros.to(torch.int32) - 128).to(torch.int8)
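
For context, the sketch below reproduces the failure mode this try/except guards: the unpacked zeros tensor can end up with more elements than scales (presumably padding left over from the int32 packing of the 8-bit zero points), and those extra entries are 0 before the zeros = zeros + 1 step, so they equal exactly 1 afterwards and can be filtered out. The shapes and values here are hypothetical, not taken from the repository.

    import torch

    # Hypothetical shapes: 4 quantization groups x 6 output channels of real
    # zero points, plus 2 padding columns left over from the int32 packing.
    real = torch.full((4, 6), 128, dtype=torch.int32)   # genuine zero points
    pad = torch.zeros((4, 2), dtype=torch.int32)        # packing padding
    zeros = torch.cat([real, pad], dim=1)               # 32 elements total
    scales = torch.ones(4, 6)                           # 24 elements total

    zeros = zeros + 1                                    # same +1 as in unpack_gptq_weight_8bits
    try:
        zeros = zeros.reshape(scales.shape)              # RuntimeError: 32 elements vs 24
    except Exception:
        zeros = zeros[zeros != 1]                        # drop the padding (0 + 1 == 1)
        zeros = zeros.reshape(scales.shape)              # 24 elements now match scales

    print(zeros.shape)                                   # torch.Size([4, 6])

Note that the filter relies on no genuine zero point being 0 before the increment; otherwise real entries would be dropped along with the padding.
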
neural_speed/convert/convert_quantized_llama.py — 3 changes: 2 additions & 1 deletion
@@ -106,7 +106,8 @@ def main(args_in: Optional[List[str]] = None) -> None:
     out_path = args.outfile.as_posix()
     model_path = args.model.as_posix()

-    model, config, quantize_config = load_quantized_model(model_path)
+    #model, config, quantize_config = load_quantized_model(model_path)
+    model, config, quantize_config = load_quantized_safetensors(model_path)
     f = open(out_path, "wb")

     # 1. write hparams
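
The converter now reads the GPTQ checkpoint through load_quantized_safetensors instead of load_quantized_model. As a rough illustration only — this is not the repository's implementation, and the helper name, shard globbing, config loading, and return layout below are assumptions — a safetensors-based loader could look something like this:

    import glob
    import json
    import os

    from safetensors.torch import load_file
    from transformers import AutoConfig

    def load_quantized_safetensors_sketch(model_path):
        """Hypothetical stand-in for load_quantized_safetensors: returns the raw
        quantized tensors, the model config, and the GPTQ quantization config."""
        tensors = {}
        for shard in sorted(glob.glob(os.path.join(model_path, "*.safetensors"))):
            tensors.update(load_file(shard))  # merge all shards into one state dict
        config = AutoConfig.from_pretrained(model_path)
        # GPTQ exports typically place the quantization settings in quantize_config.json
        with open(os.path.join(model_path, "quantize_config.json")) as f:
            quantize_config = json.load(f)
        return tensors, config, quantize_config
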
