Skip to content

Commit

Permalink
Fix q4 quantization
Browse files (browse the repository at this point in the history)
  • Loading branch information
xenova committed Jul 3, 2024
1 parent 04af3d5 commit 9128651
Showing 1 changed file with 11 additions and 17 deletions.
28 changes: 11 additions & 17 deletions scripts/quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,34 +164,28 @@ def main():
  save_path,
  )

- elif mode == QuantMode.Q4:
- quantize_q4(
- model,
- save_path,
- block_size=quantization_args.block_size,
- is_symmetric=quantization_args.is_symmetric,
- accuracy_level=quantization_args.accuracy_level,
- )
-
- elif mode == QuantMode.Q4F16:
+ elif mode in (QuantMode.Q4, QuantMode.Q4F16):
+ block_size = quantization_args.block_size or 32
+
  q4_model = quantize_q4(
  model,
  save_path=None,
- block_size=quantization_args.block_size,
+ block_size=block_size,
  is_symmetric=quantization_args.is_symmetric,
  accuracy_level=quantization_args.accuracy_level,
  )
- quantize_fp16(
- q4_model,
- save_path,
- )
+ if mode == QuantMode.Q4F16:
+ quantize_fp16(
+ q4_model,
+ save_path,
+ )

  elif mode == QuantMode.BNB4:
  quantize_bnb4(
  model,
  save_path,
- block_size=quantization_args.block_size,
- quant_type=quantization_args.quant_type,
+ block_size=quantization_args.block_size or 64,
+ quant_type=quantization_args.quant_type if quantization_args.quant_type is not None else MatMulBnb4Quantizer.NF4,
  )

  elif mode in (QuantMode.Q8, QuantMode.QI8, QuantMode.QU8):
Expand Down

0 comments on commit 9128651

Please sign in to comment.