From 4b514a54c5ffcf6027d200943aa3697b0ef68fac Mon Sep 17 00:00:00 2001
From: George Ohashi
Date: Fri, 14 Jun 2024 14:12:51 +0000
Subject: [PATCH] initialize zp, scale loaded from HF quantizer, applying quant_config

---
 src/compressed_tensors/compressors/model_compressor.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/compressed_tensors/compressors/model_compressor.py b/src/compressed_tensors/compressors/model_compressor.py
index 269fe870..c927d0c0 100644
--- a/src/compressed_tensors/compressors/model_compressor.py
+++ b/src/compressed_tensors/compressors/model_compressor.py
@@ -258,7 +258,14 @@ def decompress(self, model_path: str, model: Module):
             self._replace_weights(dense_gen, model)
 
             def update_status(module):
-                module.quantization_status = QuantizationStatus.FROZEN
+                import torch
+
+                first_param_dtype = next(model.parameters()).dtype
+                if first_param_dtype == torch.float16:
+                    # loading unquantized model
+                    module.quantization_status = QuantizationStatus.INITIALIZED
+                else:
+                    module.quantization_status = QuantizationStatus.FROZEN
 
             model.apply(update_status)
             setattr(model, QUANTIZATION_CONFIG_NAME, self.quantization_config)