Fix forward function pass call #845

Merged · 3 commits · Oct 19, 2024
8 changes: 3 additions & 5 deletions src/llmcompressor/modifiers/obcq/utils/sgpt_wrapper.py
@@ -1,8 +1,6 @@
 import time

-from compressed_tensors.quantization.lifecycle.forward import (
-    maybe_calibrate_or_quantize,
-)
+from compressed_tensors.quantization.lifecycle.forward import forward_quantize

 from llmcompressor.modifiers.utils.compression_wrapper import ModuleCompressionWrapper
 from llmcompressor.utils import getattr_chain
@@ -95,7 +93,7 @@ def compress(
         args_loc = "quantization_scheme.weights"
         weight_quant_args = getattr_chain(self.layer, args_loc, None)
         if weight_quant_args is not None:
-            W = maybe_calibrate_or_quantize(self.layer, W, "weight", weight_quant_args)
+            W = forward_quantize(self.layer, W, "weight", weight_quant_args)

         if isinstance(self.layer, nn.Conv2d):
             W = W.flatten(1)
@@ -213,7 +211,7 @@ def compress(
             W = W.t()
         W = W.reshape(final_shape).to(final_dtype)
         if weight_quant_args is not None:
-            W = maybe_calibrate_or_quantize(self.layer, W, "weight", weight_quant_args)
+            W = forward_quantize(self.layer, W, "weight", weight_quant_args)

         # This is a bit hacky, but FSDP updates only work if we change the weight in
         # place, clone() or direct assignment won't work
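
For context, this change appears to track an upstream rename in compressed-tensors: maybe_calibrate_or_quantize became forward_quantize, called with the same arguments. Below is a minimal sketch of the guarded call pattern used in both hunks above. The helper name quantize_weight_if_configured is hypothetical; the imports, the getattr_chain lookup, and the forward_quantize(module, value, base_name, args) call are taken directly from the diff:

    import torch

    from compressed_tensors.quantization.lifecycle.forward import forward_quantize

    from llmcompressor.utils import getattr_chain


    def quantize_weight_if_configured(layer: torch.nn.Module, W: torch.Tensor) -> torch.Tensor:
        # Hypothetical helper mirroring compress(): fake-quantize the weight
        # only when the layer carries a weight quantization scheme.
        weight_quant_args = getattr_chain(layer, "quantization_scheme.weights", None)
        if weight_quant_args is None:
            return W
        return forward_quantize(layer, W, "weight", weight_quant_args)

Assuming the upstream change is a pure rename, the wrapper's behavior is unchanged; only the imported symbol moved.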