Fix forward function call: use forward_quantize instead of maybe_calibrate_or_quantize (#845)

Signed-off-by: Kyle Sayers <[email protected]>
vllm-project · Oct 23, 2024 · e5e0d9a · e5e0d9a
1 parent c6acef2
commit e5e0d9a
Showing 1 changed file with 3 additions and 5 deletions.
diff --git a/src/llmcompressor/modifiers/obcq/utils/sgpt_wrapper.py b/src/llmcompressor/modifiers/obcq/utils/sgpt_wrapper.py
@@ -1,8 +1,6 @@
 import time
 
-from compressed_tensors.quantization.lifecycle.forward import (
-    maybe_calibrate_or_quantize,
-)
+from compressed_tensors.quantization.lifecycle.forward import forward_quantize
 
 from llmcompressor.modifiers.utils.compression_wrapper import ModuleCompressionWrapper
 from llmcompressor.utils import getattr_chain
@@ -95,7 +93,7 @@ def compress(
         args_loc = "quantization_scheme.weights"
         weight_quant_args = getattr_chain(self.layer, args_loc, None)
         if weight_quant_args is not None:
-            W = maybe_calibrate_or_quantize(self.layer, W, "weight", weight_quant_args)
+            W = forward_quantize(self.layer, W, "weight", weight_quant_args)
 
         if isinstance(self.layer, nn.Conv2d):
             W = W.flatten(1)
@@ -213,7 +211,7 @@ def compress(
             W = W.t()
         W = W.reshape(final_shape).to(final_dtype)
         if weight_quant_args is not None:
-            W = maybe_calibrate_or_quantize(self.layer, W, "weight", weight_quant_args)
+            W = forward_quantize(self.layer, W, "weight", weight_quant_args)
 
         # This is a bit hacky, but FSDP updates only work if we change the weight in
         # place, clone() or direct assignment won't work