Fixed multi-GPU quantization (#196)

casper-hansen · Nov 16, 2023 · 6f516b8
1 parent 74d0fe4
commit 6f516b8
Showing 1 changed file with 8 additions and 1 deletion.
diff --git a/awq/quantize/quantizer.py b/awq/quantize/quantizer.py
@@ -69,8 +69,15 @@ def pseudo_dequantize_tensor(self, w: nn.Linear, scales: torch.Tensor, zeros: to
 
     def quantize(self):
         for i in tqdm(range(len(self.modules)), desc="AWQ"):
+            # Move module and inputs to correct device
+            common_device = next(self.modules[i].parameters()).device
+            if common_device is None or str(common_device) == "cpu":
+                self.modules[i] = self.modules[i].cuda()
+                common_device = next(self.modules[i].parameters()).device
+
+            self.inps = self.inps.to(common_device)
+
             # [STEP 1]: Get layer, extract linear modules, extract input features
-            self.modules[i] = self.modules[i].cuda()
             named_linears = get_named_linears(self.modules[i])
             input_feat = self._get_input_feat(self.modules[i], named_linears)
             clear_memory()