Support sequential cpu offloading with torchao quantized tensors (#3085)

huggingface · Sep 6, 2024 · 5ad982a
1 parent 9d67867
commit 5ad982a
Showing 1 changed file with 12 additions and 0 deletions.
diff --git a/src/accelerate/utils/modeling.py b/src/accelerate/utils/modeling.py
@@ -436,6 +436,18 @@ def set_module_tensor_to_device(
                     new_value = param_cls(new_value, requires_grad=old_value.requires_grad, **kwargs).to(device)
             elif param_cls.__name__ in ["QTensor", "QBitsTensor"]:
                 new_value = torch.nn.Parameter(new_value, requires_grad=old_value.requires_grad).to(device)
+            elif param_cls.__name__ in ["AffineQuantizedTensor"]:
+                new_value = torch.nn.Parameter(
+                    param_cls(
+                        new_value.layout_tensor,
+                        new_value.block_size,
+                        new_value.shape,
+                        new_value.quant_min,
+                        new_value.quant_max,
+                        new_value.zero_point_domain,
+                    ),
+                    requires_grad=old_value.requires_grad,
+                ).to(device)
             else:
                 new_value = param_cls(new_value, requires_grad=old_value.requires_grad).to(device)