override .cuda() to check if model is already quantized (#25166)

2a787201 · YQ · GitHub · c1dba111 · 2a787201
Unverified Commit 2a787201 authored Jul 28, 2023 by YQ Committed by GitHub Jul 28, 2023
Show whitespace changes
Inline Side-by-side

Showing with 10 additions and 0 deletions

src/transformers/modeling_utils.py src/transformers/modeling_utils.py +10 -0

No files found.
--- a/src/transformers/modeling_utils.py
+++ b/src/transformers/modeling_utils.py
@@ -1912,6 +1912,16 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
            mem = mem + mem_bufs
        return mem

+    def cuda(self, *args, **kwargs):
+        # Reject the move for quantized (4-bit or 8-bit) models, flagged by `is_quantized`; otherwise defer to the parent `cuda()`
+        if getattr(self, "is_quantized", False):
+            raise ValueError(
+                "Calling `cuda()` is not supported for `4-bit` or `8-bit` quantized models. Please use the model as it is, since the"
+                " model has already been set to the correct devices and casted to the correct `dtype`."
+            )
+        else:
+            return super().cuda(*args, **kwargs)
+
    def to(self, *args, **kwargs):
        # Checks if the model has been loaded in 8-bit
        if getattr(self, "is_quantized", False):