Fix restoration of quant_storage for CPU offloading (#1279)

* Fix restoration of quant_storage for CPU offloading
* Clarify comment on default quant_storage in Params4bit.from_prequantized()
* Fix to make quant_storage dynamic based on serialized dtype
* Delete obsolete comment

---------

Co-authored-by: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com>

Fix restoration of quant_storage for CPU offloading (#1279)
* Fix restoration of quant_storage for CPU offloading
* Clarify comment on default quant_storage in Params4bit.from_prequantized()
* Fix to make quant_storage dynamic based on serialized dtype
* Delete obsolete comment

---------

Co-authored-by: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com>
7fed393a · Matthew Douglas · GitHub · e3ae243b · 7fed393a
Unverified Commit 7fed393a authored Jul 23, 2024 by Matthew Douglas Committed by GitHub Jul 23, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 6 additions and 2 deletions

bitsandbytes/nn/modules.py bitsandbytes/nn/modules.py +6 -2

No files found.
--- a/bitsandbytes/nn/modules.py
+++ b/bitsandbytes/nn/modules.py
@@ -282,10 +282,13 @@ class Params4bit(torch.nn.Parameter):
        self.compress_statistics = self.quant_state.nested
        self.quant_type = self.quant_state.quant_type
        self.bnb_quantized = True
+        self.quant_storage = data.dtype
        return self
    def _quantize(self, device):
-        w = self.data.contiguous().cuda(device)
+        w = self.data.contiguous().to(device)
        w_4bit, quant_state = bnb.functional.quantize_4bit(
            w,
            blocksize=self.blocksize,
@@ -333,6 +336,7 @@ class Params4bit(torch.nn.Parameter):
                blocksize=self.blocksize,
                compress_statistics=self.compress_statistics,
                quant_type=self.quant_type,
+                quant_storage=self.quant_storage,
            )
            return new_param
@@ -450,7 +454,7 @@ class Linear4bit(nn.Linear):
                # since we registered the module, we can recover the state here
                assert self.weight.shape[1] == 1
                if not isinstance(self.weight, Params4bit):
-                    self.weight = Params4bit(self.weight, quant_storage=self.quant_storage)
+                    self.weight = Params4bit(self.weight, quant_storage=self.quant_storage, bnb_quantized=True)
                self.weight.quant_state = self.quant_state
            else:
                print(