Fixed FP4 import and data type conversion in backward.

c93a90d0 · Tim Dettmers · 7f0773ae · c93a90d0 · c93a90d0
Commit c93a90d0 authored Feb 14, 2023 by Tim Dettmers
Hide whitespace changes
Inline Side-by-side

Showing with 2 additions and 6 deletions

bitsandbytes/autograd/_functions.py bitsandbytes/autograd/_functions.py +1 -5

bitsandbytes/nn/__init__.py bitsandbytes/nn/__init__.py +1 -1

No files found.
--- a/bitsandbytes/autograd/_functions.py
+++ b/bitsandbytes/autograd/_functions.py
@@ -525,13 +525,9 @@ class MatMulFP4(torch.autograd.Function):
            # compute grad_bias first before changing grad_output dtype
            grad_bias = grad_output.sum(0, dtype=ctx.dtype_bias)

-        # Cast grad_output to fp16
-        if len(grad_output.shape) == 3:
-            grad_output = grad_output.reshape(-1, grad_output.shape[-1]).contiguous()
-
        # not supported by PyTorch. TODO: create work-around
        #if req_gradB: grad_B = torch.matmul(grad_output.t(), A)
-        if req_gradA: grad_A = torch.matmul(grad_output, F.dequantize_fp4(B, ctx.state).to(ctx.dtype_A).t())
+        if req_gradA: grad_A = torch.matmul(grad_output, F.dequantize_fp4(B, ctx.state).to(grad_output.dtype).t())

        return grad_A, grad_B, None, grad_bias, None


--- a/bitsandbytes/nn/__init__.py
+++ b/bitsandbytes/nn/__init__.py
@@ -2,4 +2,4 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
-from .modules import Int8Params, Linear8bitLt, StableEmbedding, LinearFP4
+from .modules import Int8Params, Linear8bitLt, StableEmbedding, LinearFP4, FP4Params