OpenDAS / bitsandbytes · Commit d9ca0ed9

un-fuse bias

Authored Sep 17, 2022 by justheuristic
Parent: 7facedda
Showing 2 changed files with 6 additions and 4 deletions:
  bitsandbytes/autograd/_functions.py (+5, -3)
  tests/test_autograd.py (+1, -1)
bitsandbytes/autograd/_functions.py
@@ -234,8 +234,6 @@ class MatMul8bitLt(torch.autograd.Function):
         if A_dtype != torch.float16:
             warnings.warn(f"MatMul8bitLt: input matrix will be converted from {A_dtype} to float16")
             A = A.to(torch.float16)
-            if bias is not None:
-                bias = bias.to(torch.float16)

         # 1. Quantize A
         if len(A.shape) == 3:
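Note that after this hunk only the input matrix A is eagerly cast to float16; a bias in another dtype is left untouched and dealt with later, at the dequantization step. A minimal sketch of the resulting dtype behavior (the shapes and dtypes here are illustrative, not taken from the repository):

    import torch

    # Hypothetical inputs: A arrives as bfloat16, bias stays float32.
    A = torch.randn(4, 8, dtype=torch.bfloat16)
    bias = torch.randn(8, dtype=torch.float32)

    # As of this commit only A is converted; the bias keeps its dtype
    # and is applied un-fused further down the forward pass.
    if A.dtype != torch.float16:
        A = A.to(torch.float16)

    assert A.dtype == torch.float16
    assert bias.dtype == torch.float32  # no longer force-cast to float16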
@@ -315,7 +313,11 @@ class MatMul8bitLt(torch.autograd.Function):
             C32A, SA = F.transform(CA, "col32")
             out32, Sout32 = F.igemmlt(C32A, state.CxB, SA, state.SB)
             # we apply the fused bias here
-            output = F.mm_dequant(out32, Sout32, SCA, state.SCB, bias=bias)
+
+            fused_bias = bias if bias.dtype == torch.float16 else None
+            output = F.mm_dequant(out32, Sout32, SCA, state.SCB, bias=fused_bias)
+            if fused_bias is None and bias is not None:
+                output.add_(bias.to(output.dtype))

         # 4. Mixed-precision decomposition matmul
         if coo_tensorA is not None and subA is not None:
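This is the core of the change: the bias is fused into F.mm_dequant only when its dtype is already float16; any other bias is added separately after dequantization, avoiding a lossy float16 round-trip. A rough, self-contained sketch of the dispatch in plain PyTorch (dequant_matmul and its scale are made-up stand-ins for F.mm_dequant and the real quantization statistics):

    import torch

    def dequant_matmul(out32, scale, bias=None):
        # Stand-in for F.mm_dequant: rescale the int32 accumulator to
        # float16 and, when given one, fuse a float16 bias into the result.
        output = (out32.float() * scale).to(torch.float16)
        if bias is not None:
            output += bias
        return output

    out32 = torch.randint(-128, 128, (4, 8), dtype=torch.int32)
    scale = 0.01
    bias = torch.randn(8, dtype=torch.float32)  # a non-float16 bias

    # Mirrors the commit's dispatch: fuse the bias only when it is already
    # float16; otherwise add it un-fused, in the output dtype, afterwards.
    # (Note: reading bias.dtype assumes bias is not None at this point.)
    fused_bias = bias if bias.dtype == torch.float16 else None
    output = dequant_matmul(out32, scale, bias=fused_bias)
    if fused_bias is None and bias is not None:
        output = output + bias.to(output.dtype)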
tests/test_autograd.py
@@ -427,4 +427,4 @@ def test_matmullt(
         )
         if req_grad[2]:
-            torch.testing.assert_allclose(gradBias1, gradBias2, atol=0.18, rtol=0.3)
+            torch.testing.assert_allclose(gradBias1, gradBias2)
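Since the bias is now applied un-fused, its gradient flows through the ordinary autograd path rather than the fused int8 kernel, so the test presumably no longer needs the loose atol=0.18 / rtol=0.3 band and can rely on torch.testing.assert_allclose's per-dtype default tolerances. A small illustration of the difference (the values are made up):

    import torch

    a = torch.tensor([1.0, 2.0], dtype=torch.float16)

    b = a + 1e-4  # tiny perturbation, within the per-dtype defaults
    torch.testing.assert_allclose(a, b)  # passes

    c = a + 0.1   # would have passed the old atol=0.18 / rtol=0.3 band,
    # torch.testing.assert_allclose(a, c)  # but fails the tighter defaults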