Commit e9f3605f authored by Matthew Douglas

Fix Linear4bit warnings/test for compute dtype

parent 812ef06a
@@ -455,14 +455,14 @@ class Linear4bit(nn.Linear):
             self.compute_dtype = x.dtype
         elif x.dtype == torch.float16:
             # we take the compute dtype passed into the layer
-            if self.compute_dtype == torch.float32 and (x.numel() == x.shape[-1]):
+            if self.compute_dtype in [None, torch.float32] and (x.numel() == x.shape[-1]):
                 # single batch inference with input torch.float16 and compute_dtype float32 -> slow inference when it could be fast
                 # warn the user about this
                 warnings.warn(
                     "Input type into Linear4bit is torch.float16, but bnb_4bit_compute_dtype=torch.float32 (default). This will lead to slow inference.",
                 )
                 warnings.filterwarnings("ignore", message=".*inference.")
-            if self.compute_dtype == torch.float32 and (x.numel() != x.shape[-1]):
+            if self.compute_dtype in [None, torch.float32] and (x.numel() != x.shape[-1]):
                 warnings.warn(
                     "Input type into Linear4bit is torch.float16, but bnb_4bit_compute_dtype=torch.float32 (default). This will lead to slow inference or training speed.",
                 )
...
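For context, the single-batch check in the warning condition relies on the fact that a 2-D activation with batch size 1 has exactly x.shape[-1] elements. A minimal sketch of the default path this commit now warns about (a CUDA-capable device is assumed; the constructor arguments mirror the test below):

    import torch
    import bitsandbytes as bnb

    # x.numel() == x.shape[-1] holds exactly for a single-row input,
    # which is how the layer detects single-batch inference.
    x = torch.rand(1, 64, dtype=torch.float16)
    assert x.numel() == x.shape[-1]

    # compute_dtype is left unset (None), so it falls back to the
    # float32 default -- the case the widened condition now catches.
    layer = bnb.nn.Linear4bit(64, 64, quant_type="nf4").to("cuda")
    layer(x.to("cuda"))  # expected: UserWarning about slow inference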
@@ -440,31 +440,23 @@ def test_4bit_linear_warnings(device):
     dim1 = 64
     with pytest.warns(UserWarning, match=r"inference or training"):
-        net = nn.Sequential(
-            *[bnb.nn.Linear4bit(dim1, dim1, quant_type="nf4", compute_dtype=torch.float32) for i in range(10)]
-        )
+        net = nn.Sequential(*[bnb.nn.Linear4bit(dim1, dim1, quant_type="nf4") for i in range(10)])
         net = net.to(device)
         inp = torch.rand(10, dim1, device=device, dtype=torch.float16)
         net(inp)
     with pytest.warns(UserWarning, match=r"inference."):
-        net = nn.Sequential(
-            *[bnb.nn.Linear4bit(dim1, dim1, quant_type="nf4", compute_dtype=torch.float32) for i in range(10)]
-        )
+        net = nn.Sequential(*[bnb.nn.Linear4bit(dim1, dim1, quant_type="nf4") for i in range(10)])
         net = net.to(device)
         inp = torch.rand(1, dim1, device=device, dtype=torch.float16)
         net(inp)
     with pytest.warns(UserWarning) as record:
-        net = nn.Sequential(
-            *[bnb.nn.Linear4bit(dim1, dim1, quant_type="nf4", compute_dtype=torch.float32) for i in range(10)]
-        )
+        net = nn.Sequential(*[bnb.nn.Linear4bit(dim1, dim1, quant_type="nf4") for i in range(10)])
         net = net.to(device)
         inp = torch.rand(10, dim1, device=device, dtype=torch.float16)
         net(inp)
-        net = nn.Sequential(
-            *[bnb.nn.Linear4bit(dim1, dim1, quant_type="nf4", compute_dtype=torch.float32) for i in range(10)]
-        )
+        net = nn.Sequential(*[bnb.nn.Linear4bit(dim1, dim1, quant_type="nf4") for i in range(10)])
         net = net.to(device)
         inp = torch.rand(1, dim1, device=device, dtype=torch.float16)
         net(inp)
...
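As the warning text suggests, passing an explicit half-precision compute dtype avoids the slow float32 default entirely; a minimal sketch (same assumptions as above):

    import torch
    import bitsandbytes as bnb

    # Sketch: an explicit compute_dtype sidesteps both warning paths
    # exercised by the test (CUDA-capable device assumed).
    layer = bnb.nn.Linear4bit(64, 64, quant_type="nf4", compute_dtype=torch.float16).to("cuda")
    x = torch.rand(10, 64, device="cuda", dtype=torch.float16)
    out = layer(x)  # no UserWarning: compute dtype matches the input dtype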