Commit 95dafc64 authored by justheuristic

cast before allclose

parent 37f805bb
@@ -541,8 +541,8 @@ def test_linear8bitlt_no_fp16_weights(threshold, memory_efficient_backward):
     mlp = MLP8bit(
         32, 64, threshold=threshold, has_fp16_weights=False, memory_efficient_backward=memory_efficient_backward
     )
-    w1, w2 = mlp.fc1.weight.clone(), mlp.fc2.weight.clone()
-    mlp = mlp.cuda().half()
+    w1, w2 = mlp.fc1.weight.clone(), mlp.fc2.weight.clone()  # note: we grab original weights before quantization,
+    mlp = mlp.cuda().half()  # and this line triggers quantization
     for i in range(100):
         b1 = torch.randn(16, 8, 32, device="cuda").half()
@@ -567,8 +567,7 @@ def test_linear8bitlt_no_fp16_weights(threshold, memory_efficient_backward):
     mlp.zero_grad()
     (o1 * grad_proj).sum().backward()
-    assert False, (w1, w2)
-    grad_ref = grad_proj.flatten(2) @ w2 @ w1
+    grad_ref = grad_proj.flatten(2) @ w2.to(grad_proj.device) @ w1.to(grad_proj.device)
     assert torch.allclose(b1.grad, grad_ref)
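
For context, a minimal self-contained sketch of the pattern this commit fixes, using plain `torch.nn.Linear` layers as a hypothetical stand-in for the test's `MLP8bit` module: the reference weights are cloned before the model is moved to the GPU, so they remain fp32 CPU tensors and must be moved to the gradient's device before the reference matmul and the `allclose` check. Shapes and tolerances are illustrative, not taken from the test suite.

```python
import torch

# Hypothetical stand-in for MLP8bit: two bias-free linear layers, 32 -> 64 -> 32.
fc1 = torch.nn.Linear(32, 64, bias=False)
fc2 = torch.nn.Linear(64, 32, bias=False)

# Grab the original fp32 weights BEFORE moving the model to the GPU;
# the clones stay behind as fp32 CPU tensors.
w1, w2 = fc1.weight.clone(), fc2.weight.clone()

# In the real test, .cuda().half() is also what triggers int8 quantization.
fc1, fc2 = fc1.cuda().half(), fc2.cuda().half()

b1 = torch.randn(16, 8, 32, device="cuda").half().requires_grad_()
o1 = fc2(fc1(b1))
grad_proj = torch.randn_like(o1)

(o1 * grad_proj).sum().backward()

# Without .to(grad_proj.device), mixing the CPU weight clones with a CUDA
# gradient raises a device-mismatch RuntimeError before allclose is reached.
grad_ref = (
    grad_proj.float().flatten(2)
    @ w2.to(grad_proj.device)
    @ w1.to(grad_proj.device)
)

# Loose tolerances, since the forward/backward pass ran in fp16.
assert torch.allclose(b1.grad.float(), grad_ref, atol=1e-2, rtol=1e-2)
```

Note that the clones also keep their original fp32 dtype, so the sketch promotes the gradient to fp32 for the reference matmul, making the dtypes match as well as the devices.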