issue/423: improve the precision of the torch implementation of rms_norm

612defae · Ziminli · 19d60bf8 · 612defae
Commit 612defae authored Sep 03, 2025 by Ziminli
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 4 additions and 6 deletions

test/infiniop/rms_norm.py test/infiniop/rms_norm.py +4 -6

No files found.
--- a/test/infiniop/rms_norm.py
+++ b/test/infiniop/rms_norm.py
@@ -59,12 +59,10 @@ NUM_ITERATIONS = 1000
 def rms_norm(ans, x, w, eps):
-    torch.pow(x, 2, out=ans)
+    input_dtype = x.dtype
-    mean = torch.mean(ans, dim=-1, keepdim=True)
+    hidden_states = x.to(torch.float32)
-    mean.add_(eps)
+    scale = hidden_states.pow(2).mean(-1, keepdim=True).add_(eps).rsqrt_()
-    torch.rsqrt(mean, out=mean)
+    ans.set_((hidden_states.mul_(scale).mul_(w)).to(input_dtype))
-    torch.mul(x, mean, out=ans)
-    ans.mul_(w)
 def test(