Merge pull request #12 from sazczmh/main

tests: Triton 3.2.0 removed the fast_flush parameter from do_bench

Merge pull request #12 from sazczmh/main
tests: Triton 3.2.0 removed the fast_flush parameter from do_bench
accc1695 · Jiashi Li · GitHub · 414a2f3e · 051e40e8 · accc1695
Unverified Commit accc1695 authored Feb 24, 2025 by Jiashi Li Committed by GitHub Feb 24, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 1 addition and 1 deletion

tests/test_flash_mla.py tests/test_flash_mla.py +1 -1

No files found.
--- a/tests/test_flash_mla.py
+++ b/tests/test_flash_mla.py
@@ -87,7 +87,7 @@ def test_flash_mla(b, s_q, mean_sk, h_q, h_kv, d, dv, causal, varlen):
    cal_diff(out_flash, out_torch, "out")
    cal_diff(lse_flash, lse_torch, "lse")
-    t = triton.testing.do_bench(flash_mla, fast_flush=False)
+    t = triton.testing.do_bench(flash_mla)
    FLOPS = s_q * total_seqlens * h_q * (d + dv) * 2
    bytes = (total_seqlens * h_kv * d + b * s_q * h_q * d + b * s_q * h_q * dv) * (torch.finfo(dtype).bits // 8)
    print(f"{t:.3f} ms, {FLOPS / 10 ** 9 / t:.0f} TFLOPS, {bytes / 10 ** 6 / t:.0f} GB/s")