Revert "修改flashmla的接口" (modify the flashmla interface)

ce7212d2 · zhuwenwen · ff8507ce · ce7212d2
Commit ce7212d2 authored Aug 21, 2025 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 1 addition and 1 deletion

vllm/attention/ops/flashmla.py vllm/attention/ops/flashmla.py +1 -1

No files found.
--- a/vllm/attention/ops/flashmla.py
+++ b/vllm/attention/ops/flashmla.py
@@ -100,7 +100,7 @@ def flash_mla_with_kvcache(
        softmax_scale = q.shape[-1]**(-0.5)
    if current_platform.is_rocm():
        if kv_cache_dtype == "fp8":         
-            out, softmax_lse = flash_mla_cuda.fwd_kvcache_mla(
+            out, softmax_lse = flash_mla_cuda.fwd_kvcache_quantization_mla(
                q,
                k_cache,
                None,