Remove redundant kv_cache_dtype_str assignment

9c95f8b0 · zhuwenwen · 7d9a3bcc · 9c95f8b0
Commit 9c95f8b0 authored Jan 27, 2026 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 0 additions and 1 deletion

vllm/v1/attention/backends/mla/common.py +0 -1

No files found.
--- a/vllm/v1/attention/backends/mla/common.py
+++ b/vllm/v1/attention/backends/mla/common.py
@@ -1300,7 +1300,6 @@ class MLACommonImpl(MLAAttentionImpl[M], Generic[M]):
        if has_decode:
            assert attn_metadata.decode is not None
-            kv_cache_dtype_str = None
            if torch.cuda.get_device_properties("cuda").gcnArchName.split(':')[0] == "gfx938" and kv_cache_dtype_str=="fp8_e4m3" and envs.VLLM_USE_FUSED_CACHE_QUANT_BMM_MLA:
                decode_q = q_quant[:num_decode_tokens]
            decode_q_nope, decode_q_pe = decode_q.split(