Update triton_mla.py: only look up best_config from attn_configs when VLLM_USE_TRITON_OPT_MLA is enabled

e046b382 · zhuwenwen · f54ad7b9 · e046b382
Commit e046b382 authored Jun 13, 2025 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing with 3 additions and 2 deletions

vllm/attention/backends/triton_mla.py vllm/attention/backends/triton_mla.py +3 -2

No files found.
--- a/vllm/attention/backends/triton_mla.py
+++ b/vllm/attention/backends/triton_mla.py
@@ -120,8 +120,9 @@ class TritonMLAImpl(MLACommonImpl[MLACommonMetadata]):
            match_seq_len = int((decode_meta.seq_lens_tensor.sum()/ max(1, B)).item())
        else:
            match_seq_len = max_seq_len
-        best_config = self.attn_configs[min(self.attn_configs.keys(), key=lambda x: abs(int(x) - match_seq_len))]
+        if envs.VLLM_USE_TRITON_OPT_MLA:
+            best_config = self.attn_configs[min(self.attn_configs.keys(), key=lambda x: abs(int(x) - match_seq_len))]
        # Run MQA
        decode_attention_fwd(q, kv_c_and_k_pe_cache, kv_c_cache, o,