Update the conditions for `_pad_v`: handle ROCm and non-ROCm platforms separately

5f09420d · zhuwenwen · e005bce5 · 5f09420d
Commit 5f09420d authored Sep 10, 2025 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing with 6 additions and 4 deletions

vllm/v1/attention/backends/mla/common.py vllm/v1/attention/backends/mla/common.py +6 -4

No files found.
--- a/vllm/v1/attention/backends/mla/common.py
+++ b/vllm/v1/attention/backends/mla/common.py
@@ -920,10 +920,12 @@ class MLACommonImpl(MLAAttentionImpl[M], Generic[M]):
            # v with 0s to match the qk head dim for attention backends that do
            # not support different headdims
            # We don't need to pad V if we are on a hopper system with FA3
-            self._pad_v = self.vllm_flash_attn_version is None or not (
-                self.vllm_flash_attn_version == 3
-                and current_platform.get_device_capability()[0] == 9
-                and torch.cuda.get_device_properties(torch.cuda.current_device()).multi_processor_count == 120)
+            if not current_platform.is_rocm():
+                self._pad_v = self.vllm_flash_attn_version is None or not (
+                    self.vllm_flash_attn_version == 3
+                    and current_platform.get_device_capability()[0] == 9)
+            else:
+                self._pad_v = torch.cuda.get_device_properties(torch.cuda.current_device()).multi_processor_count == 120
        

    def _flash_attn_varlen_diff_headdims(self,