Skip arch reason: comment out the ROCm partially-supported entries for Qwen2, Mistral, Mixtral (SWA) and Phi3V

66a7ebd8 · zhuwenwen · 49be5e62 · 66a7ebd8
Commit 66a7ebd8 authored Feb 06, 2025 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 14 additions and 14 deletions

vllm/platforms/rocm.py vllm/platforms/rocm.py +14 -14

No files found.
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -38,24 +38,24 @@ _ROCM_UNSUPPORTED_MODELS: List[str] = []

 # Models partially supported by ROCm.
 # Architecture -> Reason.
-_ROCM_SWA_REASON = ("Sliding window attention (SWA) is not yet supported in "
-                    "Triton flash attention. For half-precision SWA support, "
-                    "please use CK flash attention by setting "
-                    "`VLLM_USE_TRITON_FLASH_ATTN=0`")
+# _ROCM_SWA_REASON = ("Sliding window attention (SWA) is not yet supported in "
+#                     "Triton flash attention. For half-precision SWA support, "
+#                     "please use CK flash attention by setting "
+#                     "`VLLM_USE_TRITON_FLASH_ATTN=0`")
 _ROCM_PARTIALLY_SUPPORTED_MODELS: Dict[str, str] = {
-    "Qwen2ForCausalLM":
-    _ROCM_SWA_REASON,
-    "MistralForCausalLM":
-    _ROCM_SWA_REASON,
-    "MixtralForCausalLM":
-    _ROCM_SWA_REASON,
+    # "Qwen2ForCausalLM":
+    # _ROCM_SWA_REASON,
+    # "MistralForCausalLM":
+    # _ROCM_SWA_REASON,
+    # "MixtralForCausalLM":
+    # _ROCM_SWA_REASON,
    "PaliGemmaForConditionalGeneration":
    ("ROCm flash attention does not yet "
     "fully support 32-bit precision on PaliGemma"),
-    "Phi3VForCausalLM":
-    ("ROCm Triton flash attention may run into compilation errors due to "
-     "excessive use of shared memory. If this happens, disable Triton FA "
-     "by setting `VLLM_USE_TRITON_FLASH_ATTN=0`")
+    # "Phi3VForCausalLM":
+    # ("ROCm Triton flash attention may run into compilation errors due to "
+    #  "excessive use of shared memory. If this happens, disable Triton FA "
+    #  "by setting `VLLM_USE_TRITON_FLASH_ATTN=0`")
 }