Commit 66a7ebd8 authored by zhuwenwen
Browse files

skip arch reason

parent 49be5e62
...@@ -38,24 +38,24 @@ _ROCM_UNSUPPORTED_MODELS: List[str] = [] ...@@ -38,24 +38,24 @@ _ROCM_UNSUPPORTED_MODELS: List[str] = []
# Models partially supported by ROCm. # Models partially supported by ROCm.
# Architecture -> Reason. # Architecture -> Reason.
_ROCM_SWA_REASON = ("Sliding window attention (SWA) is not yet supported in " # _ROCM_SWA_REASON = ("Sliding window attention (SWA) is not yet supported in "
"Triton flash attention. For half-precision SWA support, " # "Triton flash attention. For half-precision SWA support, "
"please use CK flash attention by setting " # "please use CK flash attention by setting "
"`VLLM_USE_TRITON_FLASH_ATTN=0`") # "`VLLM_USE_TRITON_FLASH_ATTN=0`")
_ROCM_PARTIALLY_SUPPORTED_MODELS: Dict[str, str] = { _ROCM_PARTIALLY_SUPPORTED_MODELS: Dict[str, str] = {
"Qwen2ForCausalLM": # "Qwen2ForCausalLM":
_ROCM_SWA_REASON, # _ROCM_SWA_REASON,
"MistralForCausalLM": # "MistralForCausalLM":
_ROCM_SWA_REASON, # _ROCM_SWA_REASON,
"MixtralForCausalLM": # "MixtralForCausalLM":
_ROCM_SWA_REASON, # _ROCM_SWA_REASON,
"PaliGemmaForConditionalGeneration": "PaliGemmaForConditionalGeneration":
("ROCm flash attention does not yet " ("ROCm flash attention does not yet "
"fully support 32-bit precision on PaliGemma"), "fully support 32-bit precision on PaliGemma"),
"Phi3VForCausalLM": # "Phi3VForCausalLM":
("ROCm Triton flash attention may run into compilation errors due to " # ("ROCm Triton flash attention may run into compilation errors due to "
"excessive use of shared memory. If this happens, disable Triton FA " # "excessive use of shared memory. If this happens, disable Triton FA "
"by setting `VLLM_USE_TRITON_FLASH_ATTN=0`") # "by setting `VLLM_USE_TRITON_FLASH_ATTN=0`")
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment