Unverified commit 5f96c00c, authored by jiahanc, committed by GitHub
Browse files

[Fix] Add SM check to flashinfer MOE backend (#29144)


Signed-off-by: jiahanc <173873397+jiahanc@users.noreply.github.com>
Signed-off-by: mgoin <mgoin64@gmail.com>
Co-authored-by: mgoin <mgoin64@gmail.com>
parent 45870632
...@@ -282,6 +282,16 @@ def get_flashinfer_moe_backend() -> FlashinferMoeBackend: ...@@ -282,6 +282,16 @@ def get_flashinfer_moe_backend() -> FlashinferMoeBackend:
flashinfer_moe_backend = envs.VLLM_FLASHINFER_MOE_BACKEND flashinfer_moe_backend = envs.VLLM_FLASHINFER_MOE_BACKEND
if flashinfer_moe_backend in backend_map: if flashinfer_moe_backend in backend_map:
if (
flashinfer_moe_backend == "latency"
and not current_platform.is_device_capability(100)
):
logger.info_once(
"Flashinfer TRTLLM MOE backend is only supported on "
"SM100 and later, using CUTLASS backend instead",
scope="local",
)
return FlashinferMoeBackend.CUTLASS
return backend_map[flashinfer_moe_backend] return backend_map[flashinfer_moe_backend]
elif current_platform.is_device_capability(90): elif current_platform.is_device_capability(90):
return FlashinferMoeBackend.CUTLASS return FlashinferMoeBackend.CUTLASS
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment