Unverified commit 6534d2fc, authored by zifeitong, committed by GitHub
Browse files

Fix torch version check for SM100 mxfp4 (#22535)


Signed-off-by: Zifei Tong <zifeitong@gmail.com>
Signed-off-by: mgoin <mgoin64@gmail.com>
Co-authored-by: mgoin <mgoin64@gmail.com>
parent 422f22e0
@@ -741,12 +741,14 @@ class FusedMoE(torch.nn.Module):
         # we padding globally so EP buffer allocation works
         if quant_config and quant_config.get_name() == "mxfp4":
-            if not is_torch_equal_or_newer("2.8.0"):
-                raise RuntimeError("Mxfp4 on hopper requires torch >= 2.8.0")
-            if current_platform.is_device_capability(
-                    90) and not has_triton_kernels():
-                raise NotImplementedError(
-                    "Triton kernels must be installed for mxfp4 on hopper")
+            if not current_platform.is_device_capability(100):
+                if not is_torch_equal_or_newer("2.8.0"):
+                    raise RuntimeError(
+                        "Mxfp4 on non-blackwell requires torch >= 2.8.0")
+                if not has_triton_kernels():
+                    raise NotImplementedError(
+                        "triton_kernels must be installed for "
+                        "mxfp4 on non-blackwell")
             if (current_platform.is_rocm()
                     or envs.VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8
                     or envs.VLLM_USE_FLASHINFER_MOE_MXFP4_BF16):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment