"vscode:/vscode.git/clone" did not exist on "ce755d6674ca6c5318c5ea252fe9126aacac4f10"
Unverified commit 65b98089, authored by Michael Goin, committed by GitHub
Browse files

[Bugfix] Disable FlashInfer CUTLASS MoE on SM121 (DGX Spark) (#39825)


Signed-off-by: mgoin <mgoin64@gmail.com>
Co-authored-by: Claude <noreply@anthropic.com>
parent 507df79a
...@@ -130,7 +130,14 @@ class FlashInferExperts(mk.FusedMoEExpertsModular): ...@@ -130,7 +130,14 @@ class FlashInferExperts(mk.FusedMoEExpertsModular):
p.is_device_capability(90) p.is_device_capability(90)
or p.is_device_capability_family(100) or p.is_device_capability_family(100)
or p.is_device_capability_family(110) or p.is_device_capability_family(110)
or p.is_device_capability_family(120) or p.is_device_capability(120)
# NOTE: SM121 (DGX Spark) is excluded because the bf16
# unquantized CUTLASS MoE GEMM in flashinfer <= 0.6.7 has no
# Relu2 template instantiation and throws "Invalid activation
# type" on Nemotron-H. Fixed upstream by
# https://github.com/flashinfer-ai/flashinfer/pull/2926
# (merged 2026-04-01, not yet in a stable release); lift this
# restriction once flashinfer >= 0.6.8 is the minimum.
) )
and has_flashinfer_cutlass_fused_moe() and has_flashinfer_cutlass_fused_moe()
) )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment