Fix incompatibility with non-CUDA platforms for nvfp4 (#23478)

Signed-off-by: Lu Fang <fanglu@fb.com> Co-authored-by: Lucia (Lu) Fang <fanglu@meta.com>

Fix incompatibility with non-CUDA platforms for nvfp4 (#23478)
Signed-off-by: Lu Fang <fanglu@fb.com> Co-authored-by: Lucia (Lu) Fang <fanglu@meta.com>
c7fc6b13 · Lucia Fang · GitHub · ad788684 · c7fc6b13
Unverified Commit c7fc6b13 authored Aug 24, 2025 by Lucia Fang Committed by GitHub Aug 24, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 3 additions and 1 deletion

vllm/compilation/fusion.py vllm/compilation/fusion.py +3 -1

No files found.
--- a/vllm/compilation/fusion.py
+++ b/vllm/compilation/fusion.py
@@ -47,8 +47,10 @@ QUANT_OPS: dict[QuantKey, OpOverload] = {
    torch.ops._C.dynamic_scaled_fp8_quant.default,  # noqa: E501
    kFp8DynamicTokenSym:
    torch.ops._C.dynamic_per_token_scaled_fp8_quant.default,  # noqa: E501
-    kNvfp4Quant: torch.ops._C.scaled_fp4_quant.default,  # noqa: E501
 }
+if current_platform.is_cuda() and hasattr(torch.ops._C, "scaled_fp4_quant"):
+    QUANT_OPS[
+        kNvfp4Quant] = torch.ops._C.scaled_fp4_quant.default  # noqa: E501
 class FusedRMSQuantKey(NamedTuple):