[LoRA] Set default MXFP4 LoRA backend to Marlin (#30598)

Signed-off-by: Xin Yang <xyangx@amazon.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>

[LoRA] Set default MXFP4 LoRA backend to Marlin (#30598)
Signed-off-by: Xin Yang <xyangx@amazon.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
9a5e9652 · Xin Yang · GitHub · 326e7c31 · 9a5e9652
Unverified commit 9a5e9652, authored Dec 18, 2025 by Xin Yang; committed by GitHub on Dec 18, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 5 additions and 5 deletions

vllm/model_executor/layers/quantization/mxfp4.py vllm/model_executor/layers/quantization/mxfp4.py +5 -5

No files found.
--- a/vllm/model_executor/layers/quantization/mxfp4.py
+++ b/vllm/model_executor/layers/quantization/mxfp4.py
@@ -95,12 +95,12 @@ def get_mxfp4_backend_with_lora() -> Mxfp4Backend:
        # SM120 needs this fix: https://github.com/triton-lang/triton/pull/8498
        and (9, 0) <= current_platform.get_device_capability() < (11, 0)
    )
-    if envs.VLLM_MXFP4_USE_MARLIN or not triton_kernels_supported:
+    if envs.VLLM_MXFP4_USE_MARLIN is False and triton_kernels_supported:
-        logger.info_once("[get_mxfp4_backend_with_lora] Using Marlin backend")
+        logger.info_once("[get_mxfp4_backend_with_lora] Using Triton backend")
-        return Mxfp4Backend.MARLIN
+        return Mxfp4Backend.TRITON
-    logger.info_once("[get_mxfp4_backend_with_lora] Using Triton backend")
+    logger.info_once("[get_mxfp4_backend_with_lora] Using Marlin backend")
-    return Mxfp4Backend.TRITON
+    return Mxfp4Backend.MARLIN
 def get_mxfp4_backend(with_lora_support: bool) -> Mxfp4Backend: