Fix dpsk-r1-fp4 launching crash (#12063)

71d41212 · Qiaolin Yu · GitHub · b9fb74f3 · 71d41212 · 71d41212
Unverified Commit 71d41212 authored Oct 24, 2025 by Qiaolin Yu Committed by GitHub Oct 24, 2025
Showing 2 changed files with 11 additions and 20 deletions

python/sglang/srt/layers/quantization/mxfp4.py +5 -7

python/sglang/srt/layers/quantization/unquant.py +6 -13

No files found.
--- a/python/sglang/srt/layers/quantization/mxfp4.py
+++ b/python/sglang/srt/layers/quantization/mxfp4.py
@@ -587,13 +587,11 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
        self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig
    ):
        self.moe_runner_config = moe_runner_config
-        backend = get_moe_runner_backend()
-        if backend.is_auto():
-            backend = (
-                MoeRunnerBackend.TRITON_KERNELS
-                if self.use_triton_kernels
-                else MoeRunnerBackend.TRITON
-            )
+        backend = (
+            MoeRunnerBackend.TRITON_KERNELS
+            if self.use_triton_kernels
+            else MoeRunnerBackend.TRITON
+        )
        self.runner = MoeRunner(backend, moe_runner_config)

    def apply(

--- a/python/sglang/srt/layers/quantization/unquant.py
+++ b/python/sglang/srt/layers/quantization/unquant.py
@@ -8,12 +8,7 @@ from torch.nn.parameter import Parameter

 from sglang.srt.custom_op import CustomOp
 from sglang.srt.layers.amx_utils import _amx_process_weight_after_loading
-from sglang.srt.layers.moe import (
-    MoeRunner,
-    MoeRunnerBackend,
-    MoeRunnerConfig,
-    get_moe_runner_backend,
-)
+from sglang.srt.layers.moe import MoeRunner, MoeRunnerBackend, MoeRunnerConfig
 from sglang.srt.layers.moe.moe_runner.triton import TritonMoeQuantInfo
 from sglang.srt.layers.quantization.base_config import (
    FusedMoEMethodBase,
@@ -225,13 +220,11 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
        self, layer: torch.nn.Module, moe_runner_config: MoeRunnerConfig
    ):
        self.moe_runner_config = moe_runner_config
-        backend = get_moe_runner_backend()
-        if backend.is_auto():
-            backend = (
-                MoeRunnerBackend.TRITON_KERNELS
-                if self.use_triton_kernels
-                else MoeRunnerBackend.TRITON
-            )
+        backend = (
+            MoeRunnerBackend.TRITON_KERNELS
+            if self.use_triton_kernels
+            else MoeRunnerBackend.TRITON
+        )
        self.runner = MoeRunner(backend, moe_runner_config)

    def apply(