Unverified Commit 9a719b7a authored by Kaixi Hou, committed by GitHub

[NVIDIA] Remove unused `get_fused_moe_impl_class` function (#9764)

parent 3fa62da7
@@ -1074,16 +1074,3 @@ class FlashInferFP4MoE(FusedMoE):
         )[0]
         return result
 
-
-
-def get_fused_moe_impl_class():
-    """Factory function to get the appropriate FusedMoE implementation class."""
-    if should_use_flashinfer_trtllm_moe() and _is_fp4_quantization_enabled():
-        # Use FP4 variant when FP4 quantization is enabled
-        return FlashInferFP4MoE
-    elif should_use_flashinfer_trtllm_moe():
-        # Use regular FlashInfer variant for non-FP4 FlashInfer cases
-        return FlashInferFusedMoE
-    else:
-        # Default case
-        return FusedMoE
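
The removed factory simply mapped two runtime predicates to an implementation class. For readers tracing what it did, here is a minimal sketch of how that choice could be inlined at a hypothetical call site; the branch logic mirrors the deleted function, but the variable name `moe_cls` and the import availability of the two predicates are assumptions, not code from this repository:

```python
# Illustrative sketch only, not repository code: the same selection logic as
# the deleted get_fused_moe_impl_class(), inlined at a hypothetical call site.
# Assumes should_use_flashinfer_trtllm_moe() and _is_fp4_quantization_enabled()
# remain importable, as they were in the removed function body.
if should_use_flashinfer_trtllm_moe():
    # FP4 quantization selects the FP4-specialized FlashInfer variant.
    moe_cls = FlashInferFP4MoE if _is_fp4_quantization_enabled() else FlashInferFusedMoE
else:
    # Default case: the generic fused MoE implementation.
    moe_cls = FusedMoE
```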
@@ -635,11 +635,7 @@ class Fp8MoEMethod(FusedMoEMethodBase):
             layer.register_parameter("w13_weight_scale_inv", w13_weight_scale)
             layer.register_parameter("w2_weight_scale_inv", w2_weight_scale)
             assert self.quant_config.activation_scheme == "dynamic"
-            if (
-                get_bool_env_var("SGLANG_CUTLASS_MOE")
-                and self.cutlass_fp8_supported
-                and (is_sm100_supported() or is_sm90_supported())
-            ):
+            if self.use_cutlass_fused_experts_fp8:
                 self.ab_strides1 = torch.full(
                     (num_experts,),
                     hidden_size,
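
The new condition reads a single precomputed attribute instead of repeating the environment and hardware checks inline. A plausible sketch of how such a flag could be derived once during setup; the derivation below reuses the exact condition this hunk deletes, but its placement (e.g., in `__init__`) is an assumption, not confirmed by the diff:

```python
# Sketch under assumptions: compute the flag once (e.g., in __init__) from the
# same checks the inline condition used to perform at this call site.
self.use_cutlass_fused_experts_fp8 = (
    get_bool_env_var("SGLANG_CUTLASS_MOE")  # opt-in via environment variable
    and self.cutlass_fp8_supported  # CUTLASS FP8 kernels are available
    and (is_sm100_supported() or is_sm90_supported())  # SM90/SM100 GPUs only
)
```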