Unverified commit 71ab0dab authored by HAI, committed by GitHub

Fix the moe padding conditional logic (#4081)

parent d3d4d767
@@ -18,6 +18,7 @@ from sglang.srt.layers.quantization.fp8_kernel import per_token_group_quant_fp8
 from sglang.srt.layers.quantization.int8_kernel import per_token_group_quant_int8
 from sglang.srt.utils import (
     direct_register_custom_op,
+    get_bool_env_var,
     get_device_name,
     is_cuda_available,
     is_hip,
@@ -941,7 +942,11 @@ def fused_experts_impl(
     no_combine: bool = False,
 ):
     padded_size = padding_size
-    if not use_fp8_w8a8 or not use_int8_w8a8 or block_shape is not None:
+    if (
+        not (use_fp8_w8a8 or use_int8_w8a8)
+        or block_shape is not None
+        or (is_hip_ and get_bool_env_var("CK_MOE"))
+    ):
         padded_size = 0
     # Check constraints.
...
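
Context for the logic change: the old predicate `not use_fp8_w8a8 or not use_int8_w8a8` is equivalent to `not (use_fp8_w8a8 and use_int8_w8a8)`, which is true whenever the two flags are not both set, so `padded_size` was forced to 0 even on a plain fp8 run. The new predicate uses the De Morgan form `not (use_fp8_w8a8 or use_int8_w8a8)`, keeping padding whenever at least one 8-bit path is active, and additionally disables padding on ROCm when the CK_MOE environment variable is set. The sketch below is a minimal, standalone illustration of the two predicates, not the sglang source; the helper names `padded_size_old`/`padded_size_new` and the boolean `ck_moe` parameter are hypothetical stand-ins for the real call sites.

```python
# Standalone sketch of the padding predicate before and after this commit.
# All parameters are stand-ins for the values used inside fused_experts_impl.

def padded_size_old(padding_size, use_fp8_w8a8, use_int8_w8a8, block_shape):
    # Buggy: "not A or not B" is True unless BOTH quant modes are enabled at
    # once, so padding was effectively always disabled.
    if not use_fp8_w8a8 or not use_int8_w8a8 or block_shape is not None:
        return 0
    return padding_size

def padded_size_new(padding_size, use_fp8_w8a8, use_int8_w8a8, block_shape,
                    is_hip_=False, ck_moe=False):
    # Fixed: pad only when some 8-bit quant mode is active, no block-wise
    # quantization is requested, and we are not on ROCm with CK_MOE enabled.
    if (
        not (use_fp8_w8a8 or use_int8_w8a8)
        or block_shape is not None
        or (is_hip_ and ck_moe)
    ):
        return 0
    return padding_size

# fp8 path without block quantization: the old code dropped the padding,
# the fixed code keeps it.
assert padded_size_old(128, use_fp8_w8a8=True, use_int8_w8a8=False, block_shape=None) == 0
assert padded_size_new(128, use_fp8_w8a8=True, use_int8_w8a8=False, block_shape=None) == 128
```

The asserts show the behavioral difference for the common fp8-only case, which is what the commit title refers to as the padding conditional fix.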