[MoE] fix: incorrect weight initialization for cutlass_fused_experts_fp8 (#10144)

5a7e10fe · Cheng Wan · GitHub · 33467c05 · 5a7e10fe
Unverified commit 5a7e10fe, authored Sep 07, 2025 by Cheng Wan; committed by GitHub on Sep 07, 2025.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 1 addition and 1 deletion

python/sglang/srt/layers/quantization/fp8.py +1 -1

No files found.
--- a/python/sglang/srt/layers/quantization/fp8.py
+++ b/python/sglang/srt/layers/quantization/fp8.py
@@ -656,7 +656,7 @@ class Fp8MoEMethod(FusedMoEMethodBase):
                )
                self.c_strides2 = torch.full(
                    (num_experts,),
-                    intermediate_size_per_partition,
+                    hidden_size,
                    device=w2_weight.device,
                    dtype=torch.int64,
                )