Fix: Extend MAX_VPT to 128 for large-scale MoE models (e.g., the quantized GLM-4.5V model).

3af22744 · lixh6 · cfd6a543 · 3af22744
Commit 3af22744 authored Mar 05, 2026 by lixh6
Show whitespace changes
Inline Side-by-side

Showing with 2 additions and 1 deletion

csrc/moe/moe_fused_gate.cu csrc/moe/moe_fused_gate.cu +2 -1

No files found.
--- a/csrc/moe/moe_fused_gate.cu
+++ b/csrc/moe/moe_fused_gate.cu
@@ -71,7 +71,8 @@ __device__ inline bool cmp_eq(const T& a, const T& b) {
 // Fixed constants common to both dynamic and static template versions:
 static constexpr int SIZE_WARP = 32;
 static constexpr int WARPS_PER_CTA = 6;
-static constexpr int MAX_VPT = 32;  // maximum VPT we support, > params.VPT = num_expert / num_expert_group
+// static constexpr int MAX_VPT = 32;  // maximum VPT we support, > params.VPT = num_expert / num_expert_group
+static constexpr int MAX_VPT = 128; // Extend MAX_VPT from 32 to 128 to accommodate large-scale MoE models (e.g., the quantized GLM-4.5V model).

 // Create an alias for Array using AlignedArray
 template <typename T, int N>