"vscode:/vscode.git/clone" did not exist on "90e10deeb34fbbde30fff00917cc1cef00448747"
Commit 3af22744 authored by lixh6
Browse files

Fix: Extend MAX_VPT from 32 to 128 to support large-scale MoE models (e.g., the quantized GLM-4.5V model).

parent cfd6a543
...@@ -71,7 +71,8 @@ __device__ inline bool cmp_eq(const T& a, const T& b) { ...@@ -71,7 +71,8 @@ __device__ inline bool cmp_eq(const T& a, const T& b) {
// Fixed constants common to both dynamic and static template versions: // Fixed constants common to both dynamic and static template versions:
static constexpr int SIZE_WARP = 32; static constexpr int SIZE_WARP = 32;
static constexpr int WARPS_PER_CTA = 6; static constexpr int WARPS_PER_CTA = 6;
static constexpr int MAX_VPT = 32; // maximum VPT we support, > params.VPT = num_expert / num_expert_group // static constexpr int MAX_VPT = 32; // maximum VPT we support, > params.VPT = num_expert / num_expert_group
static constexpr int MAX_VPT = 128; // Extend MAX_VPT from 32 to 128 to accommodate large-scale MoE models (e.g., the quantized GLM-4.5V model).
// Create an alias for Array using AlignedArray // Create an alias for Array using AlignedArray
template <typename T, int N> template <typename T, int N>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment