Commit 03a3c522 authored by lixh6
Browse files

Fix: Extend MAX_VPT from 32 to 256 to accommodate large-scale MoE models (e.g.,...

Fix: Extend MAX_VPT from 32 to 256 to accommodate large-scale MoE models (e.g., GLM-5-quantized model).
parent 4ad7a1fe
...@@ -72,7 +72,7 @@ __device__ inline bool cmp_eq(const T& a, const T& b) { ...@@ -72,7 +72,7 @@ __device__ inline bool cmp_eq(const T& a, const T& b) {
static constexpr int SIZE_WARP = 32; static constexpr int SIZE_WARP = 32;
static constexpr int WARPS_PER_CTA = 6; static constexpr int WARPS_PER_CTA = 6;
// static constexpr int MAX_VPT = 32; // maximum VPT we support, > params.VPT = num_expert / num_expert_group // static constexpr int MAX_VPT = 32; // maximum VPT we support, > params.VPT = num_expert / num_expert_group
static constexpr int MAX_VPT = 128; // Extend MAX_VPT from 32 to 128 to accommodate large-scale MoE models (e.g., GLM-4V-quantized model). static constexpr int MAX_VPT = 256; // Extend MAX_VPT from 32 to 256 to accommodate large-scale MoE models (e.g., GLM-5-quantized model).
// Create an alias for Array using AlignedArray // Create an alias for Array using AlignedArray
template <typename T, int N> template <typename T, int N>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment