Commit 70764a8f authored by zhuwenwen
Browse files

Merge branch 'v0.11.0-dev-wm' into 'v0.11.0-dev'

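[fix] 解决 moe_fused_gate 编译错误 (fix the moe_fused_gate compilation error: the values computed in LAUNCH_MOE_GATE_CONFIG are passed as template arguments, so they are now declared constexpr)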

See merge request dcutoolkit/deeplearing/vllm!254
parents e7f2785f 3c039206
...@@ -331,10 +331,10 @@ __global__ void moe_fused_gate_kernel( ...@@ -331,10 +331,10 @@ __global__ void moe_fused_gate_kernel(
// Macro to compute compile-time constants and launch the kernel. // Macro to compute compile-time constants and launch the kernel.
#define LAUNCH_MOE_GATE_CONFIG(T, EXPERTS, EXPERT_GROUP) \ #define LAUNCH_MOE_GATE_CONFIG(T, EXPERTS, EXPERT_GROUP) \
do { \ do { \
int VPT = (EXPERTS) / (EXPERT_GROUP); \ constexpr int VPT = (EXPERTS) / (EXPERT_GROUP); \
/* If EXPERT_GROUP > WARP_SIZE, fall back to 1 row per warp */ \ /* If EXPERT_GROUP > WARP_SIZE, fall back to 1 row per warp */ \
int ROWS_PER_WARP = ((EXPERT_GROUP) <= SIZE_WARP) ? (SIZE_WARP / (EXPERT_GROUP)) : 1; \ constexpr int ROWS_PER_WARP = ((EXPERT_GROUP) <= SIZE_WARP) ? (SIZE_WARP / (EXPERT_GROUP)) : 1; \
int ROWS_PER_CTA = WARPS_PER_CTA * ROWS_PER_WARP; \ constexpr int ROWS_PER_CTA = WARPS_PER_CTA * ROWS_PER_WARP; \
moe_fused_gate_kernel<T, VPT, (EXPERTS), (EXPERT_GROUP), ROWS_PER_WARP, ROWS_PER_CTA, WARPS_PER_CTA> \ moe_fused_gate_kernel<T, VPT, (EXPERTS), (EXPERT_GROUP), ROWS_PER_WARP, ROWS_PER_CTA, WARPS_PER_CTA> \
<<<num_blocks, block_dim, 0, stream>>>( \ <<<num_blocks, block_dim, 0, stream>>>( \
input.data_ptr(), \ input.data_ptr(), \
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment