Unverified commit a67dec7c, authored by Jinzhen Lin and committed by GitHub
Browse files

[Bugfix] fix IMA issue in certain cases of the moe marlin kernel (#28619)


Signed-off-by: Jinzhen Lin <jinzhen.ljz@antgroup.com>
Co-authored-by: youkaichao <youkaichao@gmail.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
parent 77740191
......@@ -489,10 +489,11 @@ __global__ void Marlin(
#pragma unroll
for (int i = 0; i < 4; i++) {
int idx = tid4 * 4 + i;
idx = idx < block_num_valid_tokens ? idx : 0;
if (idx < block_num_valid_tokens) {
if constexpr (w_type == vllm::kFE2M1f && s_type == vllm::kFE4M3fn) {
sh_block_topk_weights[idx] = __hmul2(
global_scale, Dtype::num2num2(Dtype::float2num(
sh_block_topk_weights[idx] =
__hmul2(global_scale,
Dtype::num2num2(Dtype::float2num(
topk_weights_ptr[sh_block_sorted_ids[idx]])));
} else {
sh_block_topk_weights[idx] = Dtype::num2num2(
......@@ -501,6 +502,7 @@ __global__ void Marlin(
}
}
}
}
__syncthreads();
};
......
......@@ -38,7 +38,6 @@ class SharedFusedMoE(FusedMoE):
# TODO(wentao): find the root cause and remove this condition
self.enable_eplb
or (self.moe_config.use_flashinfer_cutlass_kernels and self.dp_size > 1)
or self.use_marlin_kernels
)
and self._shared_experts is not None
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment