Unverified commit 1e60f87b, authored by Jinzhen Lin and committed by GitHub
Browse files

[Kernel] fix moe_align_block_size error condition (#12239)


Signed-off-by: Jinzhen Lin <linjinzhen@hotmail.com>
parent 9705b90b
...@@ -234,14 +234,16 @@ void moe_align_block_size(torch::Tensor topk_ids, int64_t num_experts, ...@@ -234,14 +234,16 @@ void moe_align_block_size(torch::Tensor topk_ids, int64_t num_experts,
bool use_global_memory = false; bool use_global_memory = false;
bool use_i16 = false; // Use uint16_t for shared memory token counts bool use_i16 = false; // Use uint16_t for shared memory token counts
if (shared_mem_i16 > device_max_shared_mem) { if (shared_mem_i32 < device_max_shared_mem) {
use_global_memory = true; // Do nothing in this case. We're all set to use int32_t token counts
} else if (shared_mem_i32 > device_max_shared_mem && } else if (shared_mem_i16 < device_max_shared_mem &&
topk_ids.numel() <= 65535) { topk_ids.numel() <= 65535) {
// when nelements of topk_ids is smaller than 65535 (max value of uint16), // when nelements of topk_ids is smaller than 65535 (max value of uint16),
// element value of token_cnts would also smaller than 65535, // element value of token_cnts would also smaller than 65535,
// so we can use uint16 as dtype of token_cnts // so we can use uint16 as dtype of token_cnts
use_i16 = true; use_i16 = true;
} else {
use_global_memory = true;
} }
if (use_global_memory) { if (use_global_memory) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment