Fix `numel()` downcast in vllm/csrc/moe/moe_align_sum_kernels.cu +2 (#17082)

Co-authored-by: mgoin <mgoin64@gmail.com>

Fix `numel()` downcast in vllm/csrc/moe/moe_align_sum_kernels.cu +2 (#17082)
Co-authored-by: mgoin <mgoin64@gmail.com>
86debab5 · Richard Barnes · GitHub · be250bbc · 86debab5 · 86debab5
Unverified commit 86debab5, authored Jul 01, 2025 by Richard Barnes; committed by GitHub on Jul 01, 2025.
Show whitespace changes
Inline Side-by-side

Showing with 2 additions and 2 deletions

csrc/moe/moe_align_sum_kernels.cu (+1 -1)

csrc/moe/topk_softmax_kernels.cu (+1 -1)

No files found.
--- a/csrc/moe/moe_align_sum_kernels.cu
+++ b/csrc/moe/moe_align_sum_kernels.cu
@@ -239,7 +239,7 @@ void moe_sum(torch::Tensor& input,   // [num_tokens, topk, hidden_size]
             torch::Tensor& output)  // [num_tokens, hidden_size]
 {
  const int hidden_size = input.size(-1);
-  const int num_tokens = output.numel() / hidden_size;
+  const auto num_tokens = output.numel() / hidden_size;
  const int topk = input.size(1);

  dim3 grid(num_tokens);

--- a/csrc/moe/topk_softmax_kernels.cu
+++ b/csrc/moe/topk_softmax_kernels.cu
@@ -492,7 +492,7 @@ void topk_softmax(
    torch::Tensor& gating_output)               // [num_tokens, num_experts]
 {
    const int num_experts = gating_output.size(-1);
-    const int num_tokens = gating_output.numel() / num_experts;
+    const auto num_tokens = gating_output.numel() / num_experts;
    const int topk = topk_weights.size(-1);

    const bool is_pow_2 = (num_experts != 0) && ((num_experts & (num_experts - 1)) == 0);