[Bugfix] Fix fused MoE IMA (sans chunking) by using int64 for strides (#34279)

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>

[Bugfix] Fix fused MoE IMA (sans chunking) by using int64 for strides (#34279)
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
d7982daf · Tyler Michael Smith · GitHub · 9b17c574 · d7982daf
Unverified commit d7982daf, authored Feb 11, 2026 by Tyler Michael Smith; committed by GitHub on Feb 11, 2026
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 27 additions and 27 deletions

vllm/model_executor/layers/fused_moe/fused_moe.py vllm/model_executor/layers/fused_moe/fused_moe.py +27 -27

No files found.
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -95,19 +95,19 @@ def fused_moe_kernel_gptq_awq(
    # moving by 1 element in a particular dimension. E.g. `stride_am` is
    # how much to increase `a_ptr` by to get the element one row down
    # (A has M rows).
-    stride_am,
+    stride_am: tl.int64,
-    stride_ak,
+    stride_ak: tl.int64,
-    stride_be,
+    stride_be: tl.int64,
-    stride_bk,
+    stride_bk: tl.int64,
-    stride_bn,
+    stride_bn: tl.int64,
-    stride_cm,
+    stride_cm: tl.int64,
-    stride_cn,
+    stride_cn: tl.int64,
-    stride_bse,
+    stride_bse: tl.int64,
-    stride_bsk,
+    stride_bsk: tl.int64,
-    stride_bsn,
+    stride_bsn: tl.int64,
-    stride_bze,
+    stride_bze: tl.int64,
-    stride_bzk,
+    stride_bzk: tl.int64,
-    stride_bzn,
+    stride_bzn: tl.int64,
    block_k_diviable: tl.constexpr,
    group_size: tl.constexpr,
    # Meta-parameters
@@ -329,20 +329,20 @@ def fused_moe_kernel(
    # moving by 1 element in a particular dimension. E.g. `stride_am` is
    # how much to increase `a_ptr` by to get the element one row down
    # (A has M rows).
-    stride_am,
+    stride_am: tl.int64,
-    stride_ak,
+    stride_ak: tl.int64,
-    stride_be,
+    stride_be: tl.int64,
-    stride_bk,
+    stride_bk: tl.int64,
-    stride_bn,
+    stride_bn: tl.int64,
-    stride_cm,
+    stride_cm: tl.int64,
-    stride_cn,
+    stride_cn: tl.int64,
-    stride_asm,
+    stride_asm: tl.int64,
-    stride_ask,
+    stride_ask: tl.int64,
-    stride_bse,
+    stride_bse: tl.int64,
-    stride_bsk,
+    stride_bsk: tl.int64,
-    stride_bsn,
+    stride_bsn: tl.int64,
-    stride_bbe,  # bias expert stride
+    stride_bbe: tl.int64,  # bias expert stride
-    stride_bbn,  # bias N stride
+    stride_bbn: tl.int64,  # bias N stride
    # Block size for block-wise quantization
    group_n: tl.constexpr,
    group_k: tl.constexpr,