update fused_moe.py

031d4ca8 · zhuwenwen · 77f7bb45 · 031d4ca8
Commit 031d4ca8 authored Apr 01, 2025 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing with 30 additions and 30 deletions

vllm/model_executor/layers/fused_moe/fused_moe.py vllm/model_executor/layers/fused_moe/fused_moe.py +30 -30

No files found.
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -109,20 +109,6 @@ def fused_moe_kernel_gptq_awq(
    # Map program ids `pid` to the block of C it should compute.
    # This is done in a grouped ordering to promote L2 data reuse.
    pid = tl.program_id(axis=0)
-    # num_pid_m = tl.cdiv(EM, BLOCK_SIZE_M)
-    # num_pid_n = tl.cdiv(N, BLOCK_SIZE_N)
-    # num_pid_in_group = GROUP_SIZE_M * num_pid_n
-    # group_id = pid // num_pid_in_group
-    # first_pid_m = group_id * GROUP_SIZE_M
-    # group_size_m = min(num_pid_m - first_pid_m, GROUP_SIZE_M)
-    # pid_m = first_pid_m + ((pid % num_pid_in_group) % group_size_m)
-    # pid_n = (pid % num_pid_in_group) // group_size_m
-    if GROUP_SIZE_M ==1:
-        num_pid_n = tl.cdiv(N, BLOCK_SIZE_N)
-        pid_m = pid // num_pid_n
-        pid_n = pid % num_pid_n
-    else:
    num_pid_m = tl.cdiv(EM, BLOCK_SIZE_M)
    num_pid_n = tl.cdiv(N, BLOCK_SIZE_N)
    num_pid_in_group = GROUP_SIZE_M * num_pid_n
@@ -332,6 +318,19 @@ def fused_moe_kernel(
    # Map program ids `pid` to the block of C it should compute.
    # This is done in a grouped ordering to promote L2 data reuse.
    pid = tl.program_id(axis=0)
+    # num_pid_m = tl.cdiv(EM, BLOCK_SIZE_M)
+    # num_pid_n = tl.cdiv(N, BLOCK_SIZE_N)
+    # num_pid_in_group = GROUP_SIZE_M * num_pid_n
+    # group_id = pid // num_pid_in_group
+    # first_pid_m = group_id * GROUP_SIZE_M
+    # group_size_m = min(num_pid_m - first_pid_m, GROUP_SIZE_M)
+    # pid_m = first_pid_m + ((pid % num_pid_in_group) % group_size_m)
+    # pid_n = (pid % num_pid_in_group) // group_size_m
+    if GROUP_SIZE_M ==1:
+        num_pid_n = tl.cdiv(N, BLOCK_SIZE_N)
+        pid_m = pid // num_pid_n
+        pid_n = pid % num_pid_n
+    else:
        num_pid_m = tl.cdiv(EM, BLOCK_SIZE_M)
        num_pid_n = tl.cdiv(N, BLOCK_SIZE_N)
        num_pid_in_group = GROUP_SIZE_M * num_pid_n
@@ -341,6 +340,7 @@ def fused_moe_kernel(
        pid_m = first_pid_m + ((pid % num_pid_in_group) % group_size_m)
        pid_n = (pid % num_pid_in_group) // group_size_m
    # ----------------------------------------------------------
    # Create pointers for the first blocks of A and B.
    # We will advance this pointer as we move in the K direction