Unverified commit dfab5f37, authored by Wentao Ye, committed by GitHub
Browse files

[Bug] Fix benchmark script `moe_permute_unpermute` (#32949)


Signed-off-by: yewentao256 <zhyanwentao@126.com>
parent 586a57ad
...@@ -8,7 +8,7 @@ import ray ...@@ -8,7 +8,7 @@ import ray
import torch import torch
from transformers import AutoConfig from transformers import AutoConfig
from vllm.model_executor.layers.fused_moe.fused_moe import * from vllm.model_executor.layers.fused_moe import fused_topk
from vllm.model_executor.layers.fused_moe.moe_permute_unpermute import ( from vllm.model_executor.layers.fused_moe.moe_permute_unpermute import (
_moe_permute, _moe_permute,
_moe_unpermute_and_reduce, _moe_unpermute_and_reduce,
...@@ -86,9 +86,7 @@ def benchmark_permute( ...@@ -86,9 +86,7 @@ def benchmark_permute(
sorted_token_ids, sorted_token_ids,
expert_ids, expert_ids,
inv_perm, inv_perm,
) = _moe_permute( ) = _moe_permute(qhidden_states, None, topk_ids, num_experts, None, 16)
qhidden_states, None, topk_ids, num_experts, None, align_block_size
)
# JIT compilation & warmup # JIT compilation & warmup
run() run()
...@@ -182,7 +180,7 @@ def benchmark_unpermute( ...@@ -182,7 +180,7 @@ def benchmark_unpermute(
expert_ids, expert_ids,
inv_perm, inv_perm,
) = _moe_permute( ) = _moe_permute(
qhidden_states, None, topk_ids, num_experts, None, align_block_size qhidden_states, None, topk_ids, num_experts, None, block_m=16
) )
# convert to fp16/bf16 as gemm output # convert to fp16/bf16 as gemm output
return ( return (
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment