Unverified commit dfab5f37, authored by Wentao Ye and committed by GitHub
Browse files

[Bug] Fix benchmark script `moe_permute_unpermute` (#32949)


Signed-off-by: yewentao256 <zhyanwentao@126.com>
parent 586a57ad
......@@ -8,7 +8,7 @@ import ray
import torch
from transformers import AutoConfig
from vllm.model_executor.layers.fused_moe.fused_moe import *
from vllm.model_executor.layers.fused_moe import fused_topk
from vllm.model_executor.layers.fused_moe.moe_permute_unpermute import (
_moe_permute,
_moe_unpermute_and_reduce,
......@@ -86,9 +86,7 @@ def benchmark_permute(
sorted_token_ids,
expert_ids,
inv_perm,
) = _moe_permute(
qhidden_states, None, topk_ids, num_experts, None, align_block_size
)
) = _moe_permute(qhidden_states, None, topk_ids, num_experts, None, 16)
# JIT compilation & warmup
run()
......@@ -182,7 +180,7 @@ def benchmark_unpermute(
expert_ids,
inv_perm,
) = _moe_permute(
qhidden_states, None, topk_ids, num_experts, None, align_block_size
qhidden_states, None, topk_ids, num_experts, None, block_m=16
)
# convert to fp16/bf16 as gemm output
return (
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment