Unverified Commit 0f8b5386 authored by Peter Pan, committed by GitHub
Browse files

[fix] benchmark : routed_scaling_factor is None (#8059)


Co-authored-by: Xiaoyu Zhang <35585791+BBuf@users.noreply.github.com>
parent c33499a6
...@@ -18,10 +18,13 @@ def biased_grouped_topk_org(scores, bias, num_expert_group, topk_group, topk): ...@@ -18,10 +18,13 @@ def biased_grouped_topk_org(scores, bias, num_expert_group, topk_group, topk):
renormalize=True, renormalize=True,
num_expert_group=num_expert_group, num_expert_group=num_expert_group,
topk_group=topk_group, topk_group=topk_group,
routed_scaling_factor=2.5, # DeepSeek-R1 : 2.5, Kimi K2: 2.872
) )
def biased_grouped_topk_org_fuse_kernel(
    scores, bias, num_expert_group, topk_group, topk
):
    """Benchmark entry for the fused-gate kernel path.

    Thin wrapper that forwards all routing inputs straight to the
    ``moe_fused_gate`` kernel so ``triton.testing.do_bench`` can time it
    against the unfused reference implementation.
    """
    return moe_fused_gate(scores, bias, num_expert_group, topk_group, topk)
...@@ -61,7 +64,7 @@ def benchmark(seq_length, provider): ...@@ -61,7 +64,7 @@ def benchmark(seq_length, provider):
) )
elif provider == "kernel": elif provider == "kernel":
ms, min_ms, max_ms = triton.testing.do_bench( ms, min_ms, max_ms = triton.testing.do_bench(
lambda: biased_grouped_topk_org_kernel( lambda: biased_grouped_topk_org_fuse_kernel(
scores.clone(), bias.clone(), num_expert_group, topk_group, topk scores.clone(), bias.clone(), num_expert_group, topk_group, topk
), ),
quantiles=quantiles, quantiles=quantiles,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment