Unverified commit f9995398, authored by danisereb and committed by GitHub
Browse files

Add missing import of fused_topk to benchmark_moe (#32784)


Signed-off-by: Daniel Serebrenik <daserebrenik@nvidia.com>
parent e1da249c
...@@ -15,6 +15,7 @@ import ray ...@@ -15,6 +15,7 @@ import ray
import torch import torch
from ray.experimental.tqdm_ray import tqdm from ray.experimental.tqdm_ray import tqdm
from vllm.model_executor.layers.fused_moe import fused_topk
from vllm.model_executor.layers.fused_moe.config import ( from vllm.model_executor.layers.fused_moe.config import (
FusedMoEConfig, FusedMoEConfig,
FusedMoEParallelConfig, FusedMoEParallelConfig,
...@@ -200,23 +201,26 @@ def benchmark_config( ...@@ -200,23 +201,26 @@ def benchmark_config(
block_shape=block_quant_shape, block_shape=block_quant_shape,
) )
deep_gemm_experts = mk.FusedMoEModularKernel( deep_gemm_experts = None
prepare_finalize=MoEPrepareAndFinalizeNoEP(), if use_deep_gemm:
fused_experts=TritonOrDeepGemmExperts( deep_gemm_experts = mk.FusedMoEModularKernel(
moe_config=FusedMoEConfig( prepare_finalize=MoEPrepareAndFinalizeNoEP(),
num_experts=num_experts, fused_experts=TritonOrDeepGemmExperts(
experts_per_token=topk, moe_config=FusedMoEConfig(
hidden_dim=hidden_size, num_experts=num_experts,
intermediate_size_per_partition=shard_intermediate_size, experts_per_token=topk,
num_local_experts=num_experts, hidden_dim=hidden_size,
activation="silu", intermediate_size_per_partition=shard_intermediate_size,
parallel_config=FusedMoEParallelConfig.make_no_parallel(), num_local_experts=num_experts,
in_dtype=init_dtype, activation="silu",
routing_method=RoutingMethodType.TopK, moe_parallel_config=FusedMoEParallelConfig.make_no_parallel(),
in_dtype=init_dtype,
routing_method=RoutingMethodType.TopK,
device="cuda",
),
quant_config=quant_config,
), ),
quant_config=quant_config, )
),
)
with override_config(config): with override_config(config):
topk_weights, topk_ids, token_expert_indices = fused_topk( topk_weights, topk_ids, token_expert_indices = fused_topk(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment