assertactivationinself.act_to_impl,f"{activation} is not supported."
output=cpu_fused_moe(
assertnotapply_router_weight_on_input
input,
topk_weights,topk_ids=select_experts(
layer.w13_weight,
hidden_states=x,
layer.w2_weight,
router_logits=router_logits,
getattr(layer,"w13_bias",None),
use_grouped_topk=use_grouped_topk,
getattr(layer,"w2_bias",None),
top_k=top_k,
topk_weights,
renormalize=renormalize,
topk_ids,
topk_group=topk_group,
activation,
num_expert_group=num_expert_group,
self.isa,
custom_routing_function=custom_routing_function,
scoring_func=scoring_func,
routed_scaling_factor=routed_scaling_factor,
e_score_correction_bias=e_score_correction_bias,
)
)
returnoutput
def forward_torch(
    self,
    layer: torch.nn.Module,
    input: torch.Tensor,
    topk_weights: torch.Tensor,
    topk_ids: torch.Tensor,
    activation: str,
    global_num_experts: int = -1,
) -> torch.Tensor:
    """Dispatch the MoE computation to the ``vllm.cpu_fused_moe_torch`` op.

    A fresh tensor is allocated with ``torch.empty_like(input)`` and handed
    to the op together with the routing tensors; that same tensor is then
    returned to the caller.

    Args:
        layer: Module whose ``id()`` is forwarded to the op in place of the
            module object itself.
        input: Tensor passed to the op; also the template for the result
            buffer.
        topk_weights: Forwarded to the op unchanged.
        topk_ids: Forwarded to the op unchanged.
        activation: Forwarded to the op unchanged.
        global_num_experts: Forwarded to the op unchanged. Defaults to -1.

    Returns:
        The tensor allocated via ``torch.empty_like(input)``.
    """
    # The buffer starts uninitialized; presumably the op fills it in place
    # (its return value is not used here) -- TODO confirm against the op.
    result = torch.empty_like(input)
    # Only an integer handle for the layer crosses the op boundary.
    torch.ops.vllm.cpu_fused_moe_torch(
        id(layer),
        result,
        input,
        topk_weights,
        topk_ids,
        activation,
        global_num_experts,
    )
    return result
defcpu_fused_moe_torch(
layer_id:int,
output:torch.Tensor,
input:torch.Tensor,
topk_weights:torch.Tensor,
topk_ids:torch.Tensor,
activation:str,
global_num_experts:int=-1,
)->None:
layer=_CPU_MOE_LAYER_CACHE[layer_id]()
# Ref code from https://github.com/sgl-project/sglang/blob/716e682721397df103f347d22da8bd46c6016dab/python/sglang/srt/layers/moe/fused_moe_native.py#L53
# Ref code from https://github.com/sgl-project/sglang/blob/716e682721397df103f347d22da8bd46c6016dab/python/sglang/srt/layers/moe/fused_moe_native.py#L53