Commit 00bbf0bb authored by zhuwenwen
Browse files

update moe_sum interface

parent 484fcfca
......@@ -1770,8 +1770,9 @@ def fused_experts_impl(
if envs.VLLM_USE_LIGHTOP and not dpsk_fp16_quick:
from lightop import op as op
op.moe_sum(intermediate_cache3.view(*intermediate_cache3.size()),
out_hidden_states[begin_chunk_idx:end_chunk_idx], shared_output[begin_chunk_idx:end_chunk_idx], None, routed_scaling_factor)
op.moe_sum(input=intermediate_cache3.view(*intermediate_cache3.size()),
output=out_hidden_states[begin_chunk_idx:end_chunk_idx], bias=shared_output[begin_chunk_idx:end_chunk_idx],
expert_mask=None, num_local_tokens=None, factor=routed_scaling_factor)
# else:
# ops.moe_sum(intermediate_cache3.view(*intermediate_cache3.size()),
# out_hidden_states[begin_chunk_idx:end_chunk_idx])
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment