Commit dfe1a844 authored by zhuwenwen
Browse files

update op

parent dbce493a
......@@ -47,7 +47,6 @@ from vllm.platforms.interface import CpuArchEnum
from vllm.utils import (cdiv, direct_register_custom_op, has_deep_ep, has_pplx,
round_up)
from vllm import _custom_ops as ops
from lightop import op
if current_platform.is_cuda_alike():
from .fused_batched_moe import BatchedTritonExperts
......@@ -1579,6 +1578,7 @@ class FusedMoE(CustomOp):
assert num_expert_group is not None
if use_fused_gate:
if envs.VLLM_USE_LIGHTOP:
from lightop import op as op
topk_weights, topk_ids = op.moe_fused_gate(
router_logits,
e_score_correction_bias,
......
......@@ -9,8 +9,6 @@ from vllm.triton_utils import triton
from vllm.utils import round_up
import vllm.envs as envs
if envs.VLLM_USE_LIGHTOP:
from lightop import op as op
def moe_align_block_size(
......@@ -97,6 +95,7 @@ def moe_align_block_size(
device=topk_ids.device)
if envs.VLLM_USE_LIGHTOP:
from lightop import op as op
op.moe_align_block_size(topk_ids, num_experts, block_size, sorted_ids,
expert_ids, num_tokens_post_pad, None)
else:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment