update op: move the `lightop` import from module level into the VLLM_USE_LIGHTOP-guarded call sites

92c6171e · zhuwenwen · a857453f · 92c6171e · 92c6171e · 92c6171e
Commit 92c6171e authored Sep 26, 2025 by zhuwenwen
3 changed files
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -43,8 +43,6 @@ from vllm.utils import direct_register_custom_op
 # from .rocm_aiter_fused_moe import is_rocm_aiter_moe_enabled
-if envs.VLLM_USE_LIGHTOP:
-    from lightop import op
 os.environ['DPSK_FP16_QUICK'] = os.environ.get('DPSK_FP16_QUICK', '0')
 dpsk_fp16_quick = os.environ.get('DPSK_FP16_QUICK') == '1'
@@ -1771,6 +1769,7 @@ def fused_experts_impl(
                                use_nn_moe=use_nn_moe)
        if envs.VLLM_USE_LIGHTOP and not dpsk_fp16_quick: 
+            from lightop import op as op
            op.moe_sum(intermediate_cache3.view(*intermediate_cache3.size()),
                    out_hidden_states[begin_chunk_idx:end_chunk_idx], shared_output[begin_chunk_idx:end_chunk_idx], None, routed_scaling_factor)
        # else:

--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -43,8 +43,6 @@ from vllm.platforms.interface import CpuArchEnum
 from vllm.utils import direct_register_custom_op, has_deep_ep, has_pplx
 from vllm import _custom_ops as ops
-if envs.VLLM_USE_LIGHTOP:
-    from lightop import op as op
 if current_platform.is_cuda_alike():
    from .fused_batched_moe import BatchedTritonExperts
@@ -1287,6 +1285,7 @@ class FusedMoE(torch.nn.Module):
            assert num_expert_group is not None
            if use_fused_gate:
                if envs.VLLM_USE_LIGHTOP:
+                    from lightop import op as op
                    topk_weights, topk_ids = op.moe_fused_gate(
                        router_logits,
                        e_score_correction_bias,

--- a/vllm/model_executor/layers/fused_moe/moe_align_block_size.py
+++ b/vllm/model_executor/layers/fused_moe/moe_align_block_size.py
@@ -9,8 +9,6 @@ from vllm.triton_utils import tl, triton
 from vllm.utils import cdiv, round_up
 import vllm.envs as envs
-if envs.VLLM_USE_LIGHTOP:
-    from lightop import op as op
 @triton.jit
@@ -234,6 +232,7 @@ def moe_align_block_size(
                                      device=topk_ids.device)
    if envs.VLLM_USE_LIGHTOP:
+        from lightop import op as op
        op.moe_align_block_size(topk_ids, num_experts, block_size, sorted_ids,
                                expert_ids, num_tokens_post_pad, None, None, None)
    else: