Import lightop lazily inside the VLLM_USE_LIGHTOP-guarded call sites

dfe1a844 · zhuwenwen · dbce493a · dfe1a844 · dfe1a844
Commit dfe1a844 authored Sep 26, 2025 by zhuwenwen
2 changed files
--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -47,7 +47,6 @@ from vllm.platforms.interface import CpuArchEnum
 from vllm.utils import (cdiv, direct_register_custom_op, has_deep_ep, has_pplx,
                        round_up)
 from vllm import _custom_ops as ops
-from lightop import op

 if current_platform.is_cuda_alike():
    from .fused_batched_moe import BatchedTritonExperts
@@ -1579,6 +1578,7 @@ class FusedMoE(CustomOp):
            assert num_expert_group is not None
            if use_fused_gate:
                if envs.VLLM_USE_LIGHTOP:
+                    from lightop import op as op
                    topk_weights, topk_ids = op.moe_fused_gate(
                        router_logits,
                        e_score_correction_bias,

--- a/vllm/model_executor/layers/fused_moe/moe_align_block_size.py
+++ b/vllm/model_executor/layers/fused_moe/moe_align_block_size.py
@@ -9,8 +9,6 @@ from vllm.triton_utils import triton
 from vllm.utils import round_up

 import vllm.envs as envs
-if envs.VLLM_USE_LIGHTOP:
-    from lightop import op as op


 def moe_align_block_size(
@@ -97,6 +95,7 @@ def moe_align_block_size(
                                      device=topk_ids.device)

    if envs.VLLM_USE_LIGHTOP:
+        from lightop import op as op
        op.moe_align_block_size(topk_ids, num_experts, block_size, sorted_ids,
                                expert_ids, num_tokens_post_pad, None)
    else: