Unverified commit 90caf06c, authored by Yineng Zhang and committed by GitHub
Browse files

fix: use DeepEPDispatcher on CUDA (#5180)

parent 6669d127
...@@ -50,7 +50,6 @@ from sglang.srt.layers.linear import ( ...@@ -50,7 +50,6 @@ from sglang.srt.layers.linear import (
) )
from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.logits_processor import LogitsProcessor
from sglang.srt.layers.moe.ep_moe.layer import DeepEPMoE, EPMoE from sglang.srt.layers.moe.ep_moe.layer import DeepEPMoE, EPMoE
from sglang.srt.layers.moe.ep_moe.token_dispatcher import DeepEPDispatcher
from sglang.srt.layers.moe.fused_moe_triton import FusedMoE from sglang.srt.layers.moe.fused_moe_triton import FusedMoE
from sglang.srt.layers.moe.topk import select_experts from sglang.srt.layers.moe.topk import select_experts
from sglang.srt.layers.quantization.base_config import QuantizationConfig from sglang.srt.layers.quantization.base_config import QuantizationConfig
...@@ -79,6 +78,8 @@ _is_cuda = is_cuda() ...@@ -79,6 +78,8 @@ _is_cuda = is_cuda()
if _is_cuda: if _is_cuda:
from sgl_kernel import awq_dequantize, bmm_fp8 from sgl_kernel import awq_dequantize, bmm_fp8
from sglang.srt.layers.moe.ep_moe.token_dispatcher import DeepEPDispatcher
else: else:
from vllm import _custom_ops as ops from vllm import _custom_ops as ops
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.