Unverified commit 211f4070, authored by Trevor Morris and committed by GitHub
Browse files

fix: Lazy import mooncake-ep to fix extra gpu contexts being created (#12641)

parent befa41a1
...@@ -21,13 +21,6 @@ from sglang.srt.utils import get_int_env_var ...@@ -21,13 +21,6 @@ from sglang.srt.utils import get_int_env_var
if TYPE_CHECKING: if TYPE_CHECKING:
from sglang.srt.single_batch_overlap import CombineOverlapArgs from sglang.srt.single_batch_overlap import CombineOverlapArgs
try:
from mooncake.mooncake_ep_buffer import Buffer
use_mooncake_ep = True
except ImportError:
use_mooncake_ep = False
from enum import Enum, auto from enum import Enum, auto
import torch import torch
...@@ -86,6 +79,9 @@ class EPBuffer: ...@@ -86,6 +79,9 @@ class EPBuffer:
if cls._buffer is not None: if cls._buffer is not None:
return cls._buffer return cls._buffer
# Lazy import Buffer to avoid creating CUDA context at module import time
from mooncake.mooncake_ep_buffer import Buffer
cls._hidden_size = hidden_size cls._hidden_size = hidden_size
cls._num_max_dispatch_tokens_per_rank = num_max_dispatch_tokens_per_rank cls._num_max_dispatch_tokens_per_rank = num_max_dispatch_tokens_per_rank
cls._num_experts = num_experts cls._num_experts = num_experts
...@@ -122,7 +118,9 @@ class _MooncakeEPDispatcherImpl: ...@@ -122,7 +118,9 @@ class _MooncakeEPDispatcherImpl:
return_recv_hook: bool, return_recv_hook: bool,
deepep_mode: DeepEPMode, deepep_mode: DeepEPMode,
): ):
if not use_mooncake_ep: try:
from mooncake.mooncake_ep_buffer import Buffer # noqa: F401
except ImportError:
raise ImportError( raise ImportError(
"Mooncake EP is not installed. Please install Mooncake package at " "Mooncake EP is not installed. Please install Mooncake package at "
"https://github.com/kvcache-ai/Mooncake/blob/main/doc/en/build.md " "https://github.com/kvcache-ai/Mooncake/blob/main/doc/en/build.md "
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment