Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
211f4070
Unverified
Commit
211f4070
authored
Nov 04, 2025
by
Trevor Morris
Committed by
GitHub
Nov 04, 2025
Browse files
fix: Lazy import mooncake-ep to fix extra gpu contexts being created (#12641)
parent
befa41a1
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
8 deletions
+6
-8
python/sglang/srt/layers/moe/token_dispatcher/mooncake.py
python/sglang/srt/layers/moe/token_dispatcher/mooncake.py
+6
-8
No files found.
python/sglang/srt/layers/moe/token_dispatcher/mooncake.py
View file @
211f4070
...
@@ -21,13 +21,6 @@ from sglang.srt.utils import get_int_env_var
...
@@ -21,13 +21,6 @@ from sglang.srt.utils import get_int_env_var
if
TYPE_CHECKING
:
if
TYPE_CHECKING
:
from
sglang.srt.single_batch_overlap
import
CombineOverlapArgs
from
sglang.srt.single_batch_overlap
import
CombineOverlapArgs
try
:
from
mooncake.mooncake_ep_buffer
import
Buffer
use_mooncake_ep
=
True
except
ImportError
:
use_mooncake_ep
=
False
from
enum
import
Enum
,
auto
from
enum
import
Enum
,
auto
import
torch
import
torch
...
@@ -86,6 +79,9 @@ class EPBuffer:
...
@@ -86,6 +79,9 @@ class EPBuffer:
if
cls
.
_buffer
is
not
None
:
if
cls
.
_buffer
is
not
None
:
return
cls
.
_buffer
return
cls
.
_buffer
# Lazy import Buffer to avoid creating CUDA context at module import time
from
mooncake.mooncake_ep_buffer
import
Buffer
cls
.
_hidden_size
=
hidden_size
cls
.
_hidden_size
=
hidden_size
cls
.
_num_max_dispatch_tokens_per_rank
=
num_max_dispatch_tokens_per_rank
cls
.
_num_max_dispatch_tokens_per_rank
=
num_max_dispatch_tokens_per_rank
cls
.
_num_experts
=
num_experts
cls
.
_num_experts
=
num_experts
...
@@ -122,7 +118,9 @@ class _MooncakeEPDispatcherImpl:
...
@@ -122,7 +118,9 @@ class _MooncakeEPDispatcherImpl:
return_recv_hook
:
bool
,
return_recv_hook
:
bool
,
deepep_mode
:
DeepEPMode
,
deepep_mode
:
DeepEPMode
,
):
):
if
not
use_mooncake_ep
:
try
:
from
mooncake.mooncake_ep_buffer
import
Buffer
# noqa: F401
except
ImportError
:
raise
ImportError
(
raise
ImportError
(
"Mooncake EP is not installed. Please install Mooncake package at "
"Mooncake EP is not installed. Please install Mooncake package at "
"https://github.com/kvcache-ai/Mooncake/blob/main/doc/en/build.md "
"https://github.com/kvcache-ai/Mooncake/blob/main/doc/en/build.md "
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment