Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhaoyu6
sglang
Commits
447be242
"vscode:/vscode.git/clone" did not exist on "268684439b4d5e99cc73937848c222a0322dc50a"
Unverified
Commit
447be242
authored
May 28, 2025
by
fzyzcjy
Committed by
GitHub
May 27, 2025
Browse files
Fix OOM when updating expert locations (#6660)
parent
183d9f96
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
27 additions
and
17 deletions
+27
-17
python/sglang/srt/model_executor/expert_location_updater.py
python/sglang/srt/model_executor/expert_location_updater.py
+24
-15
python/sglang/srt/model_executor/model_runner.py
python/sglang/srt/model_executor/model_runner.py
+3
-2
No files found.
python/sglang/srt/model_executor/expert_location_updater.py
View file @
447be242
...
...
@@ -27,12 +27,21 @@ from sglang.srt.managers.expert_location import (
logger
=
logging
.
getLogger
(
__name__
)
def
update_expert_location
(
class
ExpertLocationUpdater
:
def
__init__
(
self
):
self
.
_first_execution
=
True
def
update
(
self
,
routed_experts_weights_of_layer
:
Dict
[
int
,
List
[
torch
.
Tensor
]],
new_expert_location_metadata
:
ExpertLocationMetadata
,
nnodes
:
int
,
rank
:
int
,
):
):
if
self
.
_first_execution
:
self
.
_first_execution
=
False
torch
.
cuda
.
empty_cache
()
old_expert_location_metadata
=
get_global_expert_location_metadata
()
_update_expert_weights
(
routed_experts_weights_of_layer
,
...
...
python/sglang/srt/model_executor/model_runner.py
View file @
447be242
...
...
@@ -73,8 +73,8 @@ from sglang.srt.mem_cache.memory_pool import (
TokenToKVPoolAllocator
,
)
from
sglang.srt.mem_cache.paged_allocator
import
PagedTokenToKVPoolAllocator
from
sglang.srt.model_executor
import
expert_location_updater
from
sglang.srt.model_executor.cuda_graph_runner
import
CudaGraphRunner
from
sglang.srt.model_executor.expert_location_updater
import
ExpertLocationUpdater
from
sglang.srt.model_executor.forward_batch_info
import
ForwardBatch
,
PPProxyTensors
from
sglang.srt.model_loader
import
get_model
from
sglang.srt.model_loader.loader
import
(
...
...
@@ -267,6 +267,7 @@ class ModelRunner:
if
self
.
server_args
.
enable_eplb
and
(
not
self
.
is_draft_worker
)
else
None
)
self
.
expert_location_updater
=
ExpertLocationUpdater
()
# Load the model
self
.
sampler
=
Sampler
()
...
...
@@ -600,7 +601,7 @@ class ModelRunner:
def
update_expert_location
(
self
,
new_expert_location_metadata
:
ExpertLocationMetadata
):
expert_location_updater
.
update
_expert_location
(
self
.
expert_location_updater
.
update
(
self
.
model
.
routed_experts_weights_of_layer
,
new_expert_location_metadata
,
nnodes
=
self
.
server_args
.
nnodes
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment