Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
56a1b6e3
Unverified
Commit
56a1b6e3
authored
Feb 09, 2026
by
William Arnold
Committed by
GitHub
Feb 10, 2026
Browse files
feat: Add SGLang /engine weight update endpoints (#6094)
parent
98eb6b7e
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
82 additions
and
0 deletions
+82
-0
components/src/dynamo/sglang/request_handlers/handler_base.py
...onents/src/dynamo/sglang/request_handlers/handler_base.py
+82
-0
No files found.
components/src/dynamo/sglang/request_handlers/handler_base.py
View file @
56a1b6e3
...
@@ -250,6 +250,73 @@ class BaseWorkerHandler(BaseGenerativeHandler):
...
@@ -250,6 +250,73 @@ class BaseWorkerHandler(BaseGenerativeHandler):
await
self
.
engine
.
tokenizer_manager
.
stop_profile
()
await
self
.
engine
.
tokenizer_manager
.
stop_profile
()
return
{
"status"
:
"ok"
,
"message"
:
"Profiling stopped"
}
return
{
"status"
:
"ok"
,
"message"
:
"Profiling stopped"
}
async def update_weights_from_disk(self, body: dict) -> dict:
    """Reload model weights from disk without restarting the server.

    Args:
        body: Keyword arguments used to construct an
            ``UpdateWeightFromDiskReqInput``.

    Returns:
        A dict carrying the ``success`` flag, the ``message`` and the
        ``num_paused_requests`` value returned by the tokenizer manager.
    """
    from sglang.srt.managers.io_struct import UpdateWeightFromDiskReqInput

    request_input = UpdateWeightFromDiskReqInput(**body)
    manager = self.engine.tokenizer_manager
    # NOTE(review): the second positional argument is passed as None;
    # presumably it is an optional HTTP request object -- confirm.
    ok, detail, paused = await manager.update_weights_from_disk(
        request_input, None
    )
    return {
        "success": ok,
        "message": detail,
        "num_paused_requests": paused,
    }
async def update_weights_from_tensor(self, body: dict) -> dict:
    """Apply a tensor-based weight update without restarting the server.

    Args:
        body: Keyword arguments used to construct an
            ``UpdateWeightsFromTensorReqInput``.

    Returns:
        A dict with the ``success`` flag and the ``message`` returned by
        the tokenizer manager.
    """
    from sglang.srt.managers.io_struct import UpdateWeightsFromTensorReqInput

    request_input = UpdateWeightsFromTensorReqInput(**body)
    manager = self.engine.tokenizer_manager
    # NOTE(review): the second positional argument is passed as None;
    # presumably it is an optional HTTP request object -- confirm.
    ok, detail = await manager.update_weights_from_tensor(request_input, None)
    return {"success": ok, "message": detail}
async def update_weights_from_distributed(self, body: dict) -> dict:
    """Update model weights through distributed online synchronization.

    Args:
        body: Keyword arguments used to construct an
            ``UpdateWeightsFromDistributedReqInput``.

    Returns:
        A dict with the ``success`` flag and the ``message`` returned by
        the tokenizer manager.
    """
    from sglang.srt.managers.io_struct import (
        UpdateWeightsFromDistributedReqInput,
    )

    request_input = UpdateWeightsFromDistributedReqInput(**body)
    manager = self.engine.tokenizer_manager
    # NOTE(review): the second positional argument is passed as None;
    # presumably it is an optional HTTP request object -- confirm.
    ok, detail = await manager.update_weights_from_distributed(
        request_input, None
    )
    return {"success": ok, "message": detail}
async def update_weights_from_ipc(self, body: dict) -> dict:
    """Update model weights over IPC for checkpoint-engine integration.

    Args:
        body: Keyword arguments used to construct an
            ``UpdateWeightsFromIPCReqInput``.

    Returns:
        A dict with the ``success`` flag and the ``message`` returned by
        the tokenizer manager.
    """
    from sglang.srt.managers.io_struct import UpdateWeightsFromIPCReqInput

    request_input = UpdateWeightsFromIPCReqInput(**body)
    manager = self.engine.tokenizer_manager
    # NOTE(review): the second positional argument is passed as None;
    # presumably it is an optional HTTP request object -- confirm.
    ok, detail = await manager.update_weights_from_ipc(request_input, None)

    # After a successful update, flip ``initial_weights_loaded`` to True
    # if it is not already set.
    if ok:
        if not manager.initial_weights_loaded:
            manager.initial_weights_loaded = True

    return {"success": ok, "message": detail}
async def update_weight_version(self, body: dict) -> dict:
    """Set the active weight version; the model weights are left untouched.

    Args:
        body: Keyword arguments used to construct an
            ``UpdateWeightVersionReqInput``.

    Returns:
        A dict with ``success`` set to True, a confirmation ``message``
        and the ``new_version`` that was stored.
    """
    from sglang.srt.managers.io_struct import UpdateWeightVersionReqInput

    request_input = UpdateWeightVersionReqInput(**body)
    manager = self.engine.tokenizer_manager

    # Abort every request first when the caller asked for it.
    if request_input.abort_all_requests:
        manager.abort_request(abort_all=True)

    version = request_input.new_version
    # The version is recorded on the tokenizer manager's server args.
    manager.server_args.weight_version = version

    return {
        "success": True,
        "message": f"Weight version updated to {version}",
        "new_version": version,
    }
def
register_engine_routes
(
self
,
runtime
)
->
None
:
def
register_engine_routes
(
self
,
runtime
)
->
None
:
"""Register all engine routes for this handler.
"""Register all engine routes for this handler.
...
@@ -264,6 +331,21 @@ class BaseWorkerHandler(BaseGenerativeHandler):
...
@@ -264,6 +331,21 @@ class BaseWorkerHandler(BaseGenerativeHandler):
runtime
.
register_engine_route
(
runtime
.
register_engine_route
(
"resume_memory_occupation"
,
self
.
resume_memory_occupation
"resume_memory_occupation"
,
self
.
resume_memory_occupation
)
)
runtime
.
register_engine_route
(
"update_weights_from_disk"
,
self
.
update_weights_from_disk
)
runtime
.
register_engine_route
(
"update_weights_from_tensor"
,
self
.
update_weights_from_tensor
)
runtime
.
register_engine_route
(
"update_weights_from_distributed"
,
self
.
update_weights_from_distributed
)
runtime
.
register_engine_route
(
"update_weights_from_ipc"
,
self
.
update_weights_from_ipc
)
runtime
.
register_engine_route
(
"update_weight_version"
,
self
.
update_weight_version
)
@
abstractmethod
@
abstractmethod
async
def
generate
(
self
,
request
:
Dict
[
str
,
Any
],
context
:
Context
):
async
def
generate
(
self
,
request
:
Dict
[
str
,
Any
],
context
:
Context
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment