Unverified commit 6450c122
Authored May 08, 2025 by fzyzcjy; committed by GitHub on May 08, 2025

Tiny refactor weight loading logic (#5232)

Parent: b6cf3532
Showing 2 changed files with 19 additions and 17 deletions (+19 / -17)
python/sglang/srt/model_executor/model_runner.py  (+1 / -6)
python/sglang/srt/model_loader/loader.py  (+18 / -11)
python/sglang/srt/model_executor/model_runner.py
@@ -557,12 +557,7 @@ class ModelRunner:
             return iter

         def model_load_weights(model, iter):
-            model.load_weights(iter)
-            for _, module in self.model.named_modules():
-                quant_method = getattr(module, "quant_method", None)
-                if quant_method is not None:
-                    with device_loading_context(module, target_device):
-                        quant_method.process_weights_after_loading(module)
+            DefaultModelLoader.load_weights_and_postprocess(model, iter, target_device)
             return model

         with set_default_torch_dtype(self.model_config.dtype):
python/sglang/srt/model_loader/loader.py
@@ -374,20 +374,27 @@ class DefaultModelLoader(BaseModelLoader):
                     self.load_config,
                 )

-            model.load_weights(self._get_all_weights(model_config, model))
-
-            for _, module in model.named_modules():
-                quant_method = getattr(module, "quant_method", None)
-                if quant_method is not None:
-                    # When quant methods need to process weights after loading
-                    # (for repacking, quantizing, etc), they expect parameters
-                    # to be on the global target device. This scope is for the
-                    # case where cpu offloading is used, where we will move the
-                    # parameters onto device for processing and back off after.
-                    with device_loading_context(module, target_device):
-                        quant_method.process_weights_after_loading(module)
+            self.load_weights_and_postprocess(
+                model, self._get_all_weights(model_config, model), target_device
+            )

         return model.eval()

+    @staticmethod
+    def load_weights_and_postprocess(model, weights, target_device):
+        model.load_weights(weights)
+
+        for _, module in model.named_modules():
+            quant_method = getattr(module, "quant_method", None)
+            if quant_method is not None:
+                # When quant methods need to process weights after loading
+                # (for repacking, quantizing, etc), they expect parameters
+                # to be on the global target device. This scope is for the
+                # case where cpu offloading is used, where we will move the
+                # parameters onto device for processing and back off after.
+                with device_loading_context(module, target_device):
+                    quant_method.process_weights_after_loading(module)
+
 class LayeredModelLoader(DefaultModelLoader):
     """Model loader that loads weights layer by layer so that one can quantize a
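The inline comment in the new helper explains why device_loading_context is needed: with CPU offloading, a module's parameters may not live on the target device when quant post-processing (repacking, quantizing, etc.) runs, so they are moved onto the device for processing and back off afterwards. The context manager itself is not part of this diff; the following is only a hypothetical sketch of that behavior, not the actual sglang implementation:

from contextlib import contextmanager

import torch


@contextmanager
def device_loading_context(module: torch.nn.Module, target_device: torch.device):
    # Hypothetical illustration: find parameters that are currently offloaded
    # (i.e. not on target_device), move them on-device for the duration of the
    # block, and restore them to their original devices afterwards.
    offloaded = [p for p in module.parameters(recurse=False) if p.device != target_device]
    original_devices = [p.device for p in offloaded]
    for p in offloaded:
        p.data = p.data.to(target_device)
    try:
        yield
    finally:
        for p, dev in zip(offloaded, original_devices):
            p.data = p.data.to(dev)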