Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
e5afb88b
Unverified
Commit
e5afb88b
authored
Jun 24, 2025
by
Yuhong Guo
Committed by
GitHub
Jun 23, 2025
Browse files
Support weight loading without mmap (#7469)
parent
e5ddeb04
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
21 additions
and
2 deletions
+21
-2
python/sglang/srt/managers/schedule_batch.py
python/sglang/srt/managers/schedule_batch.py
+1
-0
python/sglang/srt/model_loader/loader.py
python/sglang/srt/model_loader/loader.py
+8
-1
python/sglang/srt/model_loader/weight_utils.py
python/sglang/srt/model_loader/weight_utils.py
+6
-1
python/sglang/srt/server_args.py
python/sglang/srt/server_args.py
+6
-0
No files found.
python/sglang/srt/managers/schedule_batch.py
View file @
e5afb88b
...
...
@@ -101,6 +101,7 @@ GLOBAL_SERVER_ARGS_KEYS = [
     "torchao_config",
     "triton_attention_reduce_in_fp32",
     "num_reserved_decode_tokens",
+    "weight_loader_disable_mmap",
 ]
 # Put some global args for easy access
# Put some global args for easy access
...
...
python/sglang/srt/model_loader/loader.py
View file @
e5afb88b
...
...
@@ -337,7 +337,14 @@ class DefaultModelLoader(BaseModelLoader):
                 hf_weights_files,
             )
         elif use_safetensors:
-            weights_iterator = safetensors_weights_iterator(hf_weights_files)
+            from sglang.srt.managers.schedule_batch import global_server_args_dict
+
+            weight_loader_disable_mmap = global_server_args_dict.get(
+                "weight_loader_disable_mmap"
+            )
+            weights_iterator = safetensors_weights_iterator(
+                hf_weights_files, disable_mmap=weight_loader_disable_mmap
+            )
         else:
             weights_iterator = pt_weights_iterator(hf_weights_files)
...
...
python/sglang/srt/model_loader/weight_utils.py
View file @
e5afb88b
...
...
@@ -422,6 +422,7 @@ def safetensors_weights_iterator(
     hf_weights_files: List[str],
     is_all_weights_sharded: bool = False,
     decryption_key: Optional[str] = None,
+    disable_mmap: bool = False,
 ) -> Generator[Tuple[str, torch.Tensor], None, None]:
     """Iterate over the weights in the model safetensor files.
...
...
@@ -443,7 +444,11 @@ def safetensors_weights_iterator(
         disable=not enable_tqdm,
         bar_format=_BAR_FORMAT,
     ):
-        result = safetensors.torch.load_file(st_file, device="cpu")
+        if disable_mmap:
+            with open(st_file, "rb") as f:
+                result = safetensors.torch.load(f.read())
+        else:
+            result = safetensors.torch.load_file(st_file, device="cpu")
         for name, param in result.items():
             yield name, param
...
...
python/sglang/srt/server_args.py
View file @
e5afb88b
...
...
@@ -237,6 +237,7 @@ class ServerArgs:
     # For model weight update
     custom_weight_loader: Optional[List[str]] = None
+    weight_loader_disable_mmap: bool = False

     def __post_init__(self):
         # Expert parallelism
...
...
@@ -1599,6 +1600,11 @@ class ServerArgs:
             default=None,
             help="The custom dataloader which used to update the model. Should be set with a valid import path, such as my_package.weight_load_func",
         )
+        parser.add_argument(
+            "--weight-loader-disable-mmap",
+            action="store_true",
+            help="Disable mmap while loading weight using safetensors.",
+        )

     @classmethod
     def from_cli_args(cls, args: argparse.Namespace):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment