Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
e5afb88b
"cmake/vscode:/vscode.git/clone" did not exist on "9a7235faf2835d424c4587e703024248e6b9f465"
Unverified
Commit
e5afb88b
authored
Jun 24, 2025
by
Yuhong Guo
Committed by
GitHub
Jun 23, 2025
Browse files
Support weight loading without mmap (#7469)
parent
e5ddeb04
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
21 additions
and
2 deletions
+21
-2
python/sglang/srt/managers/schedule_batch.py
python/sglang/srt/managers/schedule_batch.py
+1
-0
python/sglang/srt/model_loader/loader.py
python/sglang/srt/model_loader/loader.py
+8
-1
python/sglang/srt/model_loader/weight_utils.py
python/sglang/srt/model_loader/weight_utils.py
+6
-1
python/sglang/srt/server_args.py
python/sglang/srt/server_args.py
+6
-0
No files found.
python/sglang/srt/managers/schedule_batch.py
View file @
e5afb88b
...
...
@@ -101,6 +101,7 @@ GLOBAL_SERVER_ARGS_KEYS = [
"torchao_config"
,
"triton_attention_reduce_in_fp32"
,
"num_reserved_decode_tokens"
,
"weight_loader_disable_mmap"
,
]
# Put some global args for easy access
...
...
python/sglang/srt/model_loader/loader.py
View file @
e5afb88b
...
...
@@ -337,7 +337,14 @@ class DefaultModelLoader(BaseModelLoader):
hf_weights_files
,
)
elif
use_safetensors
:
weights_iterator
=
safetensors_weights_iterator
(
hf_weights_files
)
from
sglang.srt.managers.schedule_batch
import
global_server_args_dict
weight_loader_disable_mmap
=
global_server_args_dict
.
get
(
"weight_loader_disable_mmap"
)
weights_iterator
=
safetensors_weights_iterator
(
hf_weights_files
,
disable_mmap
=
weight_loader_disable_mmap
)
else
:
weights_iterator
=
pt_weights_iterator
(
hf_weights_files
)
...
...
python/sglang/srt/model_loader/weight_utils.py
View file @
e5afb88b
...
...
@@ -422,6 +422,7 @@ def safetensors_weights_iterator(
hf_weights_files
:
List
[
str
],
is_all_weights_sharded
:
bool
=
False
,
decryption_key
:
Optional
[
str
]
=
None
,
disable_mmap
:
bool
=
False
,
)
->
Generator
[
Tuple
[
str
,
torch
.
Tensor
],
None
,
None
]:
"""Iterate over the weights in the model safetensor files.
...
...
@@ -443,7 +444,11 @@ def safetensors_weights_iterator(
disable
=
not
enable_tqdm
,
bar_format
=
_BAR_FORMAT
,
):
result
=
safetensors
.
torch
.
load_file
(
st_file
,
device
=
"cpu"
)
if
disable_mmap
:
with
open
(
st_file
,
"rb"
)
as
f
:
result
=
safetensors
.
torch
.
load
(
f
.
read
())
else
:
result
=
safetensors
.
torch
.
load_file
(
st_file
,
device
=
"cpu"
)
for
name
,
param
in
result
.
items
():
yield
name
,
param
...
...
python/sglang/srt/server_args.py
View file @
e5afb88b
...
...
@@ -237,6 +237,7 @@ class ServerArgs:
# For model weight update
custom_weight_loader
:
Optional
[
List
[
str
]]
=
None
weight_loader_disable_mmap
:
bool
=
False
def
__post_init__
(
self
):
# Expert parallelism
...
...
@@ -1599,6 +1600,11 @@ class ServerArgs:
default
=
None
,
help
=
"The custom dataloader which used to update the model. Should be set with a valid import path, such as my_package.weight_load_func"
,
)
parser
.
add_argument
(
"--weight-loader-disable-mmap"
,
action
=
"store_true"
,
help
=
"Disable mmap while loading weight using safetensors."
,
)
@
classmethod
def
from_cli_args
(
cls
,
args
:
argparse
.
Namespace
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment