Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
1ce4878d
Unverified
Commit
1ce4878d
authored
Mar 14, 2025
by
wangyu
Committed by
GitHub
Mar 14, 2025
Browse files
feat(remote_model): support variable remote backend for model loader (#3964)
Signed-off-by:
wangyu
<
wangyu.steph@bytedance.com
>
parent
977d7cd2
Changes
22
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
50 additions
and
2 deletions
+50
-2
python/sglang/srt/server_args.py
python/sglang/srt/server_args.py
+11
-0
python/sglang/srt/utils.py
python/sglang/srt/utils.py
+39
-2
No files found.
python/sglang/srt/server_args.py
View file @
1ce4878d
...
@@ -30,6 +30,7 @@ from sglang.srt.utils import (
...
@@ -30,6 +30,7 @@ from sglang.srt.utils import (
is_flashinfer_available
,
is_flashinfer_available
,
is_hip
,
is_hip
,
is_port_available
,
is_port_available
,
is_remote_url
,
is_valid_ipv6_address
,
is_valid_ipv6_address
,
nullable_str
,
nullable_str
,
)
)
...
@@ -296,6 +297,9 @@ class ServerArgs:
...
@@ -296,6 +297,9 @@ class ServerArgs:
)
and
check_gguf_file
(
self
.
model_path
):
)
and
check_gguf_file
(
self
.
model_path
):
self
.
quantization
=
self
.
load_format
=
"gguf"
self
.
quantization
=
self
.
load_format
=
"gguf"
if
is_remote_url
(
self
.
model_path
):
self
.
load_format
=
"remote"
# AMD-specific Triton attention KV splits default number
# AMD-specific Triton attention KV splits default number
if
is_hip
():
if
is_hip
():
self
.
triton_attention_num_kv_splits
=
16
self
.
triton_attention_num_kv_splits
=
16
...
@@ -345,9 +349,11 @@ class ServerArgs:
...
@@ -345,9 +349,11 @@ class ServerArgs:
"safetensors"
,
"safetensors"
,
"npcache"
,
"npcache"
,
"dummy"
,
"dummy"
,
"sharded_state"
,
"gguf"
,
"gguf"
,
"bitsandbytes"
,
"bitsandbytes"
,
"layered"
,
"layered"
,
"remote"
,
],
],
help
=
"The format of the model weights to load. "
help
=
"The format of the model weights to load. "
'"auto" will try to load the weights in the safetensors format '
'"auto" will try to load the weights in the safetensors format '
...
@@ -1088,6 +1094,9 @@ class PortArgs:
...
@@ -1088,6 +1094,9 @@ class PortArgs:
# The port for nccl initialization (torch.dist)
# The port for nccl initialization (torch.dist)
nccl_port
:
int
nccl_port
:
int
# The ipc filename for rpc call between Engine and Scheduler
rpc_ipc_name
:
str
@
staticmethod
@
staticmethod
def
init_new
(
server_args
,
dp_rank
:
Optional
[
int
]
=
None
)
->
"PortArgs"
:
def
init_new
(
server_args
,
dp_rank
:
Optional
[
int
]
=
None
)
->
"PortArgs"
:
port
=
server_args
.
port
+
random
.
randint
(
100
,
1000
)
port
=
server_args
.
port
+
random
.
randint
(
100
,
1000
)
...
@@ -1106,6 +1115,7 @@ class PortArgs:
...
@@ -1106,6 +1115,7 @@ class PortArgs:
scheduler_input_ipc_name
=
f
"ipc://
{
tempfile
.
NamedTemporaryFile
(
delete
=
False
).
name
}
"
,
scheduler_input_ipc_name
=
f
"ipc://
{
tempfile
.
NamedTemporaryFile
(
delete
=
False
).
name
}
"
,
detokenizer_ipc_name
=
f
"ipc://
{
tempfile
.
NamedTemporaryFile
(
delete
=
False
).
name
}
"
,
detokenizer_ipc_name
=
f
"ipc://
{
tempfile
.
NamedTemporaryFile
(
delete
=
False
).
name
}
"
,
nccl_port
=
port
,
nccl_port
=
port
,
rpc_ipc_name
=
f
"ipc://
{
tempfile
.
NamedTemporaryFile
(
delete
=
False
).
name
}
"
,
)
)
else
:
else
:
# DP attention. Use TCP + port to handle both single-node and multi-node.
# DP attention. Use TCP + port to handle both single-node and multi-node.
...
@@ -1131,6 +1141,7 @@ class PortArgs:
...
@@ -1131,6 +1141,7 @@ class PortArgs:
scheduler_input_ipc_name
=
f
"tcp://
{
dist_init_host
}
:
{
scheduler_input_port
}
"
,
scheduler_input_ipc_name
=
f
"tcp://
{
dist_init_host
}
:
{
scheduler_input_port
}
"
,
detokenizer_ipc_name
=
f
"tcp://
{
dist_init_host
}
:
{
port_base
+
1
}
"
,
detokenizer_ipc_name
=
f
"tcp://
{
dist_init_host
}
:
{
port_base
+
1
}
"
,
nccl_port
=
port
,
nccl_port
=
port
,
rpc_ipc_name
=
f
"tcp://
{
dist_init_host
}
:
{
port_base
+
2
}
"
,
)
)
...
...
python/sglang/srt/utils.py
View file @
1ce4878d
...
@@ -42,6 +42,7 @@ from importlib.util import find_spec
...
@@ -42,6 +42,7 @@ from importlib.util import find_spec
from
io
import
BytesIO
from
io
import
BytesIO
from
multiprocessing
import
Pool
from
multiprocessing
import
Pool
from
multiprocessing.reduction
import
ForkingPickler
from
multiprocessing.reduction
import
ForkingPickler
from
pathlib
import
Path
from
typing
import
Any
,
Callable
,
Dict
,
List
,
Optional
,
Protocol
,
Set
,
Tuple
,
Union
from
typing
import
Any
,
Callable
,
Dict
,
List
,
Optional
,
Protocol
,
Set
,
Tuple
,
Union
import
numpy
as
np
import
numpy
as
np
...
@@ -774,12 +775,22 @@ def get_zmq_socket(
...
@@ -774,12 +775,22 @@ def get_zmq_socket(
buf_size
=
-
1
buf_size
=
-
1
socket
=
context
.
socket
(
socket_type
)
socket
=
context
.
socket
(
socket_type
)
if
socket_type
==
zmq
.
PUSH
:
def
set_send_opt
():
socket
.
setsockopt
(
zmq
.
SNDHWM
,
0
)
socket
.
setsockopt
(
zmq
.
SNDHWM
,
0
)
socket
.
setsockopt
(
zmq
.
SNDBUF
,
buf_size
)
socket
.
setsockopt
(
zmq
.
SNDBUF
,
buf_size
)
elif
socket_type
==
zmq
.
PULL
:
def
set_recv_opt
():
socket
.
setsockopt
(
zmq
.
RCVHWM
,
0
)
socket
.
setsockopt
(
zmq
.
RCVHWM
,
0
)
socket
.
setsockopt
(
zmq
.
RCVBUF
,
buf_size
)
socket
.
setsockopt
(
zmq
.
RCVBUF
,
buf_size
)
if
socket_type
==
zmq
.
PUSH
:
set_send_opt
()
elif
socket_type
==
zmq
.
PULL
:
set_recv_opt
()
elif
socket_type
==
zmq
.
DEALER
:
set_send_opt
()
set_recv_opt
()
else
:
else
:
raise
ValueError
(
f
"Unsupported socket type:
{
socket_type
}
"
)
raise
ValueError
(
f
"Unsupported socket type:
{
socket_type
}
"
)
...
@@ -1572,3 +1583,29 @@ def add_prefix(name: str, prefix: str) -> str:
...
@@ -1572,3 +1583,29 @@ def add_prefix(name: str, prefix: str) -> str:
The string `prefix.name` if prefix is non-empty, otherwise just `name`.
The string `prefix.name` if prefix is non-empty, otherwise just `name`.
"""
"""
return
name
if
not
prefix
else
f
"
{
prefix
}
.
{
name
}
"
return
name
if
not
prefix
else
f
"
{
prefix
}
.
{
name
}
"
def is_remote_url(url: Union[str, Path]) -> bool:
    """
    Tell whether ``url`` looks like a remote model location, i.e.
    <connector_type>://<host>:<port>/<model_name>

    Only the ``<something>://`` prefix is actually checked; the host, port
    and model-name parts are not validated here.

    Args:
        url: A model path given as a string, or a ``pathlib.Path``.

    Returns:
        ``True`` when ``url`` is a string that begins with at least one
        character followed by ``://``. ``Path`` objects always yield
        ``False``.
    """
    # A Path instance is treated as a local filesystem location.
    if not isinstance(url, Path):
        return re.match(r"(.+)://(.*)", url) is not None
    return False
def parse_connector_type(url: str) -> str:
    """
    Parse the connector type from the URL of the format:
    <connector_type>://<path>

    Args:
        url: The URL string to inspect.

    Returns:
        The text before the first ``://`` in ``url``, or an empty string
        when ``url`` does not have a non-empty prefix followed by ``://``.
    """
    # Non-greedy capture: stop at the FIRST "://" so that a path which itself
    # contains "://" (e.g. "s3://bucket/mirror-of-http://x") still yields
    # "s3" rather than everything up to the last "://".
    pattern = r"(.+?)://(.*)"
    m = re.match(pattern, url)
    if m is None:
        return ""
    return m.group(1)
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment