Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
73e9a2ef
Unverified
Commit
73e9a2ef
authored
Nov 06, 2025
by
Mick
Committed by
GitHub
Nov 06, 2025
Browse files
fix: tiny fix cli (#12744)
parent
837b08eb
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
206 additions
and
158 deletions
+206
-158
python/sglang/cli/generate.py
python/sglang/cli/generate.py
+8
-1
python/sglang/cli/main.py
python/sglang/cli/main.py
+0
-152
python/sglang/cli/serve.py
python/sglang/cli/serve.py
+34
-1
python/sglang/cli/utils.py
python/sglang/cli/utils.py
+152
-0
python/sglang/multimodal_gen/docs/cli.md
python/sglang/multimodal_gen/docs/cli.md
+2
-2
python/sglang/multimodal_gen/runtime/entrypoints/diffusion_generator.py
...multimodal_gen/runtime/entrypoints/diffusion_generator.py
+2
-2
python/sglang/multimodal_gen/runtime/platforms/cuda.py
python/sglang/multimodal_gen/runtime/platforms/cuda.py
+1
-0
python/sglang/multimodal_gen/runtime/server_args.py
python/sglang/multimodal_gen/runtime/server_args.py
+7
-0
No files found.
python/sglang/cli/generate.py
View file @
73e9a2ef
import
argparse
from
sglang.cli.
main
import
get_is_diffusion_model
,
get_model_path
from
sglang.cli.
utils
import
get_is_diffusion_model
,
get_model_path
from
sglang.multimodal_gen.runtime.entrypoints.cli.generate
import
(
add_multimodal_gen_generate_args
,
generate_cmd
,
...
...
@@ -8,6 +8,13 @@ from sglang.multimodal_gen.runtime.entrypoints.cli.generate import (
def
generate
(
args
,
extra_argv
):
# If help is requested, show generate subcommand help without requiring --model-path
if
any
(
h
in
extra_argv
for
h
in
(
"-h"
,
"--help"
)):
parser
=
argparse
.
ArgumentParser
(
description
=
"SGLang Multimodal Generation"
)
add_multimodal_gen_generate_args
(
parser
)
parser
.
parse_args
(
extra_argv
)
return
model_path
=
get_model_path
(
extra_argv
)
is_diffusion_model
=
get_is_diffusion_model
(
model_path
)
if
is_diffusion_model
:
...
...
python/sglang/cli/main.py
View file @
73e9a2ef
import
argparse
import
hashlib
import
json
import
logging
import
os
import
tempfile
from
typing
import
Optional
import
filelock
from
huggingface_hub
import
hf_hub_download
from
sglang.cli.generate
import
generate
from
sglang.cli.serve
import
serve
logger
=
logging
.
getLogger
(
__name__
)
temp_dir
=
tempfile
.
gettempdir
()
def _get_lock(model_name_or_path: str, cache_dir: Optional[str] = None):
    """Return a cross-process file lock guarding downloads for one model.

    Args:
        model_name_or_path: Local path or Hugging Face Hub model ID.
        cache_dir: Directory to place the lock file in; defaults to the
            system temp directory.

    Returns:
        A ``filelock.FileLock`` whose path is unique per model (sha256 of
        the slash-sanitized name plus the name itself).
    """
    lock_dir = cache_dir or temp_dir
    # Fix: create the lock directory itself. The original called
    # os.makedirs(os.path.dirname(lock_dir), ...), which only creates the
    # *parent* of lock_dir, so FileLock could fail with a missing directory
    # when a caller-supplied cache_dir did not exist yet.
    os.makedirs(lock_dir, exist_ok=True)
    model_name = model_name_or_path.replace("/", "-")
    hash_name = hashlib.sha256(model_name.encode()).hexdigest()
    # Hash prefix avoids collisions; readable suffix aids debugging.
    lock_file_name = hash_name + model_name + ".lock"
    # mode=0o666 lets different users on a shared machine reuse the lock file.
    lock = filelock.FileLock(os.path.join(lock_dir, lock_file_name), mode=0o666)
    return lock
# Copied and adapted from hf_diffusers_utils.py
def _maybe_download_model(
    model_name_or_path: str, local_dir: str | None = None, download: bool = True
) -> str:
    """
    Resolve a model path. If it's a local directory, return it.
    If it's a Hugging Face Hub ID, download only the config file
    (`model_index.json` or `config.json`) and return its directory.

    Args:
        model_name_or_path: Local path or Hugging Face Hub model ID
        local_dir: Local directory to save the downloaded file (if any)
        download: Whether to download from Hugging Face Hub when needed

    Returns:
        Local directory path that contains the downloaded config file, or the original local directory.

    Raises:
        ValueError: If the path does not exist locally and neither
            `model_index.json` nor `config.json` could be downloaded.
    """
    # A path that already exists on disk is used as-is — no Hub lookup.
    if os.path.exists(model_name_or_path):
        logger.info("Model already exists locally")
        return model_name_or_path
    # Caller opted out of downloading: hand the ID back unchanged and let
    # downstream code resolve it.
    if not download:
        return model_name_or_path
    # Serialize concurrent downloads of the same model across processes.
    with _get_lock(model_name_or_path):
        # Try `model_index.json` first (diffusers models)
        try:
            logger.info(
                "Downloading model_index.json from HF Hub for %s...",
                model_name_or_path,
            )
            file_path = hf_hub_download(
                repo_id=model_name_or_path,
                filename="model_index.json",
                local_dir=local_dir,
            )
            logger.info("Downloaded to %s", file_path)
            # Return the directory, not the file — callers expect a model dir.
            return os.path.dirname(file_path)
        except Exception as e_index:
            # Expected for non-diffusers models; log at debug and fall through.
            logger.debug("model_index.json not found or failed: %s", e_index)
        # Fallback to `config.json`
        try:
            logger.info(
                "Downloading config.json from HF Hub for %s...", model_name_or_path
            )
            file_path = hf_hub_download(
                repo_id=model_name_or_path,
                filename="config.json",
                local_dir=local_dir,
            )
            logger.info("Downloaded to %s", file_path)
            return os.path.dirname(file_path)
        except Exception as e_config:
            # Neither config file was found: surface one combined error with
            # the original cause chained for debugging.
            raise ValueError(
                (
                    "Could not find model locally at %s and failed to download "
                    "model_index.json/config.json from HF Hub: %s"
                )
                % (model_name_or_path, e_config)
            ) from e_config
# Copied and adapted from hf_diffusers_utils.py
def is_diffusers_model_path(model_path: str) -> bool:
    """
    Verify if the model directory contains a valid diffusers configuration.

    Args:
        model_path: Path to the model directory

    Returns:
        True if `model_path` contains a `model_index.json` with a
        `_diffusers_version` key (i.e. a diffusers pipeline), False otherwise.
    """
    # Fix: the return annotation was `-> True`, which annotates with the
    # *value* True instead of the type `bool`; the function returns a bool.
    # The docstring also wrongly promised a config dict / None.
    # Prefer model_index.json which indicates a diffusers pipeline
    config_path = os.path.join(model_path, "model_index.json")
    if not os.path.exists(config_path):
        return False
    # Load the config
    with open(config_path) as f:
        config = json.load(f)
    # Verify diffusers version exists
    if "_diffusers_version" not in config:
        return False
    return True
def get_is_diffusion_model(model_path: str):
    """Decide whether *model_path* refers to a diffusers-style model.

    Resolves the path (downloading the config from HF Hub if needed) and
    checks for a diffusers `model_index.json`. Logs when a diffusion model
    is detected.
    """
    resolved = _maybe_download_model(model_path)
    detected = is_diffusers_model_path(resolved)
    if detected:
        logger.info("Diffusion model detected")
    return detected
def get_model_path(extra_argv):
    """Pull the --model-path value out of a raw argv list.

    Understands both the space-separated form (``--model-path VALUE``) and
    the equals form (``--model-path=VALUE``). Raises an Exception with a
    usage message when the flag is missing (a friendlier one when -h/--help
    was requested).
    """
    model_path = None
    total = len(extra_argv)
    for idx, token in enumerate(extra_argv):
        # Space-separated form: value is the next token, if there is one.
        if token == "--model-path" and idx + 1 < total:
            model_path = extra_argv[idx + 1]
            break
        # Equals form: split once so values containing '=' survive intact.
        if token.startswith("--model-path="):
            model_path = token.split("=", 1)[1]
            break
    if model_path is not None:
        return model_path
    # Fallback for --help or other cases where model-path is not provided
    if "-h" in extra_argv or "--help" in extra_argv:
        raise Exception(
            "Usage: sglang serve --model-path <model-name-or-path> [additional-arguments]\n\n"
            "This command can launch either a standard language model server or a diffusion model server.\n"
            "The server type is determined by the model path.\n"
            "For specific arguments, please provide a model_path."
        )
    raise Exception(
        "Error: --model-path is required. "
        "Please provide the path to the model."
    )
def
main
():
parser
=
argparse
.
ArgumentParser
()
...
...
python/sglang/cli/serve.py
View file @
73e9a2ef
...
...
@@ -4,16 +4,49 @@ import argparse
import
logging
import
os
from
sglang.cli.
main
import
get_is_diffusion_model
,
get_model_path
from
sglang.cli.
utils
import
get_is_diffusion_model
,
get_model_path
from
sglang.srt.utils
import
kill_process_tree
logger
=
logging
.
getLogger
(
__name__
)
def
serve
(
args
,
extra_argv
):
if
any
(
h
in
extra_argv
for
h
in
(
"-h"
,
"--help"
)):
# Since the server type is determined by the model, and we don't have a model path,
# we can't show the exact help. Instead, we show a general help message and then
# the help for both possible server types.
print
(
"Usage: sglang serve --model-path <model-name-or-path> [additional-arguments]
\n
"
)
print
(
"This command can launch either a standard language model server or a diffusion model server."
)
print
(
"The server type is determined by the model path.
\n
"
)
print
(
"For specific arguments, please provide a model_path."
)
print
(
"
\n
--- Help for Standard Language Model Server ---"
)
from
sglang.srt.server_args
import
prepare_server_args
try
:
prepare_server_args
([
"--help"
])
except
SystemExit
:
pass
# argparse --help calls sys.exit
print
(
"
\n
--- Help for Diffusion Model Server ---"
)
from
sglang.multimodal_gen.runtime.entrypoints.cli.serve
import
(
add_multimodal_gen_serve_args
,
)
parser
=
argparse
.
ArgumentParser
(
description
=
"SGLang Diffusion Model Serving"
)
add_multimodal_gen_serve_args
(
parser
)
parser
.
print_help
()
return
model_path
=
get_model_path
(
extra_argv
)
try
:
is_diffusion_model
=
get_is_diffusion_model
(
model_path
)
if
is_diffusion_model
:
logger
.
info
(
"Diffusion model detected"
)
if
is_diffusion_model
:
# Logic for Diffusion Models
from
sglang.multimodal_gen.runtime.entrypoints.cli.serve
import
(
...
...
python/sglang/cli/utils.py
0 → 100644
View file @
73e9a2ef
import
hashlib
import
json
import
logging
import
os
import
tempfile
from
typing
import
Optional
import
filelock
from
huggingface_hub
import
hf_hub_download
logger
=
logging
.
getLogger
(
__name__
)
temp_dir
=
tempfile
.
gettempdir
()
def _get_lock(model_name_or_path: str, cache_dir: Optional[str] = None):
    """Return a cross-process file lock guarding downloads for one model.

    Args:
        model_name_or_path: Local path or Hugging Face Hub model ID.
        cache_dir: Directory to place the lock file in; defaults to the
            system temp directory.

    Returns:
        A ``filelock.FileLock`` whose path is unique per model (sha256 of
        the slash-sanitized name plus the name itself).
    """
    lock_dir = cache_dir or temp_dir
    # Fix: create the lock directory itself. The original called
    # os.makedirs(os.path.dirname(lock_dir), ...), which only creates the
    # *parent* of lock_dir, so FileLock could fail with a missing directory
    # when a caller-supplied cache_dir did not exist yet.
    os.makedirs(lock_dir, exist_ok=True)
    model_name = model_name_or_path.replace("/", "-")
    hash_name = hashlib.sha256(model_name.encode()).hexdigest()
    # Hash prefix avoids collisions; readable suffix aids debugging.
    lock_file_name = hash_name + model_name + ".lock"
    # mode=0o666 lets different users on a shared machine reuse the lock file.
    lock = filelock.FileLock(os.path.join(lock_dir, lock_file_name), mode=0o666)
    return lock
# Copied and adapted from hf_diffusers_utils.py
def _maybe_download_model(
    model_name_or_path: str, local_dir: str | None = None, download: bool = True
) -> str:
    """
    Resolve a model path. If it's a local directory, return it.
    If it's a Hugging Face Hub ID, download only the config file
    (`model_index.json` or `config.json`) and return its directory.

    Args:
        model_name_or_path: Local path or Hugging Face Hub model ID
        local_dir: Local directory to save the downloaded file (if any)
        download: Whether to download from Hugging Face Hub when needed

    Returns:
        Local directory path that contains the downloaded config file, or the original local directory.

    Raises:
        ValueError: If the path does not exist locally and neither
            `model_index.json` nor `config.json` could be downloaded.
    """
    # A path that already exists on disk is used as-is — no Hub lookup.
    if os.path.exists(model_name_or_path):
        logger.info("Model already exists locally")
        return model_name_or_path
    # Caller opted out of downloading: hand the ID back unchanged and let
    # downstream code resolve it.
    if not download:
        return model_name_or_path
    # Serialize concurrent downloads of the same model across processes.
    with _get_lock(model_name_or_path):
        # Try `model_index.json` first (diffusers models)
        try:
            logger.info(
                "Downloading model_index.json from HF Hub for %s...",
                model_name_or_path,
            )
            file_path = hf_hub_download(
                repo_id=model_name_or_path,
                filename="model_index.json",
                local_dir=local_dir,
            )
            logger.info("Downloaded to %s", file_path)
            # Return the directory, not the file — callers expect a model dir.
            return os.path.dirname(file_path)
        except Exception as e_index:
            # Expected for non-diffusers models; log at debug and fall through.
            logger.debug("model_index.json not found or failed: %s", e_index)
        # Fallback to `config.json`
        try:
            logger.info(
                "Downloading config.json from HF Hub for %s...", model_name_or_path
            )
            file_path = hf_hub_download(
                repo_id=model_name_or_path,
                filename="config.json",
                local_dir=local_dir,
            )
            logger.info("Downloaded to %s", file_path)
            return os.path.dirname(file_path)
        except Exception as e_config:
            # Neither config file was found: surface one combined error with
            # the original cause chained for debugging.
            raise ValueError(
                (
                    "Could not find model locally at %s and failed to download "
                    "model_index.json/config.json from HF Hub: %s"
                )
                % (model_name_or_path, e_config)
            ) from e_config
# Copied and adapted from hf_diffusers_utils.py
def is_diffusers_model_path(model_path: str) -> bool:
    """
    Verify if the model directory contains a valid diffusers configuration.

    Args:
        model_path: Path to the model directory

    Returns:
        True if `model_path` contains a `model_index.json` with a
        `_diffusers_version` key (i.e. a diffusers pipeline), False otherwise.
    """
    # Fix: the return annotation was `-> True`, which annotates with the
    # *value* True instead of the type `bool`; the function returns a bool.
    # The docstring also wrongly promised a config dict / None.
    # Prefer model_index.json which indicates a diffusers pipeline
    config_path = os.path.join(model_path, "model_index.json")
    if not os.path.exists(config_path):
        return False
    # Load the config
    with open(config_path) as f:
        config = json.load(f)
    # Verify diffusers version exists
    if "_diffusers_version" not in config:
        return False
    return True
def get_is_diffusion_model(model_path: str):
    """Check whether *model_path* points at a diffusers-style model.

    The path is first resolved (fetching the model's config from HF Hub when
    it is not a local directory), then inspected for a diffusers
    `model_index.json`. Emits an info log on detection.
    """
    local_path = _maybe_download_model(model_path)
    is_diffusion = is_diffusers_model_path(local_path)
    if is_diffusion:
        logger.info("Diffusion model detected")
    return is_diffusion
def get_model_path(extra_argv):
    """Locate the --model-path argument inside a raw argv list.

    Supports ``--model-path VALUE`` and ``--model-path=VALUE``. When the
    flag is absent, raises an Exception: a usage/help text if -h/--help was
    passed, otherwise a short "required" error.
    """
    found = None
    n_args = len(extra_argv)
    for pos, item in enumerate(extra_argv):
        # Two-token form: "--model-path <value>".
        if item == "--model-path" and pos + 1 < n_args:
            found = extra_argv[pos + 1]
            break
        # Single-token form: "--model-path=<value>"; split only on the
        # first '=' so values containing '=' are kept whole.
        if item.startswith("--model-path="):
            found = item.split("=", 1)[1]
            break
    if found is not None:
        return found
    # Fallback for --help or other cases where model-path is not provided
    help_requested = any(flag in extra_argv for flag in ("-h", "--help"))
    if help_requested:
        raise Exception(
            "Usage: sglang serve --model-path <model-name-or-path> [additional-arguments]\n\n"
            "This command can launch either a standard language model server or a diffusion model server.\n"
            "The server type is determined by the model path.\n"
            "For specific arguments, please provide a model_path."
        )
    raise Exception(
        "Error: --model-path is required. "
        "Please provide the path to the model."
    )
python/sglang/multimodal_gen/docs/cli.md
View file @
73e9a2ef
...
...
@@ -143,7 +143,7 @@ SERVER_ARGS=(
--ring-degree
=
2
)
sglang serve
$
SERVER_ARGS
sglang serve
"
${
SERVER_ARGS
[@]
}
"
```
-
**--model-path**
: Which model to load. The example uses
`Wan-AI/Wan2.1-T2V-1.3B-Diffusers`
.
...
...
@@ -265,7 +265,7 @@ SAMPLING_ARGS=(
--output-file-name
"A curious raccoon.mp4"
)
sglang generate
$
SERVER_ARGS
$
SAMPLING_ARGS
sglang generate
"
${
SERVER_ARGS
[@]
}
"
"
${
SAMPLING_ARGS
[@]
}
"
```
Once the generation task has finished, the server will shut down automatically.
...
...
python/sglang/multimodal_gen/runtime/entrypoints/diffusion_generator.py
View file @
73e9a2ef
...
...
@@ -258,10 +258,10 @@ class DiffGenerator:
data_type
=
(
DataType
.
IMAGE
if
self
.
server_args
.
pipeline_config
.
is_image_gen
or
sampling_params
.
num_frames
==
1
or
pretrained_
sampling_params
.
num_frames
==
1
else
DataType
.
VIDEO
)
sampling_params
.
data_type
=
data_type
pretrained_
sampling_params
.
data_type
=
data_type
pretrained_sampling_params
.
set_output_file_name
()
requests
:
list
[
Req
]
=
[]
...
...
python/sglang/multimodal_gen/runtime/platforms/cuda.py
View file @
73e9a2ef
...
...
@@ -217,6 +217,7 @@ class CudaPlatformBase(Platform):
elif
selected_backend
==
AttentionBackendEnum
.
FA3
:
if
is_blackwell
():
raise
ValueError
(
"The 'fa3' backend is not supported on Blackwell GPUs"
)
target_backend
=
AttentionBackendEnum
.
FA3
elif
selected_backend
:
raise
ValueError
(
f
"Invalid attention backend for
{
cls
.
device_name
}
"
)
else
:
...
...
python/sglang/multimodal_gen/runtime/server_args.py
View file @
73e9a2ef
...
...
@@ -777,6 +777,13 @@ class ServerArgs:
)
self
.
sp_degree
=
self
.
ulysses_degree
=
self
.
ring_degree
=
1
if
(
self
.
ring_degree
is
not
None
and
self
.
ring_degree
>
1
and
self
.
attention_backend
!=
"fa3"
):
raise
ValueError
(
"Ring Attention is only supported for fa3 backend for now"
)
if
self
.
sp_degree
==
-
1
:
# assume we leave all remaining gpus to sp
num_gpus_per_group
=
self
.
dp_size
*
self
.
tp_size
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment