Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
f35f120d
Unverified
Commit
f35f120d
authored
Oct 14, 2025
by
Mick
Committed by
GitHub
Oct 13, 2025
Browse files
fix: fix video input for qwen3-vl (#11442)
parent
54a46a26
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
51 additions
and
7 deletions
+51
-7
python/sglang/srt/layers/rotary_embedding.py
python/sglang/srt/layers/rotary_embedding.py
+7
-0
python/sglang/srt/managers/tokenizer_manager.py
python/sglang/srt/managers/tokenizer_manager.py
+4
-4
python/sglang/srt/model_executor/model_runner.py
python/sglang/srt/model_executor/model_runner.py
+16
-1
test/srt/test_vision_openai_server_a.py
test/srt/test_vision_openai_server_a.py
+21
-0
test/srt/test_vision_openai_server_common.py
test/srt/test_vision_openai_server_common.py
+3
-2
No files found.
python/sglang/srt/layers/rotary_embedding.py
View file @
f35f120d
...
@@ -1142,6 +1142,13 @@ class MRotaryEmbedding(RotaryEmbedding):
...
@@ -1142,6 +1142,13 @@ class MRotaryEmbedding(RotaryEmbedding):
second_per_grid_ts
:
Optional
[
torch
.
Tensor
]
=
None
,
second_per_grid_ts
:
Optional
[
torch
.
Tensor
]
=
None
,
**
kwargs
,
**
kwargs
,
)
->
Tuple
[
torch
.
Tensor
,
torch
.
Tensor
]:
)
->
Tuple
[
torch
.
Tensor
,
torch
.
Tensor
]:
if
(
model_type
.
startswith
(
"qwen3_vl"
)
or
model_type
.
startswith
(
"qwen3_vl_moe"
)
)
and
video_grid_thw
is
not
None
:
video_grid_thw
=
torch
.
repeat_interleave
(
video_grid_thw
,
video_grid_thw
[:,
0
],
dim
=
0
)
video_grid_thw
[:,
0
]
=
1
mrope_position_deltas
=
[]
mrope_position_deltas
=
[]
if
input_ids
is
not
None
and
(
if
input_ids
is
not
None
and
(
image_grid_thw
is
not
None
or
video_grid_thw
is
not
None
image_grid_thw
is
not
None
or
video_grid_thw
is
not
None
...
...
python/sglang/srt/managers/tokenizer_manager.py
View file @
f35f120d
...
@@ -25,7 +25,6 @@ import signal
...
@@ -25,7 +25,6 @@ import signal
import
sys
import
sys
import
threading
import
threading
import
time
import
time
import
uuid
from
collections
import
deque
from
collections
import
deque
from
contextlib
import
nullcontext
from
contextlib
import
nullcontext
from
datetime
import
datetime
from
datetime
import
datetime
...
@@ -360,7 +359,8 @@ class TokenizerManager(TokenizerCommunicatorMixin):
...
@@ -360,7 +359,8 @@ class TokenizerManager(TokenizerCommunicatorMixin):
(
(
FreezeGCReq
,
FreezeGCReq
,
lambda
x
:
None
,
lambda
x
:
None
,
),
# For handling case when scheduler skips detokenizer and forwards back to the tokenizer manager, we ignore it.
),
# For handling case when scheduler skips detokenizer and forwards back to the tokenizer manager, we ignore it.
(
HealthCheckOutput
,
lambda
x
:
None
),
(
HealthCheckOutput
,
lambda
x
:
None
),
]
]
)
)
...
@@ -587,9 +587,9 @@ class TokenizerManager(TokenizerCommunicatorMixin):
...
@@ -587,9 +587,9 @@ class TokenizerManager(TokenizerCommunicatorMixin):
)
)
if
self
.
mm_processor
and
obj
.
contains_mm_input
():
if
self
.
mm_processor
and
obj
.
contains_mm_input
():
if
not
isinstance
(
obj
.
image_data
,
list
):
if
not
isinstance
(
obj
.
image_data
,
list
)
and
obj
.
image_data
:
obj
.
image_data
=
[
obj
.
image_data
]
obj
.
image_data
=
[
obj
.
image_data
]
if
not
isinstance
(
obj
.
audio_data
,
list
):
if
not
isinstance
(
obj
.
audio_data
,
list
)
and
obj
.
audio_data
:
obj
.
audio_data
=
[
obj
.
audio_data
]
obj
.
audio_data
=
[
obj
.
audio_data
]
mm_inputs
:
Dict
=
await
self
.
mm_processor
.
process_mm_data_async
(
mm_inputs
:
Dict
=
await
self
.
mm_processor
.
process_mm_data_async
(
image_data
=
obj
.
image_data
,
image_data
=
obj
.
image_data
,
...
...
python/sglang/srt/model_executor/model_runner.py
View file @
f35f120d
...
@@ -196,7 +196,6 @@ MAMBA_CACHE_SIZE_MAX_RUNNING_REQUESTS_RATIO = 3
...
@@ -196,7 +196,6 @@ MAMBA_CACHE_SIZE_MAX_RUNNING_REQUESTS_RATIO = 3
logger
=
logging
.
getLogger
(
__name__
)
logger
=
logging
.
getLogger
(
__name__
)
if
_is_npu
:
if
_is_npu
:
import
torch_npu
import
torch_npu
...
@@ -636,6 +635,22 @@ class ModelRunner:
...
@@ -636,6 +635,22 @@ class ModelRunner:
"Setting hicache_io_backend to vanilla I/O, which may lead to suboptimal performance with small page sizes."
"Setting hicache_io_backend to vanilla I/O, which may lead to suboptimal performance with small page sizes."
)
)
if
self
.
model_config
.
hf_config
.
model_type
==
"qwen3_vl_moe"
:
if
(
quantization_config
:
=
getattr
(
self
.
model_config
.
hf_config
,
"quantization_config"
,
None
)
)
is
not
None
:
text_config
=
self
.
model_config
.
hf_text_config
weight_block_size_n
=
quantization_config
[
"weight_block_size"
][
0
]
if
(
text_config
.
moe_intermediate_size
//
(
self
.
tp_size
//
self
.
moe_ep_size
)
)
%
weight_block_size_n
!=
0
:
raise
ValueError
(
f
"For qwen3-vl-fp8 models, please make sure (
{
text_config
.
moe_intermediate_size
=
}
// (
{
self
.
tp_size
=
}
//
{
self
.
moe_ep_size
=
}
)) %
{
weight_block_size_n
=
}
== 0"
)
def
init_torch_distributed
(
self
):
def
init_torch_distributed
(
self
):
logger
.
info
(
"Init torch distributed begin."
)
logger
.
info
(
"Init torch distributed begin."
)
...
...
test/srt/test_vision_openai_server_a.py
View file @
f35f120d
...
@@ -50,6 +50,27 @@ class TestQwen2VLServer(ImageOpenAITestMixin, VideoOpenAITestMixin):
...
@@ -50,6 +50,27 @@ class TestQwen2VLServer(ImageOpenAITestMixin, VideoOpenAITestMixin):
cls
.
base_url
+=
"/v1"
cls
.
base_url
+=
"/v1"
class
TestQwen3VLServer
(
ImageOpenAITestMixin
,
VideoOpenAITestMixin
):
@
classmethod
def
setUpClass
(
cls
):
cls
.
model
=
"Qwen/Qwen3-VL-30B-A3B-Instruct"
cls
.
base_url
=
DEFAULT_URL_FOR_TEST
cls
.
api_key
=
"sk-123456"
cls
.
process
=
popen_launch_server
(
cls
.
model
,
cls
.
base_url
,
timeout
=
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH
,
api_key
=
cls
.
api_key
,
other_args
=
[
"--mem-fraction-static"
,
"0.80"
,
"--cuda-graph-max-bs"
,
"4"
,
],
)
cls
.
base_url
+=
"/v1"
class
TestQwen2_5_VLServer
(
ImageOpenAITestMixin
,
VideoOpenAITestMixin
):
class
TestQwen2_5_VLServer
(
ImageOpenAITestMixin
,
VideoOpenAITestMixin
):
@
classmethod
@
classmethod
def
setUpClass
(
cls
):
def
setUpClass
(
cls
):
...
...
test/srt/test_vision_openai_server_common.py
View file @
f35f120d
...
@@ -494,7 +494,7 @@ class VideoOpenAITestMixin(TestOpenAIOmniServerBase):
...
@@ -494,7 +494,7 @@ class VideoOpenAITestMixin(TestOpenAIOmniServerBase):
**
(
self
.
get_vision_request_kwargs
()),
**
(
self
.
get_vision_request_kwargs
()),
)
)
video_response
=
response
.
choices
[
0
].
message
.
content
video_response
=
response
.
choices
[
0
].
message
.
content
.
lower
()
print
(
"-"
*
30
)
print
(
"-"
*
30
)
print
(
f
"Video response:
\n
{
video_response
}
"
)
print
(
f
"Video response:
\n
{
video_response
}
"
)
...
@@ -502,9 +502,10 @@ class VideoOpenAITestMixin(TestOpenAIOmniServerBase):
...
@@ -502,9 +502,10 @@ class VideoOpenAITestMixin(TestOpenAIOmniServerBase):
# Add assertions to validate the video response
# Add assertions to validate the video response
assert
(
assert
(
"i
P
od"
in
video_response
"i
p
od"
in
video_response
or
"device"
in
video_response
or
"device"
in
video_response
or
"microphone"
in
video_response
or
"microphone"
in
video_response
or
"phone"
in
video_response
),
f
"video_response:
{
video_response
}
, should contain 'iPod' or 'device'"
),
f
"video_response:
{
video_response
}
, should contain 'iPod' or 'device'"
assert
(
assert
(
"man"
in
video_response
"man"
in
video_response
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment