Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
1dfea5f4
Unverified
Commit 1dfea5f4 authored Sep 19, 2025 by Roger Wang
Committed by GitHub Sep 19, 2025
Browse files
[Bugfix][Perf] Misc fixes for Qwen3 VL (#25238)
Signed-off-by: Roger Wang <hey@rogerw.io>
parent cea91a32
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing 2 changed files with 12 additions and 13 deletions
+12
-13
vllm/model_executor/models/qwen3_vl.py
vllm/model_executor/models/qwen3_vl.py
+10
-13
vllm/model_executor/models/qwen3_vl_moe.py
vllm/model_executor/models/qwen3_vl_moe.py
+2
-0
No files found.
vllm/model_executor/models/qwen3_vl.py
View file @
1dfea5f4
...
@@ -1075,6 +1075,8 @@ class Qwen3VLForConditionalGeneration(nn.Module, SupportsMultiModal,
...
@@ -1075,6 +1075,8 @@ class Qwen3VLForConditionalGeneration(nn.Module, SupportsMultiModal,
config
.
text_config
.
hidden_size
)
config
.
text_config
.
hidden_size
)
for
_
in
range
(
self
.
deepstack_num_level
)
for
_
in
range
(
self
.
deepstack_num_level
)
]
if
self
.
use_deepstack
else
None
]
if
self
.
use_deepstack
else
None
self
.
visual_dim
=
config
.
vision_config
.
out_hidden_size
self
.
multiscale_dim
=
self
.
visual_dim
*
self
.
deepstack_num_level
def
_get_deepstack_input_embeds
(
self
,
def
_get_deepstack_input_embeds
(
self
,
num_tokens
:
int
)
->
IntermediateTensors
:
num_tokens
:
int
)
->
IntermediateTensors
:
...
@@ -1313,12 +1315,8 @@ class Qwen3VLForConditionalGeneration(nn.Module, SupportsMultiModal,
...
@@ -1313,12 +1315,8 @@ class Qwen3VLForConditionalGeneration(nn.Module, SupportsMultiModal,
]
]
multimodal_embeddings_cat
=
torch
.
cat
(
multimodal_embeddings
,
dim
=
0
)
multimodal_embeddings_cat
=
torch
.
cat
(
multimodal_embeddings
,
dim
=
0
)
visual_dim
=
multimodal_embeddings_cat
.
shape
[
-
1
]
//
(
self
.
deepstack_num_level
+
1
)
main_dim
,
multi_dim
=
visual_dim
,
visual_dim
*
self
.
deepstack_num_level
multimodal_embeddings_main
,
multimodal_embeddings_multiscale
=
torch
.
split
(
# noqa:E501
multimodal_embeddings_main
,
multimodal_embeddings_multiscale
=
torch
.
split
(
# noqa:E501
multimodal_embeddings_cat
,
[
main_dim
,
multi
_dim
],
multimodal_embeddings_cat
,
[
self
.
visual_dim
,
self
.
multiscale
_dim
],
dim
=-
1
)
dim
=-
1
)
multimodal_embeddings
=
torch
.
split
(
multimodal_embeddings_main
,
multimodal_embeddings
=
torch
.
split
(
multimodal_embeddings_main
,
...
@@ -1340,10 +1338,8 @@ class Qwen3VLForConditionalGeneration(nn.Module, SupportsMultiModal,
...
@@ -1340,10 +1338,8 @@ class Qwen3VLForConditionalGeneration(nn.Module, SupportsMultiModal,
],
],
)
)
deepstack_input_embeds
=
deepstack_input_embeds
.
view
(
deepstack_input_embeds
=
deepstack_input_embeds
.
view
(
inputs_embeds
.
shape
[
0
],
self
.
deepstack_num_level
,
inputs_embeds
.
shape
[
0
],
self
.
deepstack_num_level
,
self
.
visual_dim
)
visual_dim
).
contiguous
()
deepstack_input_embeds
=
deepstack_input_embeds
.
permute
(
1
,
0
,
2
)
deepstack_input_embeds
=
deepstack_input_embeds
.
permute
(
1
,
0
,
2
).
contiguous
()
return
deepstack_input_embeds
,
multimodal_embeddings
return
deepstack_input_embeds
,
multimodal_embeddings
def
get_input_embeddings
(
def
get_input_embeddings
(
...
@@ -1353,7 +1349,8 @@ class Qwen3VLForConditionalGeneration(nn.Module, SupportsMultiModal,
...
@@ -1353,7 +1349,8 @@ class Qwen3VLForConditionalGeneration(nn.Module, SupportsMultiModal,
)
->
torch
.
Tensor
:
)
->
torch
.
Tensor
:
deepstack_input_embeds
=
None
deepstack_input_embeds
=
None
inputs_embeds
=
self
.
language_model
.
get_input_embeddings
(
input_ids
)
inputs_embeds
=
self
.
language_model
.
get_input_embeddings
(
input_ids
)
if
multimodal_embeddings
is
not
None
and
self
.
use_deepstack
:
if
multimodal_embeddings
is
not
None
:
if
self
.
use_deepstack
:
deepstack_input_embeds
,
multimodal_embeddings
=
self
.
_compute_deepstack_embeds
(
# noqa:E501
deepstack_input_embeds
,
multimodal_embeddings
=
self
.
_compute_deepstack_embeds
(
# noqa:E501
input_ids
,
inputs_embeds
,
multimodal_embeddings
)
input_ids
,
inputs_embeds
,
multimodal_embeddings
)
inputs_embeds
=
merge_multimodal_embeddings
(
inputs_embeds
=
merge_multimodal_embeddings
(
...
...
vllm/model_executor/models/qwen3_vl_moe.py
View file @
1dfea5f4
...
@@ -344,3 +344,5 @@ class Qwen3VLMoeForConditionalGeneration(Qwen3VLForConditionalGeneration):
...
@@ -344,3 +344,5 @@ class Qwen3VLMoeForConditionalGeneration(Qwen3VLForConditionalGeneration):
config
.
text_config
.
hidden_size
)
config
.
text_config
.
hidden_size
)
for
_
in
range
(
self
.
deepstack_num_level
)
for
_
in
range
(
self
.
deepstack_num_level
)
]
if
self
.
use_deepstack
else
None
]
if
self
.
use_deepstack
else
None
self
.
visual_dim
=
config
.
vision_config
.
out_hidden_size
self
.
multiscale_dim
=
self
.
visual_dim
*
self
.
deepstack_num_level
\ No newline at end of file
Write
Preview
Markdown is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment