Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
af7dfb0d
Unverified
Commit
af7dfb0d
authored
Sep 22, 2025
by
Isotr0py
Committed by
GitHub
Sep 21, 2025
Browse files
[Perf] Further optimization for Qwen3-VL `fast_pos_embed_interpolate` (#25347)
Signed-off-by:
Isotr0py
<
mozf@mail2.sysu.edu.cn
>
parent
1c3ffdbe
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
32 additions
and
18 deletions
+32
-18
vllm/model_executor/models/qwen3_vl.py
vllm/model_executor/models/qwen3_vl.py
+32
-18
No files found.
vllm/model_executor/models/qwen3_vl.py
View file @
af7dfb0d
...
...
@@ -405,25 +405,39 @@ class Qwen3_VisionTransformer(nn.Module):
dh
=
h_idxs
-
h_floor
dw
=
w_idxs
-
w_floor
w00
=
((
1
-
dh
)[:,
None
]
*
(
1
-
dw
)[
None
,
:]).
reshape
(
-
1
)
w01
=
((
1
-
dh
)[:,
None
]
*
dw
[
None
,
:]).
reshape
(
-
1
)
w10
=
(
dh
[:,
None
]
*
(
1
-
dw
)[
None
,
:]).
reshape
(
-
1
)
w11
=
(
dh
[:,
None
]
*
dw
[
None
,
:]).
reshape
(
-
1
)
idx00
=
(
h_floor
[:,
None
]
*
num_grid_per_side
+
w_floor
[
None
,
:]).
reshape
(
-
1
)
idx01
=
(
h_floor
[:,
None
]
*
num_grid_per_side
+
w_ceil
[
None
,
:]).
reshape
(
-
1
)
idx10
=
(
h_ceil
[:,
None
]
*
num_grid_per_side
+
w_floor
[
None
,
:]).
reshape
(
-
1
)
idx11
=
(
h_ceil
[:,
None
]
*
num_grid_per_side
+
w_ceil
[
None
,
:]).
reshape
(
-
1
)
indices
=
torch
.
stack
([
idx00
,
idx01
,
idx10
,
idx11
],
dim
=
0
)
# Create meshgrid view for all h, w vars
dh_grid
,
dw_grid
=
torch
.
meshgrid
(
dh
,
dw
,
indexing
=
'ij'
)
h_floor_grid
,
w_floor_grid
=
torch
.
meshgrid
(
h_floor
,
w_floor
,
indexing
=
'ij'
)
h_ceil_grid
,
w_ceil_grid
=
torch
.
meshgrid
(
h_ceil
,
w_ceil
,
indexing
=
'ij'
)
h_floor_grid_idx
=
h_floor_grid
*
num_grid_per_side
h_ceil_grid_idx
=
h_ceil_grid
*
num_grid_per_side
# original computation of weights
# w00 = (1 - dh_grid) * (1 - dw_grid)
# w01 = (1 - dh_grid) * dw_grid
# w10 = dh_grid * (1 - dw_grid)
# w11 = dh_grid * dw_grid
# we reuse w11 here to avoid duplicate
# dh_grid * dw_grid computation
w11
=
dh_grid
*
dw_grid
w10
=
dh_grid
-
w11
w01
=
dw_grid
-
w11
w00
=
1
-
dh_grid
-
dw_grid
+
w11
idx00
=
h_floor_grid_idx
+
w_floor_grid
idx01
=
h_floor_grid_idx
+
w_ceil_grid
idx10
=
h_ceil_grid_idx
+
w_floor_grid
idx11
=
h_ceil_grid_idx
+
w_ceil_grid
indices
=
torch
.
stack
([
idx00
,
idx01
,
idx10
,
idx11
],
dim
=
0
).
reshape
(
4
,
-
1
)
weights
=
torch
.
stack
([
w00
,
w01
,
w10
,
w11
],
dim
=
0
).
to
(
dtype
=
self
.
dtype
,
device
=
self
.
device
)
weights
=
weights
.
unsqueeze
(
-
1
)
dim
=
0
).
reshape
(
4
,
-
1
,
1
)
weights
=
weights
.
to
(
dtype
=
self
.
dtype
,
device
=
self
.
device
)
embeds
=
self
.
pos_embed
(
indices
)
weighted_embeds
=
embeds
*
weights
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment