Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
bcb6f594
Unverified
Commit
bcb6f594
authored
Dec 08, 2025
by
Dazhi Jiang
Committed by
GitHub
Dec 08, 2025
Browse files
[Perf] Remove sync point in vit torch sdpa attn backend (#30232)
Signed-off-by:
Dazhi Jiang
<
dazhi_jiang@163.com
>
parent
cd00c443
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
24 additions
and
24 deletions
+24
-24
vllm/attention/ops/vit_attn_wrappers.py
vllm/attention/ops/vit_attn_wrappers.py
+6
-6
vllm/model_executor/models/ernie45_vl.py
vllm/model_executor/models/ernie45_vl.py
+6
-6
vllm/model_executor/models/glm4_1v.py
vllm/model_executor/models/glm4_1v.py
+6
-6
vllm/model_executor/models/qwen2_vl.py
vllm/model_executor/models/qwen2_vl.py
+6
-6
No files found.
vllm/attention/ops/vit_attn_wrappers.py
View file @
bcb6f594
...
...
@@ -93,12 +93,12 @@ def torch_sdpa_wrapper(
cu_seqlens
:
torch
.
Tensor
,
)
->
torch
.
Tensor
:
outputs
=
[]
for
i
in
range
(
1
,
len
(
cu_seqlens
)):
start_idx
=
cu_seqlens
[
i
-
1
]
end_idx
=
cu_seqlens
[
i
]
q_i
=
q
[:,
start_idx
:
end_idx
]
k_i
=
k
[:,
start_idx
:
end_idx
]
v_i
=
v
[:,
start_idx
:
end_idx
]
lens
=
(
cu_seqlens
[
1
:]
-
cu_seqlens
[:
-
1
]).
tolist
()
q_chunks
=
torch
.
split
(
q
,
lens
,
dim
=
1
)
k_chunks
=
torch
.
split
(
k
,
lens
,
dim
=
1
)
v_chunks
=
torch
.
split
(
v
,
lens
,
dim
=
1
)
for
q_i
,
k_i
,
v_i
in
zip
(
q_chunks
,
k_chunks
,
v_chunks
):
q_i
,
k_i
,
v_i
=
(
einops
.
rearrange
(
x
,
"b s h d -> b h s d"
)
for
x
in
[
q_i
,
k_i
,
v_i
]
)
...
...
vllm/model_executor/models/ernie45_vl.py
View file @
bcb6f594
...
...
@@ -289,12 +289,12 @@ class Ernie4_5_VisionAttention(nn.Module):
elif
self
.
attn_backend
==
AttentionBackendEnum
.
TORCH_SDPA
:
# Execute attention entry by entry for speed & less VRAM.
outputs
=
[]
for
i
in
range
(
1
,
len
(
cu_seqlens
)):
start_idx
=
cu_seqlens
[
i
-
1
]
end_idx
=
cu_seqlens
[
i
]
q_i
=
q
[:,
start_idx
:
end_idx
]
k_i
=
k
[:,
start_idx
:
end_idx
]
v_i
=
v
[:,
start_idx
:
end_idx
]
lens
=
(
cu_seqlens
[
1
:]
-
cu_seqlens
[:
-
1
]).
tolist
()
q_chunks
=
torch
.
split
(
q
,
lens
,
dim
=
1
)
k_chunks
=
torch
.
split
(
k
,
lens
,
dim
=
1
)
v_chunks
=
torch
.
split
(
v
,
lens
,
dim
=
1
)
for
q_i
,
k_i
,
v_i
in
zip
(
q_chunks
,
k_chunks
,
v_chunks
):
q_i
,
k_i
,
v_i
=
(
rearrange
(
x
,
"b s h d -> b h s d"
)
for
x
in
[
q_i
,
k_i
,
v_i
]
)
...
...
vllm/model_executor/models/glm4_1v.py
View file @
bcb6f594
...
...
@@ -377,12 +377,12 @@ class Glm4vVisionAttention(nn.Module):
elif
self
.
attn_backend
==
AttentionBackendEnum
.
TORCH_SDPA
:
# Execute attention entry by entry for speed & less VRAM.
outputs
=
[]
for
i
in
range
(
1
,
len
(
cu_seqlens
)):
start_idx
=
cu_seqlens
[
i
-
1
]
end_idx
=
cu_seqlens
[
i
]
q_i
=
q
[:,
start_idx
:
end_idx
]
k_i
=
k
[:,
start_idx
:
end_idx
]
v_i
=
v
[:,
start_idx
:
end_idx
]
lens
=
(
cu_seqlens
[
1
:]
-
cu_seqlens
[:
-
1
]).
tolist
()
q_chunks
=
torch
.
split
(
q
,
lens
,
dim
=
1
)
k_chunks
=
torch
.
split
(
k
,
lens
,
dim
=
1
)
v_chunks
=
torch
.
split
(
v
,
lens
,
dim
=
1
)
for
q_i
,
k_i
,
v_i
in
zip
(
q_chunks
,
k_chunks
,
v_chunks
):
q_i
,
k_i
,
v_i
=
(
rearrange
(
x
,
"b s h d -> b h s d"
)
for
x
in
[
q_i
,
k_i
,
v_i
]
)
...
...
vllm/model_executor/models/qwen2_vl.py
View file @
bcb6f594
...
...
@@ -424,12 +424,12 @@ class Qwen2VisionAttention(nn.Module):
k
=
k
.
contiguous
()
v
=
v
.
contiguous
()
outputs
=
[]
for
i
in
range
(
1
,
len
(
cu_seqlens
)):
start_idx
=
cu_seqlens
[
i
-
1
]
end_idx
=
cu_seqlens
[
i
]
q_i
=
q
[:,
start_idx
:
end_idx
]
k_i
=
k
[:,
start_idx
:
end_idx
]
v_i
=
v
[:,
start_idx
:
end_idx
]
lens
=
(
cu_seqlens
[
1
:]
-
cu_seqlens
[:
-
1
]).
tolist
()
q_chunks
=
torch
.
split
(
q
,
lens
,
dim
=
1
)
k_chunks
=
torch
.
split
(
k
,
lens
,
dim
=
1
)
v_chunks
=
torch
.
split
(
v
,
lens
,
dim
=
1
)
for
q_i
,
k_i
,
v_i
in
zip
(
q_chunks
,
k_chunks
,
v_chunks
):
q_i
,
k_i
,
v_i
=
(
rearrange
(
x
,
"b s h d -> b h s d"
)
for
x
in
[
q_i
,
k_i
,
v_i
]
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment