Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
0a56bcc0
Unverified
Commit
0a56bcc0
authored
Dec 13, 2024
by
Jani Monoses
Committed by
GitHub
Dec 13, 2024
Browse files
[Bugfix][Hardware][CPU] Enable Gemma2 with SDPA on CPU backend (#11169)
parent
0920ab91
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
4 additions
and
3 deletions
+4
-3
vllm/attention/backends/torch_sdpa.py
vllm/attention/backends/torch_sdpa.py
+4
-3
No files found.
vllm/attention/backends/torch_sdpa.py
View file @
0a56bcc0
...
...
@@ -13,7 +13,7 @@ from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
 from vllm.attention.backends.utils import CommonAttentionState
 from vllm.attention.ops.ipex_attn import PagedAttention
 from vllm.attention.ops.paged_attn import PagedAttentionMetadata
-from vllm.utils import make_tensor_with_pad
+from vllm.utils import make_tensor_with_pad, print_warning_once
 from vllm.worker.cpu_model_runner import ModelInputForCPUBuilder
...
...
@@ -395,7 +395,8 @@ class TorchSDPABackendImpl(AttentionImpl[TorchSDPAMetadata]):
             raise ValueError(
                 "Torch SPDA does not support block-sparse attention.")
         if logits_soft_cap is not None:
-            raise ValueError("Torch SPDA does not support logits soft cap.")
+            print_warning_once("Torch SPDA does not support logits soft cap. "
+                               "Outputs may be slightly off.")
         self.num_heads = num_heads
         self.head_size = head_size
         self.scale = float(scale)
...
...
@@ -619,7 +620,7 @@ class TorchSDPABackendImpl(AttentionImpl[TorchSDPAMetadata]):
                     value[None, :, start_kv:end_kv, :],
                     attn_mask=mask,
                     dropout_p=0.0,
-                    is_causal=causal_attn and not self.need_mask,
+                    is_causal=causal_attn and mask is None,
                     scale=self.scale).squeeze(0).movedim(query.dim() - 2, 0)
                 output[start_q:end_q, :, :] = sub_out
                 start_q, start_kv = end_q, end_kv
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment