Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
af8486de
Unverified
Commit
af8486de
authored
Feb 06, 2025
by
Sanju C Sudhakaran
Committed by
GitHub
Feb 05, 2025
Browse files
[Hardware][Intel-Gaudi] Enable FusedSDPA support for Intel Gaudi (HPU)
parent
4c3aac51
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
11 additions
and
1 deletion
+11
-1
vllm/attention/backends/hpu_attn.py
vllm/attention/backends/hpu_attn.py
+11
-1
No files found.
vllm/attention/backends/hpu_attn.py
View file @
af8486de
...
...
@@ -10,7 +10,8 @@ from typing import Any, Dict, List, Optional, Tuple, Type
import
torch
import
vllm_hpu_extension.ops
as
ops
from
vllm_hpu_extension.utils
import
Matmul
,
Softmax
,
VLLMKVCache
from
vllm_hpu_extension.utils
import
(
Matmul
,
ModuleFusedSDPA
,
Softmax
,
VLLMKVCache
)
from
vllm.attention.backends.abstract
import
(
AttentionBackend
,
AttentionImpl
,
AttentionLayer
,
...
...
@@ -137,9 +138,17 @@ class HPUAttentionImpl(AttentionImpl, torch.nn.Module):
self
.
prefill_usefusedsdpa
=
os
.
getenv
(
'VLLM_PROMPT_USE_FUSEDSDPA'
,
'0'
).
lower
()
in
[
'1'
,
'true'
]
self
.
fused_scaled_dot_product_attention
=
None
if
self
.
prefill_usefusedsdpa
:
assert
alibi_slopes
is
None
,
\
'Prefill with FusedSDPA not supported with alibi slopes!'
try
:
from
habana_frameworks.torch.hpex.kernels
import
FusedSDPA
self
.
fused_scaled_dot_product_attention
=
ModuleFusedSDPA
(
FusedSDPA
)
except
ImportError
:
logger
().
warning
(
"Could not import HPU FusedSDPA kernel. "
"vLLM will use native implementation."
)
suppored_head_sizes
=
HPUPagedAttention
.
get_supported_head_sizes
()
if
head_size
not
in
suppored_head_sizes
:
...
...
@@ -227,6 +236,7 @@ class HPUAttentionImpl(AttentionImpl, torch.nn.Module):
matmul_qk_op
=
self
.
matmul_qk
,
softmax_op
=
self
.
softmax
,
matmul_av_op
=
self
.
matmul_av
,
fsdpa_op
=
self
.
fused_scaled_dot_product_attention
,
)
output
=
out
.
reshape
(
batch_size
,
seq_len
,
hidden_size
)
else
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment