Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f9b567df
Commit
f9b567df
authored
Jan 02, 2025
by
zhuwenwen
Browse files
add softcap interface for gemma-2
parent
6dc7aa42
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
8 additions
and
0 deletions
+8
-0
vllm/attention/backends/rocm_flash_attn.py
vllm/attention/backends/rocm_flash_attn.py
+8
-0
No files found.
vllm/attention/backends/rocm_flash_attn.py
View file @
f9b567df
...
@@ -350,10 +350,17 @@ class ROCmFlashAttentionImpl(AttentionImpl):
...
@@ -350,10 +350,17 @@ class ROCmFlashAttentionImpl(AttentionImpl):
if
blocksparse_params
is
not
None
:
if
blocksparse_params
is
not
None
:
raise
ValueError
(
raise
ValueError
(
"ROCmFlashAttention does not support blocksparse attention."
)
"ROCmFlashAttention does not support blocksparse attention."
)
'''
if logits_soft_cap is not None:
if logits_soft_cap is not None:
raise ValueError(
raise ValueError(
"ROCmFlashAttention does not support attention logits soft "
"ROCmFlashAttention does not support attention logits soft "
"capping.")
"capping.")
'''
if
logits_soft_cap
is
None
:
# In flash-attn, setting logits_soft_cap as 0 means no soft cap.
logits_soft_cap
=
0
self
.
logits_soft_cap
=
logits_soft_cap
self
.
num_heads
=
num_heads
self
.
num_heads
=
num_heads
self
.
head_size
=
head_size
self
.
head_size
=
head_size
self
.
scale
=
float
(
scale
)
self
.
scale
=
float
(
scale
)
...
@@ -566,6 +573,7 @@ class ROCmFlashAttentionImpl(AttentionImpl):
...
@@ -566,6 +573,7 @@ class ROCmFlashAttentionImpl(AttentionImpl):
causal
=
True
,
causal
=
True
,
window_size
=
self
.
sliding_window
,
window_size
=
self
.
sliding_window
,
alibi_slopes
=
self
.
alibi_slopes
,
alibi_slopes
=
self
.
alibi_slopes
,
softcap
=
self
.
logits_soft_cap
,
)
)
# common code for prefill
# common code for prefill
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment