Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
7cec75a7
Commit
7cec75a7
authored
Mar 13, 2026
by
liuchy5
Browse files
修改sparse_attn hip后端
parent
ce52b8a8
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
19 additions
and
7 deletions
+19
-7
vllm/model_executor/custom_op.py
vllm/model_executor/custom_op.py
+1
-1
vllm/model_executor/layers/sparse_attn_indexer.py
vllm/model_executor/layers/sparse_attn_indexer.py
+18
-6
No files found.
vllm/model_executor/custom_op.py
View file @
7cec75a7
...
...
@@ -184,7 +184,7 @@ class CustomOp(nn.Module):
             return self.maybe_compile(self.forward_native, enable=compile_native)
         if current_platform.is_rocm():
-            return self.forward_cuda
+            return self.forward_hip
         elif current_platform.is_cpu():
             return self.forward_cpu
         elif current_platform.is_tpu():
...
...
vllm/model_executor/layers/sparse_attn_indexer.py
View file @
7cec75a7
...
...
@@ -296,8 +296,7 @@ class SparseAttnIndexer(CustomOp):
         if current_platform.is_cuda():
             return self.forward_cuda(hidden_states, q_fp8, k, weights)
         elif current_platform.is_rocm():
-            # return self.forward_hip(hidden_states, q_fp8, k, weights)
-            return self.forward_cuda(hidden_states, q_fp8, k, weights)
+            return self.forward_hip(hidden_states, q_fp8, k, weights)
         else:
             raise NotImplementedError(
                 "SparseAttnIndexer native forward is only implemented for "
...
...
@@ -349,9 +348,22 @@ class SparseAttnIndexer(CustomOp):
                 self.max_model_len,
                 self.max_total_seq_len,
                 self.topk_indices_buffer,
-            )
+            )
         else:
-            raise RuntimeError(
-                "Sparse attention indexer ROCm custom op requires ROCm "
-                "Aiter ops to be enabled."
+            return torch.ops.vllm.sparse_attn_indexer(
+                hidden_states,
+                self.k_cache.prefix,
+                self.k_cache.kv_cache[0],
+                q_fp8,
+                k,
+                weights,
+                self.quant_block_size,
+                self.scale_fmt,
+                self.topk_tokens,
+                self.head_dim,
+                self.max_model_len,
+                self.max_total_seq_len,
+                self.topk_indices_buffer,
             )
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment