Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
734f52d8
"vllm/vscode:/vscode.git/clone" did not exist on "e0c910bb89e45f4a2a976dc3c76248bbdea854e0"
Commit
734f52d8
authored
Jan 05, 2026
by
zhuwenwen
Browse files
update sparse_attn_indexer
parent
f441aca2
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
15 additions
and
32 deletions
+15
-32
vllm/model_executor/models/deepseek_v2.py
vllm/model_executor/models/deepseek_v2.py
+15
-32
No files found.
vllm/model_executor/models/deepseek_v2.py
View file @
734f52d8
...
...
@@ -872,38 +872,21 @@ class Indexer(nn.Module):
-
1
)
*
q_scale
*
self
.
softmax_scale
*
self
.
n_head
**-
0.5
weights
=
weights
.
squeeze
(
-
1
)
if
not
current_platform
.
is_rocm
()
or
torch
.
cuda
.
get_device_properties
(
"cuda"
).
gcnArchName
.
split
(
':'
)[
0
]
==
"gfx938"
:
return
torch
.
ops
.
vllm
.
sparse_attn_indexer
(
hidden_states
,
self
.
k_cache
.
prefix
,
self
.
k_cache
.
kv_cache
[
0
],
q_fp8
,
k
,
weights
,
self
.
quant_block_size
,
self
.
scale_fmt
,
self
.
topk_tokens
,
self
.
head_dim
,
self
.
max_model_len
,
self
.
max_total_seq_len
,
self
.
topk_indices_buffer
,
)
else
:
return
torch
.
ops
.
vllm
.
sparse_attn_indexer
(
hidden_states
,
self
.
k_cache
.
prefix
,
self
.
k_cache
.
kv_cache
[
0
],
q
,
k
,
weights
,
self
.
quant_block_size
,
self
.
scale_fmt
,
self
.
topk_tokens
,
self
.
head_dim
,
self
.
max_model_len
,
self
.
max_total_seq_len
,
self
.
topk_indices_buffer
,
)
return
torch
.
ops
.
vllm
.
sparse_attn_indexer
(
hidden_states
,
self
.
k_cache
.
prefix
,
self
.
k_cache
.
kv_cache
[
0
],
q_fp8
if
not
current_platform
.
is_rocm
()
or
torch
.
cuda
.
get_device_properties
(
"cuda"
).
gcnArchName
.
split
(
':'
)[
0
]
==
"gfx938"
else
q
,
k
,
weights
,
self
.
quant_block_size
,
self
.
scale_fmt
,
self
.
topk_tokens
,
self
.
head_dim
,
self
.
max_model_len
,
self
.
max_total_seq_len
,
self
.
topk_indices_buffer
,
)
class
DeepseekV2MLAAttention
(
nn
.
Module
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment