Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
06eb6e84
Commit
06eb6e84
authored
Sep 22, 2025
by
zhuwenwen
Browse files
remove envs.VLLM_USE_PA_PRINT_PARAM
parent
b374a264
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
0 additions
and
12 deletions
+0
-12
vllm/attention/backends/rocm_flash_attn.py
vllm/attention/backends/rocm_flash_attn.py
+0
-6
vllm/v1/attention/backends/flash_attn.py
vllm/v1/attention/backends/flash_attn.py
+0
-6
No files found.
vllm/attention/backends/rocm_flash_attn.py
View file @
06eb6e84
...
...
@@ -1002,12 +1002,6 @@ class ROCmFlashAttentionImpl(AttentionImpl):
tree_attention_masks_tensor
=
decode_meta
.
tree_attention_masks_tensor
if
envs
.
VLLM_USE_FLASH_ATTN_PA
:
from
flash_attn
import
vllm_flash_attn_with_kvcache
if
envs
.
VLLM_USE_PA_PRINT_PARAM
:
print
(
"PA SIZE:"
)
print
(
f
"q.shape =
{
decode_query
.
unsqueeze
(
1
).
shape
}
, key_cache.shape =
{
key_cache
.
shape
}
, value_cache.shape =
{
value_cache
.
shape
}
, kv_cache_dtype =
{
self
.
kv_cache_dtype
}
"
)
print
(
f
"block_size=
{
block_size
}
, cache_seqlens.shape =
{
decode_meta
.
seq_lens_tensor
.
shape
}
, block_tables.shape =
{
decode_meta
.
block_tables
.
shape
}
"
)
print
(
f
"softmax_scale =
{
self
.
scale
:.
3
f
}
, window_size =
{
self
.
sliding_window
}
, softcap =
{
self
.
logits_soft_cap
}
, alibi_slopes =
{
self
.
alibi_slopes
}
"
)
# output[num_prefill_tokens:] = self.fa_decode_attn_func(
output
[
num_prefill_tokens
:]
=
vllm_flash_attn_with_kvcache
(
q
=
decode_query
.
unsqueeze
(
1
),
...
...
vllm/v1/attention/backends/flash_attn.py
View file @
06eb6e84
...
...
@@ -635,12 +635,6 @@ class FlashAttentionImpl(AttentionImpl):
num_splits
=
attn_metadata
.
max_num_splits
,
)
else
:
if
envs
.
VLLM_USE_PA_PRINT_PARAM
:
print
(
"PA SIZE:"
)
print
(
f
"q.shape =
{
query
[:
num_actual_tokens
].
shape
}
, key_cache.shape =
{
key_cache
.
shape
}
, value_cache.shape =
{
value_cache
.
shape
}
"
)
print
(
f
"cu_seqlens_q.shape =
{
cu_seqlens_q
.
shape
}
, max_seqlen_q =
{
max_seqlen_q
}
, seqused_k.shape =
{
seqused_k
.
shape
}
, max_seqlen_k =
{
max_seqlen_k
}
"
)
print
(
f
"softmax_scale =
{
self
.
scale
:.
3
f
}
, alibi_slopes =
{
self
.
alibi_slopes
}
, window_size =
{
self
.
sliding_window
}
, block_tables.shape =
{
block_table
.
shape
}
, softcap =
{
self
.
logits_soft_cap
}
, scheduler_metadata =
{
scheduler_metadata
}
"
)
vllm_flash_attn_varlen_func
(
q
=
query
[:
num_actual_tokens
],
k
=
key_cache
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment