Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
6dda13c8
Unverified
Commit
6dda13c8
authored
Jul 21, 2025
by
Woosuk Kwon
Committed by
GitHub
Jul 21, 2025
Browse files
[Misc] Add sliding window to flashinfer test (#21282)
Signed-off-by:
Woosuk Kwon
<
woosuk.kwon@berkeley.edu
>
parent
6b46c4b6
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
31 additions
and
18 deletions
+31
-18
tests/kernels/attention/test_flashinfer.py
tests/kernels/attention/test_flashinfer.py
+31
-18
No files found.
tests/kernels/attention/test_flashinfer.py
View file @
6dda13c8
...
...
@@ -77,6 +77,7 @@ def ref_paged_attn(
@
pytest
.
mark
.
parametrize
(
"block_size"
,
BLOCK_SIZES
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
DTYPES
)
@
pytest
.
mark
.
parametrize
(
"soft_cap"
,
[
None
,
30.0
,
50.0
])
@
pytest
.
mark
.
parametrize
(
"sliding_window"
,
[
None
,
64
])
@
torch
.
inference_mode
def
test_flashinfer_decode_with_paged_kv
(
kv_lens
:
list
[
int
],
...
...
@@ -85,6 +86,7 @@ def test_flashinfer_decode_with_paged_kv(
dtype
:
torch
.
dtype
,
block_size
:
int
,
soft_cap
:
Optional
[
float
],
sliding_window
:
Optional
[
int
],
)
->
None
:
torch
.
set_default_device
(
"cuda"
)
current_platform
.
seed_everything
(
0
)
...
...
@@ -136,7 +138,8 @@ def test_flashinfer_decode_with_paged_kv(
use_tensor_cores
=
(
(
num_query_heads
//
num_kv_heads
)
>
4
)
)
wrapper
.
plan
(
kv_indptr
,
wrapper
.
plan
(
kv_indptr
,
kv_indices
,
kv_last_page_lens
,
num_query_heads
,
...
...
@@ -144,9 +147,11 @@ def test_flashinfer_decode_with_paged_kv(
head_size
,
block_size
,
"NONE"
,
window_left
=
sliding_window
-
1
if
sliding_window
is
not
None
else
-
1
,
q_data_type
=
dtype
,
kv_data_type
=
dtype
,
logits_soft_cap
=
soft_cap
)
logits_soft_cap
=
soft_cap
,
)
output
=
wrapper
.
run
(
query
,
key_value_cache
)
...
...
@@ -157,7 +162,8 @@ def test_flashinfer_decode_with_paged_kv(
kv_lens
=
kv_lens
,
block_tables
=
block_tables
,
scale
=
scale
,
soft_cap
=
soft_cap
)
soft_cap
=
soft_cap
,
sliding_window
=
sliding_window
)
torch
.
testing
.
assert_close
(
output
,
ref_output
,
atol
=
1e-2
,
rtol
=
1e-2
),
\
f
"
{
torch
.
max
(
torch
.
abs
(
output
-
ref_output
))
}
"
...
...
@@ -168,12 +174,17 @@ def test_flashinfer_decode_with_paged_kv(
@
pytest
.
mark
.
parametrize
(
"block_size"
,
BLOCK_SIZES
)
@
pytest
.
mark
.
parametrize
(
"dtype"
,
DTYPES
)
@
pytest
.
mark
.
parametrize
(
"soft_cap"
,
[
None
,
30.0
,
50.0
])
@
pytest
.
mark
.
parametrize
(
"sliding_window"
,
[
None
,
64
])
@
torch
.
inference_mode
def
test_flashinfer_prefill_with_paged_kv
(
seq_lens
:
list
[
tuple
[
int
,
int
]],
def
test_flashinfer_prefill_with_paged_kv
(
seq_lens
:
list
[
tuple
[
int
,
int
]],
num_heads
:
tuple
[
int
,
int
],
head_size
:
int
,
dtype
:
torch
.
dtype
,
head_size
:
int
,
dtype
:
torch
.
dtype
,
block_size
:
int
,
soft_cap
:
Optional
[
float
])
->
None
:
soft_cap
:
Optional
[
float
],
sliding_window
:
Optional
[
int
],
)
->
None
:
torch
.
set_default_device
(
"cuda"
)
current_platform
.
seed_everything
(
0
)
num_seqs
=
len
(
seq_lens
)
...
...
@@ -242,6 +253,7 @@ def test_flashinfer_prefill_with_paged_kv(seq_lens: list[tuple[int, int]],
num_kv_heads
,
head_size
,
block_size
,
window_left
=
sliding_window
-
1
if
sliding_window
is
not
None
else
-
1
,
q_data_type
=
dtype
,
kv_data_type
=
dtype
,
logits_soft_cap
=
soft_cap
,
...
...
@@ -259,7 +271,8 @@ def test_flashinfer_prefill_with_paged_kv(seq_lens: list[tuple[int, int]],
kv_lens
=
kv_lens
,
block_tables
=
block_tables
,
scale
=
scale
,
soft_cap
=
soft_cap
)
soft_cap
=
soft_cap
,
sliding_window
=
sliding_window
)
torch
.
testing
.
assert_close
(
output
,
ref_output
,
atol
=
5e-2
,
rtol
=
1e-2
),
\
f
"
{
torch
.
max
(
torch
.
abs
(
output
-
ref_output
))
}
"
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment