Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
1723ef1a
Unverified
Commit
1723ef1a
authored
Aug 15, 2025
by
eigen
Committed by
GitHub
Aug 15, 2025
Browse files
minor: zero workspace buffer init for flashinfer trtllm-gen attn (#22603)
parent
00d6cba0
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
4 additions
and
4 deletions
+4
-4
tests/kernels/attention/test_flashinfer_trtllm_attention.py
tests/kernels/attention/test_flashinfer_trtllm_attention.py
+2
-2
vllm/attention/backends/flashinfer.py
vllm/attention/backends/flashinfer.py
+1
-1
vllm/v1/attention/backends/flashinfer.py
vllm/v1/attention/backends/flashinfer.py
+1
-1
No files found.
tests/kernels/attention/test_flashinfer_trtllm_attention.py
View file @
1723ef1a
...
...
@@ -113,7 +113,7 @@ def test_flashinfer_trtllm_decode_with_baseline(
kv_indices
=
torch
.
tensor
(
kv_indices
,
dtype
=
torch
.
int32
)
kv_last_page_lens
=
torch
.
tensor
(
kv_last_page_lens
,
dtype
=
torch
.
int32
)
workspace_buffer
=
torch
.
empty
(
128
*
1024
*
1024
,
dtype
=
torch
.
int8
)
workspace_buffer
=
torch
.
zeros
(
128
*
1024
*
1024
,
dtype
=
torch
.
int8
)
wrapper
=
flashinfer
.
BatchDecodeWithPagedKVCacheWrapper
(
workspace_buffer
,
kv_layout
,
...
...
@@ -247,7 +247,7 @@ def test_flashinfer_trtllm_prefill_with_baseline(
kv_indices
=
torch
.
tensor
(
kv_indices
,
dtype
=
torch
.
int32
)
kv_last_page_lens
=
torch
.
tensor
(
kv_last_page_lens
,
dtype
=
torch
.
int32
)
workspace_buffer
=
torch
.
empty
(
128
*
1024
*
1024
,
dtype
=
torch
.
int8
)
workspace_buffer
=
torch
.
zeros
(
128
*
1024
*
1024
,
dtype
=
torch
.
int8
)
wrapper
=
flashinfer
.
BatchPrefillWithPagedKVCacheWrapper
(
workspace_buffer
,
kv_layout
)
wrapper
.
plan
(
q_indptr
,
...
...
vllm/attention/backends/flashinfer.py
View file @
1723ef1a
...
...
@@ -203,7 +203,7 @@ class FlashInferState(AttentionState):
def
_get_workspace_buffer
(
self
):
if
self
.
_workspace_buffer
is
None
:
self
.
_workspace_buffer
=
torch
.
empty
(
self
.
_workspace_buffer
=
torch
.
zeros
(
FLASHINFER_WORKSPACE_BUFFER_SIZE
,
dtype
=
torch
.
uint8
,
device
=
self
.
runner
.
device
)
...
...
vllm/v1/attention/backends/flashinfer.py
View file @
1723ef1a
...
...
@@ -252,7 +252,7 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
def
_get_workspace_buffer
(
self
):
if
self
.
_workspace_buffer
is
None
:
self
.
_workspace_buffer
=
torch
.
empty
(
self
.
_workspace_buffer
=
torch
.
zeros
(
FLASHINFER_WORKSPACE_BUFFER_SIZE
,
dtype
=
torch
.
uint8
,
device
=
self
.
device
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment