Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
b5fd9506
Unverified
Commit
b5fd9506
authored
Jun 04, 2025
by
Chen Zhang
Committed by
GitHub
Jun 03, 2025
Browse files
[Bugfix] get_num_blocks_to_allocate with null_block (#19031)
Signed-off-by: Chen Zhang <zhangch99@outlook.com>
parent
135cf55c
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
32 additions
and
4 deletions
+32
-4
tests/v1/core/test_specialized_manager.py
tests/v1/core/test_specialized_manager.py
+23
-0
vllm/v1/core/block_pool.py
vllm/v1/core/block_pool.py
+3
-2
vllm/v1/core/kv_cache_utils.py
vllm/v1/core/kv_cache_utils.py
+3
-0
vllm/v1/core/single_type_kv_cache_manager.py
vllm/v1/core/single_type_kv_cache_manager.py
+3
-2
No files found.
tests/v1/core/test_specialized_manager.py
View file @
b5fd9506
...
...
@@ -144,3 +144,26 @@ def test_sliding_window_remove_skipped_blocks():
# of removed blocks should be [1003, 1002].
manager
.
remove_skipped_blocks
(
"test"
,
11
)
assert_block_id
(
block_table
,
[
null_block_id
]
*
4
+
original_block_ids
[
4
:])
def test_get_num_blocks_to_allocate():
    """Check the allocation count with and without null blocks.

    Two computed-block lists of length ten are passed to
    ``get_num_blocks_to_allocate`` for a request of ``20 * block_size``
    tokens:

    * ten freshly constructed ``KVCacheBlock`` objects -> 20 expected;
    * five references to the pool's ``null_block`` followed by five freshly
      constructed ``KVCacheBlock`` objects -> 15 expected.
    """
    block_size = 2
    num_tokens = 20 * block_size

    spec = SlidingWindowSpec(
        block_size=block_size,
        num_kv_heads=1,
        head_size=1,
        dtype=torch.float32,
        # Placeholder value, not related to test result
        sliding_window=4,
        use_mla=False,
    )
    pool = BlockPool(num_gpu_blocks=100, enable_caching=True)
    manager = get_sliding_window_manager(spec, pool)

    # Case 1: every computed block is a regular, newly created block.
    regular_only = [KVCacheBlock(block_id) for block_id in range(1, 11)]

    # Case 2: the first five entries all refer to the pool's null block,
    # followed by five regular, newly created blocks.
    null_prefix = [pool.null_block] * 5
    regular_suffix = [KVCacheBlock(block_id) for block_id in range(1, 6)]
    with_null_blocks = null_prefix + regular_suffix

    needed_regular = manager.get_num_blocks_to_allocate(
        "1", num_tokens, regular_only)
    assert needed_regular == 20

    needed_with_null = manager.get_num_blocks_to_allocate(
        "2", num_tokens, with_null_blocks)
    assert needed_with_null == 15
vllm/v1/core/block_pool.py
View file @
b5fd9506
...
...
@@ -63,6 +63,7 @@ class BlockPool:
# The ref_cnt of null_block is not maintained, needs special care to
# avoid freeing it.
self
.
null_block
=
self
.
free_block_queue
.
popleft
()
self
.
null_block
.
is_null
=
True
self
.
enable_kv_cache_events
=
enable_kv_cache_events
self
.
kv_event_queue
:
list
[
KVCacheEvent
]
=
[]
...
...
@@ -252,7 +253,7 @@ class BlockPool:
for
block
in
blocks
:
# ref_cnt=0 means this block is in the free list (i.e. eviction
# candidate), so remove it.
if
block
.
ref_cnt
==
0
and
block
!=
self
.
null_block
:
if
block
.
ref_cnt
==
0
and
not
block
.
is_null
:
self
.
free_block_queue
.
remove
(
block
)
block
.
incr_ref
()
...
...
@@ -267,7 +268,7 @@ class BlockPool:
for
block
in
ordered_blocks
:
block
.
decr_ref
()
# null_block should not be added to the free list.
if
block
.
ref_cnt
==
0
and
block
!=
self
.
null_block
:
if
block
.
ref_cnt
==
0
and
not
block
.
is_null
:
self
.
free_block_queue
.
append
(
block
)
def
reset_prefix_cache
(
self
)
->
bool
:
...
...
vllm/v1/core/kv_cache_utils.py
View file @
b5fd9506
...
...
@@ -125,6 +125,9 @@ class KVCacheBlock:
prev_free_block
:
Optional
[
"KVCacheBlock"
]
=
None
next_free_block
:
Optional
[
"KVCacheBlock"
]
=
None
# Whether the block is a null block that should never be cached.
is_null
:
bool
=
False
def
incr_ref
(
self
):
self
.
ref_cnt
+=
1
...
...
vllm/v1/core/single_type_kv_cache_manager.py
View file @
b5fd9506
...
...
@@ -83,8 +83,9 @@ class SingleTypeKVCacheManager(ABC):
# free queue and ref_cnt == 0), it will be changed from a free block
# to a computed block when the request is allocated, so we also count
# it as needed to be allocated.
num_evictable_computed_blocks
=
sum
(
blk
.
ref_cnt
==
0
for
blk
in
new_computed_blocks
)
num_evictable_computed_blocks
=
sum
(
blk
.
ref_cnt
==
0
and
not
blk
.
is_null
for
blk
in
new_computed_blocks
)
return
((
num_new_blocks
+
num_evictable_computed_blocks
)
*
self
.
num_kv_cache_groups
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment