Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e64fde4b
Unverified
Commit
e64fde4b
authored
May 25, 2024
by
leiwen83
Committed by
GitHub
May 24, 2024
Browse files
[Core][Bugfix]: fix prefix caching for blockv2 (#4764)
Co-authored-by:
Lei Wen
<
wenlei03@qiyi.com
>
parent
91977095
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
141 additions
and
17 deletions
+141
-17
tests/core/block/test_prefix_caching_block.py
tests/core/block/test_prefix_caching_block.py
+117
-0
vllm/core/block/prefix_caching_block.py
vllm/core/block/prefix_caching_block.py
+24
-17
No files found.
tests/core/block/test_prefix_caching_block.py
View file @
e64fde4b
...
...
@@ -410,6 +410,123 @@ class TestPrefixCachingBlockAllocator:
assert
(
len
(
res
)
==
zero_point_blocks
)
# Promotion de-duplication: a mutable block that fills up and is promoted to
# immutable while an identical-content immutable block already exists must
# hand its own physical block back to the hashless allocator, and the
# pre-existing immutable block gets its refcount increased.
@staticmethod
@pytest.mark.parametrize("num_blocks", [3])
@pytest.mark.parametrize("block_size", [16])
@pytest.mark.parametrize("seed", list(range(10)))
def test_alloc_promotion(num_blocks: int, block_size: int, seed: int):
    """Check that promoting a duplicate-content block reuses the cached one.

    An immutable block is allocated first, then a mutable block is filled
    with the very same tokens one at a time. Once full, the mutable block
    is expected to adopt the cached block's id, release its original
    physical block, and bump the cached block's refcount to 2.
    """
    random.seed(seed)

    allocator = PrefixCachingBlockAllocator(
        num_blocks=num_blocks,
        block_size=block_size,
    )
    full_block_tokens = list(range(block_size))

    # Seed the cache with one fully populated immutable block.
    cached = allocator.allocate_immutable(
        prev_block=None,
        token_ids=full_block_tokens,
    )
    assert allocator._refcounter.get(cached.block_id) == 1

    # Fill a fresh mutable block with identical content, token by token.
    growing = allocator.allocate_mutable(prev_block=None)
    original_id = growing.block_id
    for token in full_block_tokens:
        growing.append_token_ids([token])

    # After promotion the block shares the cached block's id, its original
    # physical block is back in the hashless allocator's free list, and
    # the cached block's refcount went up by one.
    assert growing.block_id == cached.block_id
    assert original_id in allocator._hashless_allocator._free_block_indices
    assert allocator._refcounter.get(cached.block_id) == 2
# Test case where eviction and allocation are interleaved; makes sure the
# allocator's internal bookkeeping stays consistent at every step.
@staticmethod
@pytest.mark.parametrize("num_blocks", [3])
@pytest.mark.parametrize("block_size", [16])
@pytest.mark.parametrize("seed", list(range(10)))
def test_eviction_alloc_mixed(num_blocks: int, block_size: int, seed: int):
    """Walk the allocator through alloc/free/evict and check its tracking.

    The checks look at the hashless allocator's free list, ``_blocks``,
    ``_cached_blocks`` (content hash -> block id), the evictor's free
    table and the refcounts after each phase.
    """
    random.seed(seed)

    all_blocks_list = list(range(num_blocks))
    zero_ref = dict.fromkeys(range(num_blocks), 0)
    allocator = PrefixCachingBlockAllocator(
        num_blocks=num_blocks,
        block_size=block_size,
    )
    token_ids = list(range(num_blocks * block_size))
    free_indices = allocator._hashless_allocator._free_block_indices

    # Initially every block is free in the hashless allocator; _blocks,
    # _cached_blocks and the evictor are empty and every refcount is 0.
    assert list(free_indices) == all_blocks_list
    assert len(allocator._blocks.keys()) == 0
    assert len(allocator._cached_blocks.values()) == 0
    assert len(allocator.evictor.free_table.keys()) == 0
    assert allocator._refcounter._refcounts == zero_ref

    # Allocate independent immutable chains, each consisting of one block.
    new_block = [
        allocator.allocate_immutable(
            prev_block=None,
            token_ids=token_ids[block_size * i:block_size * (i + 1)],
        )
        for i in range(num_blocks)
    ]

    # Free all blocks. Now all blocks shall be in the evictor, nothing
    # shall be left in _blocks, all blocks shall still be tracked in
    # _cached_blocks, and every refcount shall be zero.
    for block in new_block:
        allocator.free(block)

    free_indices = allocator._hashless_allocator._free_block_indices
    assert len(allocator._blocks.keys()) == 0
    assert len(free_indices) == 0
    assert list(allocator._cached_blocks.values()) == all_blocks_list
    assert list(allocator.evictor.free_table.keys()) == all_blocks_list
    assert allocator._refcounter._refcounts == zero_ref

    # Allocate a mutable block: block 0 shall be evicted, its content
    # hash reset to None and its refcount set to 1.
    mutable = allocator.allocate_mutable(prev_block=None)

    assert mutable.block_id == 0
    assert mutable.content_hash is None
    assert 0 in allocator._blocks
    assert allocator._refcounter.get(0) == 1
    # _cached_blocks is keyed by content hash, so block ids must be looked
    # up among its values; a bare `in` would test the hash keys instead.
    assert 0 not in allocator._cached_blocks.values()
    assert 0 not in allocator.evictor

    # The mutable block has no hash yet, so freeing it shall release it
    # into the hashless allocator rather than the evictor.
    allocator.free(mutable)

    free_indices = allocator._hashless_allocator._free_block_indices
    assert len(allocator._blocks.keys()) == 0
    assert allocator._refcounter._refcounts == zero_ref
    assert 0 not in allocator._cached_blocks.values()
    assert 0 not in allocator.evictor
    assert 0 in free_indices

    # Allocating an immutable block for the first block_size tokens shall
    # take the free block from the hashless allocator, leaving it empty.
    block = allocator.allocate_immutable(
        prev_block=None,
        token_ids=token_ids[:block_size],
    )

    free_indices = allocator._hashless_allocator._free_block_indices
    assert block.block_id == 0
    assert len(free_indices) == 0
    assert 0 in allocator._blocks
    assert 0 in allocator._cached_blocks.values()
    assert allocator._refcounter.get(0) == 1
    assert 0 not in allocator.evictor

    # Allocate a mutable block again; it shall be popped from the evictor.
    mutable = allocator.allocate_mutable(prev_block=None)

    free_indices = allocator._hashless_allocator._free_block_indices
    assert len(free_indices) == 0
    assert mutable.block_id not in allocator.evictor.free_table
    assert allocator._refcounter.get(mutable.block_id) == 1
# Test case where two last accessed times are equal
@
staticmethod
@
pytest
.
mark
.
parametrize
(
"num_blocks"
,
[
1024
])
...
...
vllm/core/block/prefix_caching_block.py
View file @
e64fde4b
...
...
@@ -160,22 +160,18 @@ class PrefixCachingBlockAllocator(BlockAllocator):
# If the evictor has blocks available for eviction, evict a block
# and return it.
if
self
.
evictor
.
num_blocks
>
0
:
# here we get an evicted block, which is only added
# into evictor if its ref counter is 0
# and since its content would be changed, we need
# to remove it from _cached_blocks's tracking list
block_id
,
content_hash_to_evict
=
self
.
evictor
.
evict
()
# Several blocks may share the same content hash: when a later
# block is promoted from mutable to immutable, its physical
# block is added to the evictor.
# In that case we must not pop the entry from _cached_blocks,
# because the same content is still used by other blocks, so
# we need to check the refcount before deciding to pop it.
_block_id
=
self
.
_cached_blocks
[
content_hash_to_evict
]
refcount
=
self
.
_refcounter
.
get
(
_block_id
)
if
refcount
==
1
:
self
.
_cached_blocks
.
pop
(
content_hash_to_evict
)
assert
self
.
_refcounter
.
get
(
_block_id
)
==
0
assert
_block_id
==
block_id
self
.
_cached_blocks
.
pop
(
content_hash_to_evict
)
self
.
_refcounter
.
incr
(
block_id
)
# the block comes from evictor already contain computed result
...
...
@@ -199,7 +195,11 @@ class PrefixCachingBlockAllocator(BlockAllocator):
def
_incr_refcount_cached_block
(
self
,
block
:
Block
,
block_id
:
BlockId
)
->
None
:
# since block is already computed, mark it
# now _incr_refcount_cached_block comes from two place
# allocate_immutable/promote_to_immutable_block where hit
# _cached_blocks hash key.
# In both cases, it means that already exists a already
# computed block which shared with block now
block
.
computed
=
True
refcount
=
self
.
_refcounter
.
incr
(
block_id
)
...
...
@@ -228,13 +228,19 @@ class PrefixCachingBlockAllocator(BlockAllocator):
block
:
Block
)
->
None
:
assert
isinstance
(
block
,
PrefixCachingBlock
)
if
block
.
content_hash
is
None
:
# If we come from promote_to_immutable_block, block.content_hash
# is never None.
# However, we still need to release the block with the same content
# so that its physical block can be reused.
if
block
.
block_id
!=
block_id
or
block
.
content_hash
is
None
:
refcount
=
self
.
_refcounter
.
get
(
block_id
)
# In the fork case a block can hold more than one reference,
# so we cannot drop it from tracking if its refcount is larger than 1.
if
refcount
<=
1
:
assert
block
.
block_id
is
not
None
refcount
=
self
.
_refcounter
.
get
(
block
.
block_id
)
if
refcount
==
1
:
del
self
.
_blocks
[
block
.
block_id
]
return
self
.
_hashless_allocator
.
free
(
block
)
refcount
=
self
.
_refcounter
.
decr
(
block_id
)
...
...
@@ -317,7 +323,8 @@ class PrefixCachingBlockAllocator(BlockAllocator):
if
block
.
content_hash
not
in
self
.
_cached_blocks
:
self
.
_cached_blocks
[
block
.
content_hash
]
=
block
.
block_id
else
:
self
.
_free_block_id_for_block
(
block
.
block_id
,
block
)
self
.
_free_block_id_for_block
(
self
.
_cached_blocks
[
block
.
content_hash
],
block
)
self
.
_incr_refcount_cached_block
(
block
,
self
.
_cached_blocks
[
block
.
content_hash
])
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment