Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
73b35cca
Unverified
Commit
73b35cca
authored
Feb 03, 2025
by
Russell Bryant
Committed by
GitHub
Feb 03, 2025
Browse files
[Core] Improve hash collision avoidance in prefix caching (#12621)
Signed-off-by:
Russell Bryant
<
rbryant@redhat.com
>
parent
5095e966
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
45 additions
and
10 deletions
+45
-10
tests/core/block/test_prefix_caching_block.py
tests/core/block/test_prefix_caching_block.py
+2
-2
vllm/core/block/prefix_caching_block.py
vllm/core/block/prefix_caching_block.py
+34
-8
vllm/v1/core/kv_cache_utils.py
vllm/v1/core/kv_cache_utils.py
+9
-0
No files found.
tests/core/block/test_prefix_caching_block.py
View file @
73b35cca
...
...
@@ -65,8 +65,8 @@ class TestPrefixCachingBlock:
previous_block
=
MagicMock
(
spec
=
PrefixCachingBlock
)
prev_block_hash
=
random
.
randint
(
0
,
1000
)
previous_block
.
content_hash
=
(
prev_block_hash
if
prev_block_has_hash
else
None
)
previous_block
.
content_hash
=
(
prev_block_hash
if
prev_block_has_hash
else
hash
(
'
None
'
)
)
num_to_fill
=
block_size
if
is_curr_block_full
else
random
.
randint
(
0
,
block_size
-
1
)
...
...
vllm/core/block/prefix_caching_block.py
View file @
73b35cca
...
...
@@ -65,6 +65,15 @@ class PrefixCachingBlockAllocator(BlockAllocator):
from 0 to num_blocks - 1.
"""
# Note that we use 'None' as a string here instead of None because
# as of Python 3.12, hash(None) returns a constant predictable value.
# This could possibly make it easier to find and exploit hash
# collisions. 'None' as a string will be hashed differently per process,
# but consistently within the same process. This is the same as the
# behavior of None prior to Python 3.12.
_none_hash
:
int
=
hash
(
'None'
)
# Implements Block.Factory.
def
__init__
(
self
,
num_blocks
:
int
,
...
...
@@ -122,7 +131,6 @@ class PrefixCachingBlockAllocator(BlockAllocator):
self
.
metric_data
=
CacheMetricData
()
# Implements Block.Factory.
def
_create_block
(
self
,
prev_block
:
Optional
[
Block
],
...
...
@@ -737,6 +745,14 @@ class PrefixCachingBlock(Block):
such as adapters that influence the block, apart from the token_ids.
"""
# Note that we use 'None' as a string here instead of None because
# as of Python 3.12, hash(None) returns a constant predictable value.
# This could possibly make it easier to find and exploit hash
# collisions. 'None' as a string will be hashed differently per process,
# but consistently within the same process. This is the same as the
# behavior of None prior to Python 3.12.
_none_hash
:
int
=
hash
(
'None'
)
def
__init__
(
self
,
prev_block
:
Optional
[
Block
],
...
...
@@ -891,13 +907,13 @@ class PrefixCachingBlock(Block):
is_first_block
=
self
.
_prev_block
is
None
prev_block_hash
=
(
None
if
is_first_block
else
self
.
_none_hash
if
is_first_block
else
self
.
_prev_block
.
content_hash
# type: ignore
)
# Previous block exists but does not yet have a hash.
# Return no hash in this case.
if
prev_block_hash
is
None
and
not
is_first_block
:
if
prev_block_hash
==
self
.
_none_hash
and
not
is_first_block
:
return
None
self
.
_cached_content_hash
=
PrefixCachingBlock
.
hash_block_tokens
(
...
...
@@ -907,8 +923,9 @@ class PrefixCachingBlock(Block):
extra_hash
=
self
.
_extra_hash
)
return
self
.
_cached_content_hash
@
staticmethod
def
hash_block_tokens
(
is_first_block
:
bool
,
@
classmethod
def
hash_block_tokens
(
cls
,
is_first_block
:
bool
,
prev_block_hash
:
Optional
[
int
],
cur_block_token_ids
:
List
[
int
],
extra_hash
:
Optional
[
int
]
=
None
)
->
int
:
...
...
@@ -929,7 +946,8 @@ class PrefixCachingBlock(Block):
Returns:
- int: The computed hash value for the block.
"""
assert
(
prev_block_hash
is
None
)
==
is_first_block
if
is_first_block
and
prev_block_hash
is
None
:
prev_block_hash
=
cls
.
_none_hash
return
hash
((
is_first_block
,
prev_block_hash
,
*
cur_block_token_ids
,
extra_hash
))
...
...
@@ -949,6 +967,14 @@ class ComputedBlocksTracker:
cached block hashes in the allocator.
"""
# Note that we use 'None' as a string here instead of None because
# as of Python 3.12, hash(None) returns a constant predictable value.
# This could possibly make it easier to find and exploit hash
# collisions. 'None' as a string will be hashed differently per process,
# but consistently within the same process. This is the same as the
# behavior of None prior to Python 3.12.
_none_hash
:
int
=
hash
(
'None'
)
def
__init__
(
self
,
allocator
:
DeviceAwareBlockAllocator
,
...
...
@@ -994,7 +1020,7 @@ class ComputedBlocksTracker:
# We need to know the hash of the previous block to compute the hash of
# the current block so that blocks could be uniquely identified across
# sequences of prefixes.
prev_block_hash
=
(
None
if
cur_num_blocks_recorded
==
0
else
prev_block_hash
=
(
self
.
_none_hash
if
cur_num_blocks_recorded
==
0
else
block_hashes_recorded
[
-
1
])
# Only update the computed block hashes for the new blocks
for
i
in
range
(
cur_num_blocks_recorded
,
num_computed_blocks
):
...
...
@@ -1009,7 +1035,7 @@ class ComputedBlocksTracker:
# This has to be kept in sync with the allocator's hash
# calculation.
block_hash
=
PrefixCachingBlock
.
hash_block_tokens
(
is_first_block
=
prev_block_hash
is
None
,
is_first_block
=
prev_block_hash
==
self
.
_none_hash
,
prev_block_hash
=
prev_block_hash
,
cur_block_token_ids
=
block_token_ids
,
extra_hash
=
extra_hash
,
...
...
vllm/v1/core/kv_cache_utils.py
View file @
73b35cca
...
...
@@ -263,6 +263,15 @@ def hash_block_tokens(
The hash value of the block and the token ids in the block.
The entire tuple is used as the hash key of the block.
"""
if
not
parent_block_hash
:
# Note that we use 'None' as a string here instead of None because
# as of Python 3.12, hash(None) returns a constant predictable value.
# This could possibly make it easier to find and exploit hash
# collisions. 'None' as a string will be hashed differently per process,
# but consistently within the same process. This is the same as the
# behavior of None prior to Python 3.12.
parent_block_hash
=
hash
(
'None'
)
curr_block_token_ids_tuple
=
tuple
(
curr_block_token_ids
)
return
BlockHashType
(
hash
((
parent_block_hash
,
curr_block_token_ids_tuple
,
extra_keys
)),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment