Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
8c3230d8
Unverified
Commit
8c3230d8
authored
Dec 31, 2024
by
Chen Zhang
Committed by
GitHub
Dec 31, 2024
Browse files
[V1] Simplify vision block hash for prefix caching by removing offset from hash (#11646)
parent
2c571880
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
6 additions
and
6 deletions
+6
-6
tests/v1/core/test_prefix_caching.py
tests/v1/core/test_prefix_caching.py
+4
-4
vllm/v1/core/kv_cache_utils.py
vllm/v1/core/kv_cache_utils.py
+2
-2
No files found.
tests/v1/core/test_prefix_caching.py
View file @
8c3230d8
...
...
@@ -469,9 +469,9 @@ def test_mm_prefix_caching():
# Completed block should have hashes with extra keys.
assert
not
computed_blocks
assert
len
(
req0
.
kv_block_hashes
)
==
3
assert
req0
.
kv_block_hashes
[
0
].
extra_keys
==
(
(
"aaa"
,
0
),
)
assert
req0
.
kv_block_hashes
[
1
].
extra_keys
==
(
(
"aaa"
,
5
),
(
"bbb"
,
0
)
)
assert
req0
.
kv_block_hashes
[
2
].
extra_keys
==
(
(
"bbb"
,
2
),
)
assert
req0
.
kv_block_hashes
[
0
].
extra_keys
==
(
"aaa"
,
)
assert
req0
.
kv_block_hashes
[
1
].
extra_keys
==
(
"aaa"
,
"bbb"
)
assert
req0
.
kv_block_hashes
[
2
].
extra_keys
==
(
"bbb"
,
)
blocks
=
manager
.
allocate_slots
(
req0
,
59
,
computed_blocks
)
assert
[
b
.
block_id
for
b
in
blocks
]
==
[
0
,
1
,
2
,
3
,
4
]
...
...
@@ -485,7 +485,7 @@ def test_mm_prefix_caching():
# The just completed block should have hashes with extra keys.
assert
len
(
req0
.
kv_block_hashes
)
==
4
assert
req0
.
kv_block_hashes
[
3
].
extra_keys
==
(
(
"ccc"
,
0
),
)
assert
req0
.
kv_block_hashes
[
3
].
extra_keys
==
(
"ccc"
,
)
# Cache hit.
unique_token_ids
=
[
-
1
]
*
7
+
[
200
]
*
5
...
...
vllm/v1/core/kv_cache_utils.py
View file @
8c3230d8
...
...
@@ -218,8 +218,8 @@ def generate_block_hash_extra_keys(
continue
# The block contains the current mm input.
mm_start
=
max
(
0
,
start_token_idx
-
offset
)
extra_keys
.
append
((
mm_hashes
[
curr_mm_idx
],
mm_start
))
extra_keys
.
append
(
mm_hashes
[
curr_mm_idx
]
)
if
end_token_idx
>=
offset
+
length
:
# If this block contains the end of the current mm input,
# move to the next mm input as this block may also contain
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment