Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d310e6de
Unverified
Commit
d310e6de
authored
May 09, 2025
by
Ning Xie
Committed by
GitHub
May 08, 2025
Browse files
[BUGFIX]: return fast when request requires prompt logprobs (#17251)
parent
5e6f9394
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
7 deletions
+7
-7
tests/v1/core/test_prefix_caching.py
tests/v1/core/test_prefix_caching.py
+2
-2
vllm/v1/core/kv_cache_manager.py
vllm/v1/core/kv_cache_manager.py
+5
-5
No files found.
tests/v1/core/test_prefix_caching.py
View file @
d310e6de
...
...
@@ -194,7 +194,7 @@ def test_prefill_plp():
all_token_ids
=
common_token_ids
+
unique_token_ids
req0
=
make_request
(
"0"
,
all_token_ids
,
prompt_logprobs
=
5
)
computed_blocks
,
num_computed_tokens
=
manager
.
get_computed_blocks
(
req0
)
assert
len
(
manager
.
req_to_block_hashes
[
req0
.
request_id
])
==
3
assert
len
(
manager
.
req_to_block_hashes
[
req0
.
request_id
])
==
0
assert
not
computed_blocks
.
blocks
assert
num_computed_tokens
==
0
blocks
=
manager
.
allocate_slots
(
req0
,
55
,
computed_blocks
)
...
...
@@ -256,7 +256,7 @@ def test_prefill_plp():
common_token_ids
+
unique_token_ids
,
prompt_logprobs
=
5
)
computed_blocks
,
num_computed_tokens
=
manager
.
get_computed_blocks
(
req2
)
assert
len
(
manager
.
req_to_block_hashes
[
req2
.
request_id
])
==
3
assert
len
(
manager
.
req_to_block_hashes
[
req2
.
request_id
])
==
0
assert
not
computed_blocks
.
blocks
assert
num_computed_tokens
==
0
blocks
=
manager
.
allocate_slots
(
req2
,
55
,
computed_blocks
)
...
...
vllm/v1/core/kv_cache_manager.py
View file @
d310e6de
...
...
@@ -126,8 +126,11 @@ class KVCacheManager:
- A list of blocks that are computed for the request.
- The number of computed tokens.
"""
if
not
self
.
enable_caching
:
# Prefix caching is disabled.
# Prefix caching is disabled or
# When the request requires prompt logprobs, we skip prefix caching.
if
(
not
self
.
enable_caching
or
request
.
sampling_params
.
prompt_logprobs
is
not
None
):
return
KVCacheBlocks
.
create_empty
(),
0
# The block hashes for the request may already be computed
...
...
@@ -141,9 +144,6 @@ class KVCacheManager:
if
self
.
log_stats
:
assert
self
.
prefix_cache_stats
is
not
None
self
.
prefix_cache_stats
.
requests
+=
1
# When the request requires prompt logprobs, we skip prefix caching.
if
request
.
sampling_params
.
prompt_logprobs
is
not
None
:
return
KVCacheBlocks
.
create_empty
(),
0
if
len
(
block_hashes
)
*
self
.
block_size
==
request
.
num_tokens
:
# When prompt length is divisible by the block size and all
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment