Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3d4f8753
"docs/pages/components/vscode:/vscode.git/clone" did not exist on "222c2e85c83c1e5b9bac30b3a48ceed08a6cce48"
Commit
3d4f8753
authored
Feb 24, 2026
by
laibao
Browse files
feat(kvpress): KVCacheManager 按 num_kv_tokens 分配 slots
parent
b0911b24
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
16 additions
and
1 deletion
+16
-1
vllm/v1/core/kv_cache_manager.py
vllm/v1/core/kv_cache_manager.py
+16
-1
No files found.
vllm/v1/core/kv_cache_manager.py
View file @
3d4f8753
...
...
@@ -6,6 +6,8 @@ from collections.abc import Sequence
from
dataclasses
import
dataclass
from
typing
import
Literal
,
overload
import
vllm.envs
as
envs
from
vllm.platforms
import
current_platform
from
vllm.distributed.kv_events
import
KVCacheEvent
from
vllm.logger
import
init_logger
from
vllm.v1.core.kv_cache_coordinator
import
get_kv_cache_coordinator
...
...
@@ -307,7 +309,20 @@ class KVCacheManager:
num_local_computed_tokens
+
num_external_computed_tokens
,
self
.
max_model_len
,
)
num_tokens_main_model
=
total_computed_tokens
+
num_new_tokens
if
envs
.
VLLM_ENABLE_KV_COMPRESSION
and
current_platform
.
is_cuda_alike
():
# KV compression decouples logical token positions from KV cache
# positions. Allocate based on the KV cache length (plus the tokens
# scheduled for this step, which are temporarily written to cache).
#
# NOTE: For new requests, request.num_kv_tokens may not have been
# initialized yet (e.g., cache-hit / connector paths). Fall back
# to the computed-token count in that case.
kv_computed_tokens
=
int
(
request
.
num_kv_tokens
)
if
kv_computed_tokens
==
0
and
total_computed_tokens
>
0
:
kv_computed_tokens
=
int
(
total_computed_tokens
)
num_tokens_main_model
=
kv_computed_tokens
+
num_new_tokens
else
:
num_tokens_main_model
=
total_computed_tokens
+
num_new_tokens
num_tokens_need_slot
=
min
(
num_tokens_main_model
+
num_lookahead_tokens
,
self
.
max_model_len
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment