Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
1be5a735
Unverified
Commit
1be5a735
authored
Jan 15, 2026
by
Michael Goin
Committed by
GitHub
Jan 15, 2026
Browse files
[UX] Use kv_offloading_backend=native by default (#32421)
Signed-off-by:
mgoin
<
mgoin64@gmail.com
>
parent
c36ba69b
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
28 additions
and
15 deletions
+28
-15
tests/v1/kv_connector/unit/test_config.py
tests/v1/kv_connector/unit/test_config.py
+18
-1
vllm/config/cache.py
vllm/config/cache.py
+5
-5
vllm/config/vllm.py
vllm/config/vllm.py
+4
-6
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+1
-3
No files found.
tests/v1/kv_connector/unit/test_config.py
View file @
1be5a735
...
@@ -19,7 +19,8 @@ pytestmark = pytest.mark.cpu_test
...
@@ -19,7 +19,8 @@ pytestmark = pytest.mark.cpu_test
(
"lmcache"
,
4.0
,
1
,
1
,
"LMCacheConnectorV1"
,
4.0
),
(
"lmcache"
,
4.0
,
1
,
1
,
"LMCacheConnectorV1"
,
4.0
),
# size per rank: 8.0 GiB / (2 * 2) = 2.0 GiB
# size per rank: 8.0 GiB / (2 * 2) = 2.0 GiB
(
"lmcache"
,
8.0
,
2
,
2
,
"LMCacheConnectorV1"
,
2.0
),
(
"lmcache"
,
8.0
,
2
,
2
,
"LMCacheConnectorV1"
,
2.0
),
(
None
,
None
,
1
,
1
,
None
,
None
),
# When kv_offloading_size is None, offloading is disabled (backend is ignored)
(
"native"
,
None
,
1
,
1
,
None
,
None
),
],
],
)
)
def
test_kv_connector
(
def
test_kv_connector
(
...
@@ -62,3 +63,19 @@ def test_kv_connector(
...
@@ -62,3 +63,19 @@ def test_kv_connector(
assert
kv_connector_extra_config
[
"lmcache.max_local_cpu_size"
]
==
expected_bytes
assert
kv_connector_extra_config
[
"lmcache.max_local_cpu_size"
]
==
expected_bytes
# Existing config should be replaced
# Existing config should be replaced
assert
"existing_key"
not
in
kv_connector_extra_config
assert
"existing_key"
not
in
kv_connector_extra_config
def
test_kv_offloading_size_only_uses_native_default
():
"""Test that setting only kv_offloading_size enables native offloading."""
vllm_config
=
VllmConfig
(
cache_config
=
CacheConfig
(
kv_offloading_size
=
4.0
,
# kv_offloading_backend not set, should default to "native"
),
)
kv_transfer_config
=
vllm_config
.
kv_transfer_config
kv_connector_extra_config
=
kv_transfer_config
.
kv_connector_extra_config
assert
kv_transfer_config
.
kv_connector
==
"OffloadingConnector"
assert
kv_transfer_config
.
kv_role
==
"kv_both"
assert
kv_connector_extra_config
[
"cpu_bytes_to_use"
]
==
4.0
*
(
1
<<
30
)
vllm/config/cache.py
View file @
1be5a735
...
@@ -152,13 +152,13 @@ class CacheConfig:
...
@@ -152,13 +152,13 @@ class CacheConfig:
kv_offloading_size
:
float
|
None
=
None
kv_offloading_size
:
float
|
None
=
None
"""Size of the KV cache offloading buffer in GiB. When TP > 1, this is
"""Size of the KV cache offloading buffer in GiB. When TP > 1, this is
the total buffer size summed across all TP ranks. By default, this is set
the total buffer size summed across all TP ranks. By default, this is set
to None, which means no KV offloading is enabled. When set wi
th
to None, which means no KV offloading is enabled. When set
, vLLM
wi
ll
kv_offloading_backend, vLLM will
enable KV cache offloading to CPU"""
enable KV cache offloading to CPU
using the kv_offloading_backend.
"""
kv_offloading_backend
:
KVOffloadingBackend
|
None
=
None
kv_offloading_backend
:
KVOffloadingBackend
=
"native"
"""The backend to use for KV cache offloading. Supported backends include
"""The backend to use for KV cache offloading. Supported backends include
'native' (vLLM native CPU offloading), 'lmcache'
This option must be used
'native' (vLLM native CPU offloading), 'lmcache'
.
together with
kv_offloading_size."""
KV offloading is only activated when
kv_offloading_size
is set
."""
def
compute_hash
(
self
)
->
str
:
def
compute_hash
(
self
)
->
str
:
"""
"""
...
...
vllm/config/vllm.py
View file @
1be5a735
...
@@ -498,17 +498,15 @@ class VllmConfig:
...
@@ -498,17 +498,15 @@ class VllmConfig:
Right now, this function reads the offloading settings from
Right now, this function reads the offloading settings from
CacheConfig and configures the KVTransferConfig accordingly.
CacheConfig and configures the KVTransferConfig accordingly.
"""
"""
if
(
kv_offloading_backend
:
=
self
.
cache_config
.
kv_offloading_backend
)
is
None
:
# KV offloading is only activated when kv_offloading_size is set.
if
(
kv_offloading_size
:
=
self
.
cache_config
.
kv_offloading_size
)
is
None
:
return
return
kv_offloading_backend
=
self
.
cache_config
.
kv_offloading_backend
# If no KVTransferConfig is provided, create a default one.
# If no KVTransferConfig is provided, create a default one.
if
self
.
kv_transfer_config
is
None
:
if
self
.
kv_transfer_config
is
None
:
self
.
kv_transfer_config
=
KVTransferConfig
()
self
.
kv_transfer_config
=
KVTransferConfig
()
if
(
kv_offloading_size
:
=
self
.
cache_config
.
kv_offloading_size
)
is
None
:
raise
ValueError
(
"You must set kv_offloading_size when kv_offloading_backend is set."
)
num_kv_ranks
=
(
num_kv_ranks
=
(
self
.
parallel_config
.
tensor_parallel_size
self
.
parallel_config
.
tensor_parallel_size
*
self
.
parallel_config
.
pipeline_parallel_size
*
self
.
parallel_config
.
pipeline_parallel_size
...
...
vllm/engine/arg_utils.py
View file @
1be5a735
...
@@ -574,9 +574,7 @@ class EngineArgs:
...
@@ -574,9 +574,7 @@ class EngineArgs:
optimization_level
:
OptimizationLevel
=
VllmConfig
.
optimization_level
optimization_level
:
OptimizationLevel
=
VllmConfig
.
optimization_level
kv_offloading_size
:
float
|
None
=
CacheConfig
.
kv_offloading_size
kv_offloading_size
:
float
|
None
=
CacheConfig
.
kv_offloading_size
kv_offloading_backend
:
KVOffloadingBackend
|
None
=
(
kv_offloading_backend
:
KVOffloadingBackend
=
CacheConfig
.
kv_offloading_backend
CacheConfig
.
kv_offloading_backend
)
tokens_only
:
bool
=
False
tokens_only
:
bool
=
False
def
__post_init__
(
self
):
def
__post_init__
(
self
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment