Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
myrfy001
vllm_dsv4
Commits
c3598d02
Unverified
Commit
c3598d02
authored
Mar 05, 2026
by
Martin Hickey
Committed by
GitHub
Mar 05, 2026
Browse files
[Misc] Remove deprecated items that are due for removal (#36006)
Signed-off-by:
Martin Hickey
<
martin.hickey@ie.ibm.com
>
parent
57c629e9
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
0 additions
and
51 deletions
+0
-51
vllm/config/cache.py
vllm/config/cache.py
+0
-18
vllm/config/compilation.py
vllm/config/compilation.py
+0
-18
vllm/multimodal/processing/processor.py
vllm/multimodal/processing/processor.py
+0
-15
No files found.
vllm/config/cache.py
View file @
c3598d02
...
@@ -92,24 +92,6 @@ class CacheConfig:
...
@@ -92,24 +92,6 @@ class CacheConfig:
benefits before turning this on.
\n
benefits before turning this on.
\n
- "xxhash_cbor" combines canonical CBOR serialization with xxHash for
- "xxhash_cbor" combines canonical CBOR serialization with xxHash for
reproducible hashing. Requires the optional ``xxhash`` package."""
reproducible hashing. Requires the optional ``xxhash`` package."""
cpu_offload_gb
:
float
=
Field
(
default
=
0
,
ge
=
0
)
"""The space in GiB to offload to CPU, per GPU. Default is 0, which means
no offloading. Intuitively, this argument can be seen as a virtual way to
increase the GPU memory size. For example, if you have one 24 GB GPU and
set this to 10, virtually you can think of it as a 34 GB GPU. Then you can
load a 13B model with BF16 weight, which requires at least 26GB GPU memory.
Note that this requires fast CPU-GPU interconnect, as part of the model is
loaded from CPU memory to GPU memory on the fly in each model forward pass.
DEPRECATED: This field is deprecated and will be removed in v0.16.
Please use OffloadConfig.uva.cpu_offload_gb instead.
"""
cpu_offload_params
:
set
[
str
]
=
Field
(
default_factory
=
set
)
"""The set of parameter name segments to target for CPU offloading.
DEPRECATED: This field is deprecated and will be removed in v0.16.
Please use OffloadConfig.uva.cpu_offload_params instead.
"""
calculate_kv_scales
:
bool
=
False
calculate_kv_scales
:
bool
=
False
"""This enables dynamic calculation of `k_scale` and `v_scale` when
"""This enables dynamic calculation of `k_scale` and `v_scale` when
kv_cache_dtype is fp8. If `False`, the scales will be loaded from the model
kv_cache_dtype is fp8. If `False`, the scales will be loaded from the model
...
...
vllm/config/compilation.py
View file @
c3598d02
...
@@ -381,13 +381,6 @@ class CompilationConfig:
...
@@ -381,13 +381,6 @@ class CompilationConfig:
certain small batchsizes, where inductor is good at optimizing.
certain small batchsizes, where inductor is good at optimizing.
"""
"""
# Top-level Compilation control
level
:
int
=
Field
(
default
=
None
)
"""
Level is deprecated and will be removed in the next release,
either 0.12.0 or 0.11.2 whichever is soonest.
Please use mode. Currently all levels are mapped to mode.
"""
# Top-level Compilation control
# Top-level Compilation control
mode
:
CompilationMode
=
Field
(
default
=
None
)
mode
:
CompilationMode
=
Field
(
default
=
None
)
"""The compilation approach used for torch.compile-based compilation of the
"""The compilation approach used for torch.compile-based compilation of the
...
@@ -801,17 +794,6 @@ class CompilationConfig:
...
@@ -801,17 +794,6 @@ class CompilationConfig:
return
handler
(
value
)
return
handler
(
value
)
def
__post_init__
(
self
)
->
None
:
def
__post_init__
(
self
)
->
None
:
if
self
.
level
is
not
None
:
logger
.
warning
(
"Level is deprecated and will be removed in the next release,"
"either 0.12.0 or 0.11.2 whichever is soonest."
"Use mode instead."
"If both level and mode are given,"
"only mode will be used."
)
if
self
.
mode
is
None
:
self
.
mode
=
self
.
level
count_none
=
self
.
custom_ops
.
count
(
"none"
)
count_none
=
self
.
custom_ops
.
count
(
"none"
)
count_all
=
self
.
custom_ops
.
count
(
"all"
)
count_all
=
self
.
custom_ops
.
count
(
"all"
)
assert
count_none
+
count_all
<=
1
,
"Can only specify 'none' or 'all'"
assert
count_none
+
count_all
<=
1
,
"Can only specify 'none' or 'all'"
...
...
vllm/multimodal/processing/processor.py
View file @
c3598d02
...
@@ -1074,21 +1074,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
...
@@ -1074,21 +1074,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
mm_items
.
get_all_counts
(),
mm_items
.
get_all_counts
(),
)
)
for
modality
,
prompt_updates
in
mm_prompt_updates
.
items
():
for
item_idx
,
item_prompt_updates
in
enumerate
(
prompt_updates
):
if
len
(
item_prompt_updates
)
>
1
:
logger
.
warning_once
(
"Detected %d prompt updates for `mm_items[%r][%s]`. "
"Multiple prompt updates per item is now "
"deprecated and may be removed in v0.13. "
"Instead, please specify dynamic update targets "
"in the same prompt update definition by passing "
"a function to `PromptUpdate.target`."
,
len
(
prompt_updates
),
modality
,
item_idx
,
)
return
mm_prompt_updates
return
mm_prompt_updates
def
_find_mm_placeholders
(
def
_find_mm_placeholders
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment