[Misc] Remove deprecated items that are due for removal (#36006)

Signed-off-by: Martin Hickey <martin.hickey@ie.ibm.com>

[Misc] Remove deprecated items that are due for removal (#36006)
Signed-off-by: Martin Hickey <martin.hickey@ie.ibm.com>
c3598d02 · Martin Hickey · GitHub · 57c629e9 · c3598d02 · c3598d02
Unverified commit c3598d02, authored Mar 05, 2026 by Martin Hickey; committed by GitHub on Mar 05, 2026
Showing 3 changed files with 0 additions and 51 deletions

vllm/config/cache.py vllm/config/cache.py +0 -18

vllm/config/compilation.py vllm/config/compilation.py +0 -18

vllm/multimodal/processing/processor.py vllm/multimodal/processing/processor.py +0 -15

No files found.
--- a/vllm/config/cache.py
+++ b/vllm/config/cache.py
@@ -92,24 +92,6 @@ class CacheConfig:
    benefits before turning this on.\n
    - "xxhash_cbor" combines canonical CBOR serialization with xxHash for
    reproducible hashing. Requires the optional ``xxhash`` package."""
-    cpu_offload_gb: float = Field(default=0, ge=0)
-    """The space in GiB to offload to CPU, per GPU. Default is 0, which means
-    no offloading. Intuitively, this argument can be seen as a virtual way to
-    increase the GPU memory size. For example, if you have one 24 GB GPU and
-    set this to 10, virtually you can think of it as a 34 GB GPU. Then you can
-    load a 13B model with BF16 weight, which requires at least 26GB GPU memory.
-    Note that this requires fast CPU-GPU interconnect, as part of the model is
-    loaded from CPU memory to GPU memory on the fly in each model forward pass.
-    DEPRECATED: This field is deprecated and will be removed in v0.16.
-    Please use OffloadConfig.uva.cpu_offload_gb instead.
-    """
-    cpu_offload_params: set[str] = Field(default_factory=set)
-    """The set of parameter name segments to target for CPU offloading.
-    DEPRECATED: This field is deprecated and will be removed in v0.16.
-    Please use OffloadConfig.uva.cpu_offload_params instead.
-    """
    calculate_kv_scales: bool = False
    """This enables dynamic calculation of `k_scale` and `v_scale` when
    kv_cache_dtype is fp8. If `False`, the scales will be loaded from the model

--- a/vllm/config/compilation.py
+++ b/vllm/config/compilation.py
@@ -381,13 +381,6 @@ class CompilationConfig:
        certain small batchsizes, where inductor is good at optimizing.
    """
-    # Top-level Compilation control
-    level: int = Field(default=None)
-    """
-    Level is deprecated and will be removed in the next release,
-    either 0.12.0 or 0.11.2 whichever is soonest.
-    Please use mode. Currently all levels are mapped to mode.
-    """
    # Top-level Compilation control
    mode: CompilationMode = Field(default=None)
    """The compilation approach used for torch.compile-based compilation of the
@@ -801,17 +794,6 @@ class CompilationConfig:
        return handler(value)
    def __post_init__(self) -> None:
-        if self.level is not None:
-            logger.warning(
-                "Level is deprecated and will be removed in the next release,"
-                "either 0.12.0 or 0.11.2 whichever is soonest."
-                "Use mode instead."
-                "If both level and mode are given,"
-                "only mode will be used."
-            )
-            if self.mode is None:
-                self.mode = self.level
        count_none = self.custom_ops.count("none")
        count_all = self.custom_ops.count("all")
        assert count_none + count_all <= 1, "Can only specify 'none' or 'all'"

--- a/vllm/multimodal/processing/processor.py
+++ b/vllm/multimodal/processing/processor.py
@@ -1074,21 +1074,6 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
            mm_items.get_all_counts(),
        )
-        for modality, prompt_updates in mm_prompt_updates.items():
-            for item_idx, item_prompt_updates in enumerate(prompt_updates):
-                if len(item_prompt_updates) > 1:
-                    logger.warning_once(
-                        "Detected %d prompt updates for `mm_items[%r][%s]`. "
-                        "Multiple prompt updates per item is now "
-                        "deprecated and may be removed in v0.13. "
-                        "Instead, please specify dynamic update targets "
-                        "in the same prompt update definition by passing "
-                        "a function to `PromptUpdate.target`.",
-                        len(prompt_updates),
-                        modality,
-                        item_idx,
-                    )
        return mm_prompt_updates
    def _find_mm_placeholders(