[Deprecation] Remove deprecated args and methods (#21907)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>

[Deprecation] Remove deprecated args and methods (#21907)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
9532a6d5 · Cyrus Leung · GitHub · 3e36fcbe · 9532a6d5 · 9532a6d5
Unverified commit 9532a6d5, authored Jul 31, 2025 by Cyrus Leung; committed by GitHub, Jul 30, 2025
Showing 3 changed files with 5 additions and 59 deletions

vllm/entrypoints/chat_utils.py vllm/entrypoints/chat_utils.py +4 -28

vllm/multimodal/registry.py vllm/multimodal/registry.py +0 -25

vllm/worker/neuron_model_runner.py vllm/worker/neuron_model_runner.py +1 -6

No files found.
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -48,7 +48,7 @@ from vllm.transformers_utils.chat_templates import (
 # yapf: enable
 from vllm.transformers_utils.processor import cached_get_processor
 from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
-from vllm.utils import deprecate_kwargs, random_uuid
+from vllm.utils import random_uuid
 logger = init_logger(__name__)
@@ -383,17 +383,12 @@ def resolve_mistral_chat_template(
    return None
-@deprecate_kwargs(
-    "trust_remote_code",
-    additional_message="Please use `model_config.trust_remote_code` instead.",
-)
 def resolve_hf_chat_template(
    tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
    chat_template: Optional[str],
    tools: Optional[list[dict[str, Any]]],
    *,
    model_config: ModelConfig,
-    trust_remote_code: Optional[bool] = None,
 ) -> Optional[str]:
    # 1st priority: The given chat template
    if chat_template is not None:
@@ -488,10 +483,6 @@ def _log_chat_template_content_format(
        )
-@deprecate_kwargs(
-    "trust_remote_code",
-    additional_message="Please use `model_config.trust_remote_code` instead.",
-)
 def resolve_chat_template_content_format(
    chat_template: Optional[str],
    tools: Optional[list[dict[str, Any]]],
@@ -499,7 +490,6 @@ def resolve_chat_template_content_format(
    tokenizer: AnyTokenizer,
    *,
    model_config: ModelConfig,
-    trust_remote_code: Optional[bool] = None,
 ) -> _ChatTemplateContentFormat:
    if given_format != "auto":
        return given_format
@@ -568,17 +558,9 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
        input_modality = modality.replace("_embeds", "")
-        if mm_registry.has_processor(model_config):
        mm_processor = mm_registry.create_processor(model_config)
        allowed_counts = mm_processor.info.get_allowed_mm_limits()
        allowed_count = allowed_counts.get(input_modality, 0)
-        else:
-            mm_config = model_config.multimodal_config
-            if mm_config is None:
-                msg = "This model does not support multi-modal inputs"
-                raise ValueError(msg)
-            allowed_count = mm_config.get_limit_per_prompt(input_modality)
        current_count = len(self._items_by_modality[modality]) + 1
        if current_count > allowed_count:
@@ -1285,10 +1267,6 @@ def parse_chat_messages_futures(
    return conversation, mm_tracker.all_mm_data()
-@deprecate_kwargs(
-    "trust_remote_code",
-    additional_message="Please use `model_config.trust_remote_code` instead.",
-)
 def apply_hf_chat_template(
    tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
    conversation: list[ConversationMessage],
@@ -1297,8 +1275,6 @@ def apply_hf_chat_template(
    *,
    model_config: ModelConfig,
    tokenize: bool = False,  # Different from HF's default
-    # Deprecated, explicitly capture here so it doesn't slit into kwargs.
-    trust_remote_code: Optional[bool] = None,
    **kwargs: Any,
 ) -> str:
    hf_chat_template = resolve_hf_chat_template(

--- a/vllm/multimodal/registry.py
+++ b/vllm/multimodal/registry.py
@@ -5,7 +5,6 @@ from dataclasses import dataclass
 from typing import TYPE_CHECKING, Generic, Optional, Protocol, TypeVar
 import torch.nn as nn
-from typing_extensions import deprecated
 from vllm.envs import VLLM_MM_INPUT_CACHE_GIB
 from vllm.inputs import InputProcessingContext
@@ -105,13 +104,6 @@ class MultiModalRegistry:
        return True  # Success
-    @deprecated("Legacy input processor/mapper pipeline has been removed. "
-                "Please update your model runner to use "
-                "`seq_group_metadata.multi_modal_data` directly without "
-                "further processing.")
-    def create_input_mapper(self, model_config: "ModelConfig"):
-        return lambda data, mm_processor_kwargs: data
    def get_max_tokens_per_item_by_modality(
        self,
        model_config: "ModelConfig",
@@ -182,16 +174,6 @@ class MultiModalRegistry:
        """
        return sum(self.get_max_tokens_by_modality(model_config).values())
-    @deprecated("Legacy input processor/mapper pipeline has been removed. "
-                "Please update your model runner to use "
-                "`seq_group_metadata.multi_modal_data` directly without "
-                "further processing.")
-    def init_mm_limits_per_prompt(
-        self,
-        model_config: "ModelConfig",
-    ) -> None:
-        pass
    def get_mm_limits_per_prompt(
        self,
        model_config: "ModelConfig",
@@ -246,13 +228,6 @@ class MultiModalRegistry:
        model_cls, _ = get_model_architecture(model_config)
        return model_cls
-    @deprecated("Legacy input processor/mapper pipeline has been removed. "
-                "Please update your model runner to use "
-                "`seq_group_metadata.multi_modal_data` directly without "
-                "further processing.")
-    def has_processor(self, model_config: "ModelConfig") -> bool:
-        return True
    def create_processor(
        self,
        model_config: "ModelConfig",

--- a/vllm/worker/neuron_model_runner.py
+++ b/vllm/worker/neuron_model_runner.py
@@ -15,8 +15,7 @@ from vllm.lora.request import LoRARequest
 from vllm.model_executor import SamplingMetadata
 from vllm.model_executor.layers.sampler import SamplerOutput
 from vllm.model_executor.model_loader.neuron import get_neuron_model
-from vllm.multimodal import (MULTIMODAL_REGISTRY, BatchedTensorInputs,
+from vllm.multimodal import BatchedTensorInputs, MultiModalKwargs
-                             MultiModalKwargs)
 from vllm.platforms import current_platform
 from vllm.sampling_params import SamplingParams
 from vllm.sequence import IntermediateTensors, SequenceGroupMetadata
@@ -88,10 +87,6 @@ class NeuronModelRunner(ModelRunnerBase[ModelInputForNeuron]):
        self.device = self.device_config.device
        self.pin_memory = is_pin_memory_available()
-        # Multi-modal data support
-        self.multi_modal_input_mapper = MULTIMODAL_REGISTRY \
-            .create_input_mapper(self.model_config)
        # Lazy initialization.
        self.model: nn.Module  # initialize after load_model.