Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
9532a6d5
Unverified
Commit
9532a6d5
authored
Jul 31, 2025
by
Cyrus Leung
Committed by
GitHub
Jul 30, 2025
Browse files
[Deprecation] Remove deprecated args and methods (#21907)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
3e36fcbe
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
5 additions
and
59 deletions
+5
-59
vllm/entrypoints/chat_utils.py
vllm/entrypoints/chat_utils.py
+4
-28
vllm/multimodal/registry.py
vllm/multimodal/registry.py
+0
-25
vllm/worker/neuron_model_runner.py
vllm/worker/neuron_model_runner.py
+1
-6
No files found.
vllm/entrypoints/chat_utils.py
View file @
9532a6d5
...
@@ -48,7 +48,7 @@ from vllm.transformers_utils.chat_templates import (
...
@@ -48,7 +48,7 @@ from vllm.transformers_utils.chat_templates import (
# yapf: enable
# yapf: enable
from
vllm.transformers_utils.processor
import
cached_get_processor
from
vllm.transformers_utils.processor
import
cached_get_processor
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
,
MistralTokenizer
from
vllm.transformers_utils.tokenizer
import
AnyTokenizer
,
MistralTokenizer
from
vllm.utils
import
deprecate_kwargs
,
random_uuid
from
vllm.utils
import
random_uuid
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
...
@@ -383,17 +383,12 @@ def resolve_mistral_chat_template(
...
@@ -383,17 +383,12 @@ def resolve_mistral_chat_template(
return
None
return
None
@
deprecate_kwargs
(
"trust_remote_code"
,
additional_message
=
"Please use `model_config.trust_remote_code` instead."
,
)
def
resolve_hf_chat_template
(
def
resolve_hf_chat_template
(
tokenizer
:
Union
[
PreTrainedTokenizer
,
PreTrainedTokenizerFast
],
tokenizer
:
Union
[
PreTrainedTokenizer
,
PreTrainedTokenizerFast
],
chat_template
:
Optional
[
str
],
chat_template
:
Optional
[
str
],
tools
:
Optional
[
list
[
dict
[
str
,
Any
]]],
tools
:
Optional
[
list
[
dict
[
str
,
Any
]]],
*
,
*
,
model_config
:
ModelConfig
,
model_config
:
ModelConfig
,
trust_remote_code
:
Optional
[
bool
]
=
None
,
)
->
Optional
[
str
]:
)
->
Optional
[
str
]:
# 1st priority: The given chat template
# 1st priority: The given chat template
if
chat_template
is
not
None
:
if
chat_template
is
not
None
:
...
@@ -488,10 +483,6 @@ def _log_chat_template_content_format(
...
@@ -488,10 +483,6 @@ def _log_chat_template_content_format(
)
)
@
deprecate_kwargs
(
"trust_remote_code"
,
additional_message
=
"Please use `model_config.trust_remote_code` instead."
,
)
def
resolve_chat_template_content_format
(
def
resolve_chat_template_content_format
(
chat_template
:
Optional
[
str
],
chat_template
:
Optional
[
str
],
tools
:
Optional
[
list
[
dict
[
str
,
Any
]]],
tools
:
Optional
[
list
[
dict
[
str
,
Any
]]],
...
@@ -499,7 +490,6 @@ def resolve_chat_template_content_format(
...
@@ -499,7 +490,6 @@ def resolve_chat_template_content_format(
tokenizer
:
AnyTokenizer
,
tokenizer
:
AnyTokenizer
,
*
,
*
,
model_config
:
ModelConfig
,
model_config
:
ModelConfig
,
trust_remote_code
:
Optional
[
bool
]
=
None
,
)
->
_ChatTemplateContentFormat
:
)
->
_ChatTemplateContentFormat
:
if
given_format
!=
"auto"
:
if
given_format
!=
"auto"
:
return
given_format
return
given_format
...
@@ -568,17 +558,9 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
...
@@ -568,17 +558,9 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
input_modality
=
modality
.
replace
(
"_embeds"
,
""
)
input_modality
=
modality
.
replace
(
"_embeds"
,
""
)
if
mm_registry
.
has_processor
(
model_config
):
mm_processor
=
mm_registry
.
create_processor
(
model_config
)
mm_processor
=
mm_registry
.
create_processor
(
model_config
)
allowed_counts
=
mm_processor
.
info
.
get_allowed_mm_limits
()
allowed_counts
=
mm_processor
.
info
.
get_allowed_mm_limits
()
allowed_count
=
allowed_counts
.
get
(
input_modality
,
0
)
allowed_count
=
allowed_counts
.
get
(
input_modality
,
0
)
else
:
mm_config
=
model_config
.
multimodal_config
if
mm_config
is
None
:
msg
=
"This model does not support multi-modal inputs"
raise
ValueError
(
msg
)
allowed_count
=
mm_config
.
get_limit_per_prompt
(
input_modality
)
current_count
=
len
(
self
.
_items_by_modality
[
modality
])
+
1
current_count
=
len
(
self
.
_items_by_modality
[
modality
])
+
1
if
current_count
>
allowed_count
:
if
current_count
>
allowed_count
:
...
@@ -1285,10 +1267,6 @@ def parse_chat_messages_futures(
...
@@ -1285,10 +1267,6 @@ def parse_chat_messages_futures(
return
conversation
,
mm_tracker
.
all_mm_data
()
return
conversation
,
mm_tracker
.
all_mm_data
()
@
deprecate_kwargs
(
"trust_remote_code"
,
additional_message
=
"Please use `model_config.trust_remote_code` instead."
,
)
def
apply_hf_chat_template
(
def
apply_hf_chat_template
(
tokenizer
:
Union
[
PreTrainedTokenizer
,
PreTrainedTokenizerFast
],
tokenizer
:
Union
[
PreTrainedTokenizer
,
PreTrainedTokenizerFast
],
conversation
:
list
[
ConversationMessage
],
conversation
:
list
[
ConversationMessage
],
...
@@ -1297,8 +1275,6 @@ def apply_hf_chat_template(
...
@@ -1297,8 +1275,6 @@ def apply_hf_chat_template(
*
,
*
,
model_config
:
ModelConfig
,
model_config
:
ModelConfig
,
tokenize
:
bool
=
False
,
# Different from HF's default
tokenize
:
bool
=
False
,
# Different from HF's default
# Deprecated, explicitly capture here so it doesn't slip into kwargs.
trust_remote_code
:
Optional
[
bool
]
=
None
,
**
kwargs
:
Any
,
**
kwargs
:
Any
,
)
->
str
:
)
->
str
:
hf_chat_template
=
resolve_hf_chat_template
(
hf_chat_template
=
resolve_hf_chat_template
(
...
...
vllm/multimodal/registry.py
View file @
9532a6d5
...
@@ -5,7 +5,6 @@ from dataclasses import dataclass
...
@@ -5,7 +5,6 @@ from dataclasses import dataclass
from
typing
import
TYPE_CHECKING
,
Generic
,
Optional
,
Protocol
,
TypeVar
from
typing
import
TYPE_CHECKING
,
Generic
,
Optional
,
Protocol
,
TypeVar
import
torch.nn
as
nn
import
torch.nn
as
nn
from
typing_extensions
import
deprecated
from
vllm.envs
import
VLLM_MM_INPUT_CACHE_GIB
from
vllm.envs
import
VLLM_MM_INPUT_CACHE_GIB
from
vllm.inputs
import
InputProcessingContext
from
vllm.inputs
import
InputProcessingContext
...
@@ -105,13 +104,6 @@ class MultiModalRegistry:
...
@@ -105,13 +104,6 @@ class MultiModalRegistry:
return
True
# Success
return
True
# Success
@
deprecated
(
"Legacy input processor/mapper pipeline has been removed. "
"Please update your model runner to use "
"`seq_group_metadata.multi_modal_data` directly without "
"further processing."
)
def
create_input_mapper
(
self
,
model_config
:
"ModelConfig"
):
return
lambda
data
,
mm_processor_kwargs
:
data
def
get_max_tokens_per_item_by_modality
(
def
get_max_tokens_per_item_by_modality
(
self
,
self
,
model_config
:
"ModelConfig"
,
model_config
:
"ModelConfig"
,
...
@@ -182,16 +174,6 @@ class MultiModalRegistry:
...
@@ -182,16 +174,6 @@ class MultiModalRegistry:
"""
"""
return
sum
(
self
.
get_max_tokens_by_modality
(
model_config
).
values
())
return
sum
(
self
.
get_max_tokens_by_modality
(
model_config
).
values
())
@
deprecated
(
"Legacy input processor/mapper pipeline has been removed. "
"Please update your model runner to use "
"`seq_group_metadata.multi_modal_data` directly without "
"further processing."
)
def
init_mm_limits_per_prompt
(
self
,
model_config
:
"ModelConfig"
,
)
->
None
:
pass
def
get_mm_limits_per_prompt
(
def
get_mm_limits_per_prompt
(
self
,
self
,
model_config
:
"ModelConfig"
,
model_config
:
"ModelConfig"
,
...
@@ -246,13 +228,6 @@ class MultiModalRegistry:
...
@@ -246,13 +228,6 @@ class MultiModalRegistry:
model_cls
,
_
=
get_model_architecture
(
model_config
)
model_cls
,
_
=
get_model_architecture
(
model_config
)
return
model_cls
return
model_cls
@
deprecated
(
"Legacy input processor/mapper pipeline has been removed. "
"Please update your model runner to use "
"`seq_group_metadata.multi_modal_data` directly without "
"further processing."
)
def
has_processor
(
self
,
model_config
:
"ModelConfig"
)
->
bool
:
return
True
def
create_processor
(
def
create_processor
(
self
,
self
,
model_config
:
"ModelConfig"
,
model_config
:
"ModelConfig"
,
...
...
vllm/worker/neuron_model_runner.py
View file @
9532a6d5
...
@@ -15,8 +15,7 @@ from vllm.lora.request import LoRARequest
...
@@ -15,8 +15,7 @@ from vllm.lora.request import LoRARequest
from
vllm.model_executor
import
SamplingMetadata
from
vllm.model_executor
import
SamplingMetadata
from
vllm.model_executor.layers.sampler
import
SamplerOutput
from
vllm.model_executor.layers.sampler
import
SamplerOutput
from
vllm.model_executor.model_loader.neuron
import
get_neuron_model
from
vllm.model_executor.model_loader.neuron
import
get_neuron_model
from
vllm.multimodal
import
(
MULTIMODAL_REGISTRY
,
BatchedTensorInputs
,
from
vllm.multimodal
import
BatchedTensorInputs
,
MultiModalKwargs
MultiModalKwargs
)
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
from
vllm.sampling_params
import
SamplingParams
from
vllm.sampling_params
import
SamplingParams
from
vllm.sequence
import
IntermediateTensors
,
SequenceGroupMetadata
from
vllm.sequence
import
IntermediateTensors
,
SequenceGroupMetadata
...
@@ -88,10 +87,6 @@ class NeuronModelRunner(ModelRunnerBase[ModelInputForNeuron]):
...
@@ -88,10 +87,6 @@ class NeuronModelRunner(ModelRunnerBase[ModelInputForNeuron]):
self
.
device
=
self
.
device_config
.
device
self
.
device
=
self
.
device_config
.
device
self
.
pin_memory
=
is_pin_memory_available
()
self
.
pin_memory
=
is_pin_memory_available
()
# Multi-modal data support
self
.
multi_modal_input_mapper
=
MULTIMODAL_REGISTRY
\
.
create_input_mapper
(
self
.
model_config
)
# Lazy initialization.
# Lazy initialization.
self
.
model
:
nn
.
Module
# initialize after load_model.
self
.
model
:
nn
.
Module
# initialize after load_model.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment