Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ec79b67c
Unverified
Commit
ec79b67c
authored
Mar 04, 2025
by
Roger Wang
Committed by
GitHub
Mar 05, 2025
Browse files
[Misc][V1] Avoid using `envs.VLLM_USE_V1` in mm processing (#14256)
Signed-off-by:
Roger Wang
<
ywang@roblox.com
>
parent
32985bed
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
38 additions
and
8 deletions
+38
-8
vllm/inputs/preprocess.py
vllm/inputs/preprocess.py
+22
-2
vllm/model_executor/models/llava.py
vllm/model_executor/models/llava.py
+3
-1
vllm/model_executor/models/minicpmv.py
vllm/model_executor/models/minicpmv.py
+3
-1
vllm/model_executor/models/mllama.py
vllm/model_executor/models/mllama.py
+3
-1
vllm/model_executor/models/prithvi_geospatial_mae.py
vllm/model_executor/models/prithvi_geospatial_mae.py
+1
-0
vllm/multimodal/processing.py
vllm/multimodal/processing.py
+5
-3
vllm/v1/engine/processor.py
vllm/v1/engine/processor.py
+1
-0
No files found.
vllm/inputs/preprocess.py
View file @
ec79b67c
...
@@ -254,6 +254,7 @@ class InputPreprocessor:
...
@@ -254,6 +254,7 @@ class InputPreprocessor:
mm_data
:
MultiModalDataDict
,
mm_data
:
MultiModalDataDict
,
mm_processor_kwargs
:
Optional
[
Mapping
[
str
,
object
]],
mm_processor_kwargs
:
Optional
[
Mapping
[
str
,
object
]],
lora_request
:
Optional
[
LoRARequest
],
lora_request
:
Optional
[
LoRARequest
],
return_mm_hashes
:
bool
=
False
,
)
->
MultiModalInputs
:
)
->
MultiModalInputs
:
"""
"""
Apply the model's multi-modal processor to a multi-modal prompt,
Apply the model's multi-modal processor to a multi-modal prompt,
...
@@ -274,7 +275,8 @@ class InputPreprocessor:
...
@@ -274,7 +275,8 @@ class InputPreprocessor:
if
mm_processor_kwargs
is
None
:
if
mm_processor_kwargs
is
None
:
mm_processor_kwargs
=
{}
mm_processor_kwargs
=
{}
return
mm_processor
.
apply
(
prompt
,
mm_data
,
mm_processor_kwargs
)
return
mm_processor
.
apply
(
prompt
,
mm_data
,
mm_processor_kwargs
,
return_mm_hashes
)
async
def
_process_multimodal_async
(
async
def
_process_multimodal_async
(
self
,
self
,
...
@@ -282,6 +284,7 @@ class InputPreprocessor:
...
@@ -282,6 +284,7 @@ class InputPreprocessor:
mm_data
:
MultiModalDataDict
,
mm_data
:
MultiModalDataDict
,
mm_processor_kwargs
:
Optional
[
Mapping
[
str
,
object
]],
mm_processor_kwargs
:
Optional
[
Mapping
[
str
,
object
]],
lora_request
:
Optional
[
LoRARequest
],
lora_request
:
Optional
[
LoRARequest
],
return_mm_hashes
:
bool
=
False
,
)
->
MultiModalInputs
:
)
->
MultiModalInputs
:
"""Async version of :meth:`_process_multimodal`."""
"""Async version of :meth:`_process_multimodal`."""
# At the moment on model (PrithviGeoSpatialMAE) requires to be
# At the moment on model (PrithviGeoSpatialMAE) requires to be
...
@@ -299,13 +302,15 @@ class InputPreprocessor:
...
@@ -299,13 +302,15 @@ class InputPreprocessor:
if
mm_processor_kwargs
is
None
:
if
mm_processor_kwargs
is
None
:
mm_processor_kwargs
=
{}
mm_processor_kwargs
=
{}
return
mm_processor
.
apply
(
prompt
,
mm_data
,
mm_processor_kwargs
)
return
mm_processor
.
apply
(
prompt
,
mm_data
,
mm_processor_kwargs
,
return_mm_hashes
)
def
_prompt_to_llm_inputs
(
def
_prompt_to_llm_inputs
(
self
,
self
,
prompt
:
SingletonPrompt
,
prompt
:
SingletonPrompt
,
request_id
:
str
,
request_id
:
str
,
lora_request
:
Optional
[
LoRARequest
]
=
None
,
lora_request
:
Optional
[
LoRARequest
]
=
None
,
return_mm_hashes
:
bool
=
False
,
)
->
SingletonInputs
:
)
->
SingletonInputs
:
"""
"""
Extract the singleton inputs from a prompt.
Extract the singleton inputs from a prompt.
...
@@ -315,6 +320,7 @@ class InputPreprocessor:
...
@@ -315,6 +320,7 @@ class InputPreprocessor:
* request_id
* request_id
* prompt: single encoder or decoder input prompt
* prompt: single encoder or decoder input prompt
* lora_request: this is only valid for decoder prompts
* lora_request: this is only valid for decoder prompts
* return_mm_hashes: whether to return multimodal hashes
Returns:
Returns:
...
@@ -349,6 +355,7 @@ class InputPreprocessor:
...
@@ -349,6 +355,7 @@ class InputPreprocessor:
multi_modal_data
,
multi_modal_data
,
mm_processor_kwargs
,
mm_processor_kwargs
,
lora_request
=
lora_request
,
lora_request
=
lora_request
,
return_mm_hashes
=
return_mm_hashes
,
)
)
return
token_inputs
(
return
token_inputs
(
...
@@ -695,6 +702,7 @@ class InputPreprocessor:
...
@@ -695,6 +702,7 @@ class InputPreprocessor:
request_id
:
str
,
request_id
:
str
,
lora_request
:
Optional
[
LoRARequest
]
=
None
,
lora_request
:
Optional
[
LoRARequest
]
=
None
,
prompt_adapter_request
:
Optional
[
PromptAdapterRequest
]
=
None
,
prompt_adapter_request
:
Optional
[
PromptAdapterRequest
]
=
None
,
return_mm_hashes
:
bool
=
False
,
)
->
DecoderOnlyInputs
:
)
->
DecoderOnlyInputs
:
"""
"""
For decoder-only models:
For decoder-only models:
...
@@ -706,6 +714,7 @@ class InputPreprocessor:
...
@@ -706,6 +714,7 @@ class InputPreprocessor:
* request_id
* request_id
* lora_request
* lora_request
* prompt_adapter_request
* prompt_adapter_request
* return_mm_hashes
Returns:
Returns:
...
@@ -729,6 +738,7 @@ class InputPreprocessor:
...
@@ -729,6 +738,7 @@ class InputPreprocessor:
request_id
:
str
,
request_id
:
str
,
lora_request
:
Optional
[
LoRARequest
]
=
None
,
lora_request
:
Optional
[
LoRARequest
]
=
None
,
prompt_adapter_request
:
Optional
[
PromptAdapterRequest
]
=
None
,
prompt_adapter_request
:
Optional
[
PromptAdapterRequest
]
=
None
,
return_mm_hashes
:
bool
=
False
,
)
->
DecoderOnlyInputs
:
)
->
DecoderOnlyInputs
:
"""Async version of :meth:`_process_decoder_only_prompt`."""
"""Async version of :meth:`_process_decoder_only_prompt`."""
prompt_comps
=
await
self
.
_prompt_to_llm_inputs_async
(
prompt_comps
=
await
self
.
_prompt_to_llm_inputs_async
(
...
@@ -748,9 +758,13 @@ class InputPreprocessor:
...
@@ -748,9 +758,13 @@ class InputPreprocessor:
request_id
:
str
,
request_id
:
str
,
lora_request
:
Optional
[
LoRARequest
]
=
None
,
lora_request
:
Optional
[
LoRARequest
]
=
None
,
prompt_adapter_request
:
Optional
[
PromptAdapterRequest
]
=
None
,
prompt_adapter_request
:
Optional
[
PromptAdapterRequest
]
=
None
,
return_mm_hashes
:
bool
=
False
,
)
->
ProcessorInputs
:
)
->
ProcessorInputs
:
"""Preprocess the input prompt."""
"""Preprocess the input prompt."""
if
self
.
model_config
.
is_encoder_decoder
:
if
self
.
model_config
.
is_encoder_decoder
:
assert
not
return_mm_hashes
,
(
"Multimodal hashes for encoder-decoder models should not be "
,
"returned until they are supported on vLLM V1."
)
# Encoder-decoder model requires special mapping of
# Encoder-decoder model requires special mapping of
# input prompts to encoder & decoder
# input prompts to encoder & decoder
return
self
.
_process_encoder_decoder_prompt
(
return
self
.
_process_encoder_decoder_prompt
(
...
@@ -768,6 +782,7 @@ class InputPreprocessor:
...
@@ -768,6 +782,7 @@ class InputPreprocessor:
request_id
=
request_id
,
request_id
=
request_id
,
lora_request
=
lora_request
,
lora_request
=
lora_request
,
prompt_adapter_request
=
prompt_adapter_request
,
prompt_adapter_request
=
prompt_adapter_request
,
return_mm_hashes
=
return_mm_hashes
,
)
)
async
def
preprocess_async
(
async
def
preprocess_async
(
...
@@ -776,9 +791,13 @@ class InputPreprocessor:
...
@@ -776,9 +791,13 @@ class InputPreprocessor:
request_id
:
str
,
request_id
:
str
,
lora_request
:
Optional
[
LoRARequest
]
=
None
,
lora_request
:
Optional
[
LoRARequest
]
=
None
,
prompt_adapter_request
:
Optional
[
PromptAdapterRequest
]
=
None
,
prompt_adapter_request
:
Optional
[
PromptAdapterRequest
]
=
None
,
return_mm_hashes
:
bool
=
False
,
)
->
ProcessorInputs
:
)
->
ProcessorInputs
:
"""Async version of :meth:`preprocess`."""
"""Async version of :meth:`preprocess`."""
if
self
.
model_config
.
is_encoder_decoder
:
if
self
.
model_config
.
is_encoder_decoder
:
assert
not
return_mm_hashes
,
(
"Multimodal hashes for encoder-decoder models should not be "
,
"returned until they are supported on vLLM V1."
)
# Encoder-decoder model requires special mapping of
# Encoder-decoder model requires special mapping of
# input prompts to encoder & decoder
# input prompts to encoder & decoder
return
await
self
.
_process_encoder_decoder_prompt_async
(
return
await
self
.
_process_encoder_decoder_prompt_async
(
...
@@ -796,4 +815,5 @@ class InputPreprocessor:
...
@@ -796,4 +815,5 @@ class InputPreprocessor:
request_id
=
request_id
,
request_id
=
request_id
,
lora_request
=
lora_request
,
lora_request
=
lora_request
,
prompt_adapter_request
=
prompt_adapter_request
,
prompt_adapter_request
=
prompt_adapter_request
,
return_mm_hashes
=
return_mm_hashes
,
)
)
vllm/model_executor/models/llava.py
View file @
ec79b67c
...
@@ -767,6 +767,7 @@ class MantisMultiModalProcessor(LlavaMultiModalProcessor):
...
@@ -767,6 +767,7 @@ class MantisMultiModalProcessor(LlavaMultiModalProcessor):
prompt
:
Union
[
str
,
list
[
int
]],
prompt
:
Union
[
str
,
list
[
int
]],
mm_data
:
MultiModalDataDict
,
mm_data
:
MultiModalDataDict
,
hf_processor_mm_kwargs
:
Mapping
[
str
,
object
],
hf_processor_mm_kwargs
:
Mapping
[
str
,
object
],
return_mm_hashes
:
bool
=
False
,
)
->
MultiModalInputs
:
)
->
MultiModalInputs
:
hf_config
=
self
.
info
.
get_hf_config
()
hf_config
=
self
.
info
.
get_hf_config
()
image_token_id
=
hf_config
.
image_token_index
image_token_id
=
hf_config
.
image_token_index
...
@@ -777,7 +778,8 @@ class MantisMultiModalProcessor(LlavaMultiModalProcessor):
...
@@ -777,7 +778,8 @@ class MantisMultiModalProcessor(LlavaMultiModalProcessor):
image_height
=-
1
,
image_height
=-
1
,
)
)
result
=
super
().
apply
(
prompt
,
mm_data
,
hf_processor_mm_kwargs
)
result
=
super
().
apply
(
prompt
,
mm_data
,
hf_processor_mm_kwargs
,
return_mm_hashes
)
mm_items
=
self
.
_to_mm_items
(
mm_data
)
mm_items
=
self
.
_to_mm_items
(
mm_data
)
mm_item_counts
=
mm_items
.
get_all_counts
()
mm_item_counts
=
mm_items
.
get_all_counts
()
...
...
vllm/model_executor/models/minicpmv.py
View file @
ec79b67c
...
@@ -780,6 +780,7 @@ class MiniCPMVMultiModalProcessor(BaseMultiModalProcessor[_I]):
...
@@ -780,6 +780,7 @@ class MiniCPMVMultiModalProcessor(BaseMultiModalProcessor[_I]):
prompt
:
Union
[
str
,
List
[
int
]],
prompt
:
Union
[
str
,
List
[
int
]],
mm_data
:
MultiModalDataDict
,
mm_data
:
MultiModalDataDict
,
hf_processor_mm_kwargs
:
Mapping
[
str
,
object
],
hf_processor_mm_kwargs
:
Mapping
[
str
,
object
],
return_mm_hashes
:
bool
=
False
,
)
->
MultiModalInputs
:
)
->
MultiModalInputs
:
supported_mm_modalities
=
self
.
info
.
get_supported_mm_modalities
()
supported_mm_modalities
=
self
.
info
.
get_supported_mm_modalities
()
if
isinstance
(
prompt
,
list
):
if
isinstance
(
prompt
,
list
):
...
@@ -791,7 +792,8 @@ class MiniCPMVMultiModalProcessor(BaseMultiModalProcessor[_I]):
...
@@ -791,7 +792,8 @@ class MiniCPMVMultiModalProcessor(BaseMultiModalProcessor[_I]):
[
index
for
index
,
m
in
enumerate
(
matches
)
if
m
==
modality
])
[
index
for
index
,
m
in
enumerate
(
matches
)
if
m
==
modality
])
for
modality
in
supported_mm_modalities
for
modality
in
supported_mm_modalities
}
}
result
=
super
().
apply
(
prompt
,
mm_data
,
hf_processor_mm_kwargs
)
result
=
super
().
apply
(
prompt
,
mm_data
,
hf_processor_mm_kwargs
,
return_mm_hashes
)
# Exclude <image_id>x</image_id> from placeholders
# Exclude <image_id>x</image_id> from placeholders
if
"image"
in
result
[
"mm_placeholders"
]
and
\
if
"image"
in
result
[
"mm_placeholders"
]
and
\
self
.
info
.
get_model_version
()
==
(
2
,
6
):
self
.
info
.
get_model_version
()
==
(
2
,
6
):
...
...
vllm/model_executor/models/mllama.py
View file @
ec79b67c
...
@@ -175,8 +175,10 @@ class MllamaMultiModalProcessor(EncDecMultiModalProcessor[MllamaProcessingInfo]
...
@@ -175,8 +175,10 @@ class MllamaMultiModalProcessor(EncDecMultiModalProcessor[MllamaProcessingInfo]
prompt
:
Union
[
str
,
list
[
int
]],
prompt
:
Union
[
str
,
list
[
int
]],
mm_data
:
MultiModalDataDict
,
mm_data
:
MultiModalDataDict
,
hf_processor_mm_kwargs
:
Mapping
[
str
,
object
],
hf_processor_mm_kwargs
:
Mapping
[
str
,
object
],
return_mm_hashes
:
bool
=
False
,
)
->
MultiModalEncDecInputs
:
)
->
MultiModalEncDecInputs
:
mm_inputs
=
super
().
apply
(
prompt
,
mm_data
,
hf_processor_mm_kwargs
)
mm_inputs
=
super
().
apply
(
prompt
,
mm_data
,
hf_processor_mm_kwargs
,
return_mm_hashes
)
# Check that the number of image tokens in the decoder prompt matches
# Check that the number of image tokens in the decoder prompt matches
# the number of images provided in mm_data
# the number of images provided in mm_data
...
...
vllm/model_executor/models/prithvi_geospatial_mae.py
View file @
ec79b67c
...
@@ -93,6 +93,7 @@ class PrithviGeoSpatialMAEMultiModalProcessor(BaseMultiModalProcessor):
...
@@ -93,6 +93,7 @@ class PrithviGeoSpatialMAEMultiModalProcessor(BaseMultiModalProcessor):
prompt
:
Union
[
str
,
list
[
int
]],
prompt
:
Union
[
str
,
list
[
int
]],
mm_data
:
MultiModalDataDict
,
mm_data
:
MultiModalDataDict
,
hf_processor_mm_kwargs
:
Mapping
[
str
,
object
],
hf_processor_mm_kwargs
:
Mapping
[
str
,
object
],
return_mm_hashes
:
bool
=
False
,
)
->
MultiModalInputs
:
)
->
MultiModalInputs
:
mm_kwargs
=
{}
mm_kwargs
=
{}
...
...
vllm/multimodal/processing.py
View file @
ec79b67c
...
@@ -14,7 +14,6 @@ from typing import (TYPE_CHECKING, Generic, NamedTuple, Optional, Protocol,
...
@@ -14,7 +14,6 @@ from typing import (TYPE_CHECKING, Generic, NamedTuple, Optional, Protocol,
from
transformers
import
BatchFeature
,
PretrainedConfig
,
ProcessorMixin
from
transformers
import
BatchFeature
,
PretrainedConfig
,
ProcessorMixin
from
typing_extensions
import
assert_never
from
typing_extensions
import
assert_never
import
vllm.envs
as
envs
from
vllm.inputs
import
InputProcessingContext
from
vllm.inputs
import
InputProcessingContext
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.transformers_utils.tokenizer
import
(
AnyTokenizer
,
decode_tokens
,
from
vllm.transformers_utils.tokenizer
import
(
AnyTokenizer
,
decode_tokens
,
...
@@ -1435,6 +1434,7 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
...
@@ -1435,6 +1434,7 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
prompt
:
Union
[
str
,
list
[
int
]],
prompt
:
Union
[
str
,
list
[
int
]],
mm_data
:
MultiModalDataDict
,
mm_data
:
MultiModalDataDict
,
hf_processor_mm_kwargs
:
Mapping
[
str
,
object
],
hf_processor_mm_kwargs
:
Mapping
[
str
,
object
],
return_mm_hashes
:
bool
=
False
,
)
->
MultiModalInputs
:
)
->
MultiModalInputs
:
"""
"""
Process multi-modal inputs to be used in vLLM.
Process multi-modal inputs to be used in vLLM.
...
@@ -1451,11 +1451,11 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
...
@@ -1451,11 +1451,11 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
"""
"""
mm_items
=
self
.
_to_mm_items
(
mm_data
)
mm_items
=
self
.
_to_mm_items
(
mm_data
)
# Create MM hashes (only used in V1)
# Create MM hashes
to be returned
(only used in V1)
# TODO: Use these hash keys for caching operations in apply_hf_processor
# TODO: Use these hash keys for caching operations in apply_hf_processor
# instead of rehashing.
# instead of rehashing.
if
envs
.
VLLM_USE_V1
:
if
return_mm_hashes
:
model_id
=
self
.
info
.
model_id
model_id
=
self
.
info
.
model_id
mm_hashes
=
{
mm_hashes
=
{
modality
:
[
modality
:
[
...
@@ -1554,6 +1554,7 @@ class EncDecMultiModalProcessor(BaseMultiModalProcessor[_I]):
...
@@ -1554,6 +1554,7 @@ class EncDecMultiModalProcessor(BaseMultiModalProcessor[_I]):
prompt
:
Union
[
str
,
list
[
int
]],
prompt
:
Union
[
str
,
list
[
int
]],
mm_data
:
MultiModalDataDict
,
mm_data
:
MultiModalDataDict
,
hf_processor_mm_kwargs
:
Mapping
[
str
,
object
],
hf_processor_mm_kwargs
:
Mapping
[
str
,
object
],
return_mm_hashes
:
bool
=
False
,
)
->
MultiModalEncDecInputs
:
)
->
MultiModalEncDecInputs
:
"""
"""
Process multi-modal inputs to be used in vLLM.
Process multi-modal inputs to be used in vLLM.
...
@@ -1567,6 +1568,7 @@ class EncDecMultiModalProcessor(BaseMultiModalProcessor[_I]):
...
@@ -1567,6 +1568,7 @@ class EncDecMultiModalProcessor(BaseMultiModalProcessor[_I]):
encoder_prompt
,
encoder_prompt
,
mm_data
,
mm_data
,
hf_processor_mm_kwargs
,
hf_processor_mm_kwargs
,
return_mm_hashes
,
)
)
tokenizer
=
self
.
info
.
get_tokenizer
()
tokenizer
=
self
.
info
.
get_tokenizer
()
...
...
vllm/v1/engine/processor.py
View file @
ec79b67c
...
@@ -131,6 +131,7 @@ class Processor:
...
@@ -131,6 +131,7 @@ class Processor:
request_id
=
request_id
,
request_id
=
request_id
,
lora_request
=
lora_request
,
lora_request
=
lora_request
,
prompt_adapter_request
=
prompt_adapter_request
,
prompt_adapter_request
=
prompt_adapter_request
,
return_mm_hashes
=
self
.
use_hash
,
)
)
eos_token_id
=
self
.
input_preprocessor
.
get_eos_token_id
(
lora_request
)
eos_token_id
=
self
.
input_preprocessor
.
get_eos_token_id
(
lora_request
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment