Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
96912550
Unverified
Commit
96912550
authored
Jan 21, 2025
by
Cyrus Leung
Committed by
GitHub
Jan 21, 2025
Browse files
[Misc] Rename `MultiModalInputsV2 -> MultiModalInputs` (#12244)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
2fc6944c
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
31 additions
and
31 deletions
+31
-31
docs/source/api/multimodal/inputs.md
docs/source/api/multimodal/inputs.md
+1
-1
vllm/inputs/data.py
vllm/inputs/data.py
+6
-6
vllm/inputs/preprocess.py
vllm/inputs/preprocess.py
+3
-3
vllm/model_executor/models/blip2.py
vllm/model_executor/models/blip2.py
+2
-2
vllm/model_executor/models/chameleon.py
vllm/model_executor/models/chameleon.py
+2
-2
vllm/model_executor/models/fuyu.py
vllm/model_executor/models/fuyu.py
+2
-2
vllm/model_executor/models/llava.py
vllm/model_executor/models/llava.py
+3
-3
vllm/model_executor/models/phi3v.py
vllm/model_executor/models/phi3v.py
+2
-2
vllm/model_executor/models/qwen2_audio.py
vllm/model_executor/models/qwen2_audio.py
+2
-2
vllm/multimodal/inputs.py
vllm/multimodal/inputs.py
+1
-1
vllm/multimodal/processing.py
vllm/multimodal/processing.py
+5
-5
vllm/multimodal/profiling.py
vllm/multimodal/profiling.py
+2
-2
No files found.
docs/source/api/multimodal/inputs.md
View file @
96912550
...
...
@@ -43,7 +43,7 @@
```
```
{eval-rst}
.. autoclass:: vllm.multimodal.inputs.MultiModalInputs
V2
.. autoclass:: vllm.multimodal.inputs.MultiModalInputs
:members:
:show-inheritance:
```
vllm/inputs/data.py
View file @
96912550
...
...
@@ -9,7 +9,7 @@ from typing_extensions import NotRequired, TypedDict, TypeVar, assert_never
if
TYPE_CHECKING
:
from
vllm.multimodal
import
(
MultiModalDataDict
,
MultiModalKwargs
,
MultiModalPlaceholderDict
)
from
vllm.multimodal.inputs
import
MultiModalInputs
V2
from
vllm.multimodal.inputs
import
MultiModalInputs
class
TextPrompt
(
TypedDict
):
...
...
@@ -207,7 +207,7 @@ def token_inputs(
return
inputs
DecoderOnlyInputs
=
Union
[
TokenInputs
,
"MultiModalInputs
V2
"
]
DecoderOnlyInputs
=
Union
[
TokenInputs
,
"MultiModalInputs"
]
"""
The inputs in :class:`~vllm.LLMEngine` before they are
passed to the model executor.
...
...
@@ -222,14 +222,14 @@ class EncoderDecoderInputs(TypedDict):
This specifies the required data for encoder-decoder models.
"""
encoder
:
Union
[
TokenInputs
,
"MultiModalInputs
V2
"
]
encoder
:
Union
[
TokenInputs
,
"MultiModalInputs"
]
"""The inputs for the encoder portion."""
decoder
:
Union
[
TokenInputs
,
"MultiModalInputs
V2
"
]
decoder
:
Union
[
TokenInputs
,
"MultiModalInputs"
]
"""The inputs for the decoder portion."""
SingletonInputs
=
Union
[
TokenInputs
,
"MultiModalInputs
V2
"
]
SingletonInputs
=
Union
[
TokenInputs
,
"MultiModalInputs"
]
"""
A processed :class:`SingletonPrompt` which can be passed to
:class:`vllm.sequence.Sequence`.
...
...
@@ -311,7 +311,7 @@ class SingletonInputsAdapter:
return
inputs
.
get
(
"multi_modal_hashes"
,
[])
if
inputs
[
"type"
]
==
"multimodal"
:
# only the case when we use MultiModalInputs
V2
# only the case when we use MultiModalInputs
return
inputs
.
get
(
"mm_hashes"
,
[])
# type: ignore[return-value]
assert_never
(
inputs
)
# type: ignore[arg-type]
...
...
vllm/inputs/preprocess.py
View file @
96912550
...
...
@@ -7,7 +7,7 @@ from vllm.config import ModelConfig
from
vllm.logger
import
init_logger
from
vllm.lora.request
import
LoRARequest
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
,
MultiModalRegistry
from
vllm.multimodal.inputs
import
MultiModalDataDict
,
MultiModalInputs
V2
from
vllm.multimodal.inputs
import
MultiModalDataDict
,
MultiModalInputs
from
vllm.prompt_adapter.request
import
PromptAdapterRequest
from
vllm.transformers_utils.tokenizer_group
import
BaseTokenizerGroup
...
...
@@ -247,7 +247,7 @@ class InputPreprocessor:
mm_data
:
MultiModalDataDict
,
mm_processor_kwargs
:
Optional
[
Mapping
[
str
,
object
]],
lora_request
:
Optional
[
LoRARequest
],
)
->
MultiModalInputs
V2
:
)
->
MultiModalInputs
:
"""
Apply the model's multi-modal processor to a multi-modal prompt,
returning the corresponding token IDs and metadata.
...
...
@@ -271,7 +271,7 @@ class InputPreprocessor:
mm_data
:
MultiModalDataDict
,
mm_processor_kwargs
:
Optional
[
Mapping
[
str
,
object
]],
lora_request
:
Optional
[
LoRARequest
],
)
->
MultiModalInputs
V2
:
)
->
MultiModalInputs
:
"""Async version of :meth:`_process_multimodal`."""
tokenizer_group
=
self
.
get_tokenizer_group
()
tokenizer
=
await
tokenizer_group
.
get_lora_tokenizer_async
(
lora_request
...
...
vllm/model_executor/models/blip2.py
View file @
96912550
...
...
@@ -15,7 +15,7 @@ from vllm.model_executor.layers.sampler import SamplerOutput, get_sampler
from
vllm.model_executor.sampling_metadata
import
SamplingMetadata
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
(
MultiModalDataDict
,
MultiModalFieldConfig
,
MultiModalInputs
V2
,
MultiModalKwargs
,
MultiModalInputs
,
MultiModalKwargs
,
NestedTensors
,
PlaceholderRange
)
from
vllm.multimodal.parse
import
MultiModalDataItems
from
vllm.multimodal.processing
import
(
BaseMultiModalProcessor
,
...
...
@@ -490,7 +490,7 @@ class Blip2MultiModalProcessor(BaseMultiModalProcessor[Blip2ProcessingInfo]):
prompt
:
Union
[
str
,
list
[
int
]],
mm_data
:
MultiModalDataDict
,
hf_processor_mm_kwargs
:
Mapping
[
str
,
object
],
)
->
MultiModalInputs
V2
:
)
->
MultiModalInputs
:
result
=
super
().
apply
(
prompt
,
mm_data
,
hf_processor_mm_kwargs
)
# Only <image> tokens should be considered as placeholders,
...
...
vllm/model_executor/models/chameleon.py
View file @
96912550
...
...
@@ -29,7 +29,7 @@ from vllm.model_executor.sampling_metadata import SamplingMetadata
from
vllm.model_executor.utils
import
set_weight_attrs
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
(
MultiModalDataDict
,
MultiModalFieldConfig
,
MultiModalInputs
V2
,
MultiModalKwargs
,
MultiModalInputs
,
MultiModalKwargs
,
NestedTensors
,
PlaceholderRange
)
from
vllm.multimodal.parse
import
MultiModalDataItems
from
vllm.multimodal.processing
import
(
BaseMultiModalProcessor
,
...
...
@@ -159,7 +159,7 @@ class ChameleonMultiModalProcessor(
prompt
:
Union
[
str
,
list
[
int
]],
mm_data
:
MultiModalDataDict
,
hf_processor_mm_kwargs
:
Mapping
[
str
,
object
],
)
->
MultiModalInputs
V2
:
)
->
MultiModalInputs
:
result
=
super
().
apply
(
prompt
,
mm_data
,
hf_processor_mm_kwargs
)
# Only <image> tokens should be considered as placeholders,
...
...
vllm/model_executor/models/fuyu.py
View file @
96912550
...
...
@@ -31,7 +31,7 @@ from vllm.model_executor.models.persimmon import PersimmonForCausalLM
from
vllm.model_executor.sampling_metadata
import
SamplingMetadata
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
(
MultiModalDataDict
,
MultiModalFieldConfig
,
MultiModalInputs
V2
,
MultiModalKwargs
,
MultiModalInputs
,
MultiModalKwargs
,
NestedTensors
,
PlaceholderRange
)
from
vllm.multimodal.parse
import
(
ImageProcessorItems
,
ImageSize
,
MultiModalDataItems
)
...
...
@@ -232,7 +232,7 @@ class FuyuMultiModalProcessor(BaseMultiModalProcessor[FuyuProcessingInfo]):
prompt
:
Union
[
str
,
list
[
int
]],
mm_data
:
MultiModalDataDict
,
hf_processor_mm_kwargs
:
Mapping
[
str
,
object
],
)
->
MultiModalInputs
V2
:
)
->
MultiModalInputs
:
result
=
super
().
apply
(
prompt
,
mm_data
,
hf_processor_mm_kwargs
)
# Only |SPEAKER| (image) tokens should be considered as placeholders,
...
...
vllm/model_executor/models/llava.py
View file @
96912550
...
...
@@ -24,7 +24,7 @@ from vllm.model_executor.layers.sampler import SamplerOutput, get_sampler
from
vllm.model_executor.sampling_metadata
import
SamplingMetadata
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
(
MultiModalDataDict
,
MultiModalFieldConfig
,
MultiModalInputs
V2
,
MultiModalKwargs
,
MultiModalInputs
,
MultiModalKwargs
,
NestedTensors
)
from
vllm.multimodal.parse
import
(
ImageEmbeddingItems
,
ImageProcessorItems
,
ImageSize
,
MultiModalDataItems
)
...
...
@@ -746,7 +746,7 @@ class MantisMultiModalProcessor(LlavaMultiModalProcessor):
prompt
:
Union
[
str
,
list
[
int
]],
mm_data
:
MultiModalDataDict
,
hf_processor_mm_kwargs
:
Mapping
[
str
,
object
],
)
->
MultiModalInputs
V2
:
)
->
MultiModalInputs
:
hf_config
=
self
.
info
.
get_hf_config
()
image_token_id
=
hf_config
.
image_token_index
...
...
@@ -805,7 +805,7 @@ class MantisMultiModalProcessor(LlavaMultiModalProcessor):
for
modality
,
placeholders
in
mm_placeholders
.
items
()
}
return
MultiModalInputs
V2
(
return
MultiModalInputs
(
type
=
"multimodal"
,
prompt
=
prompt
,
prompt_token_ids
=
prompt_ids
,
...
...
vllm/model_executor/models/phi3v.py
View file @
96912550
...
...
@@ -31,7 +31,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import (
from
vllm.model_executor.sampling_metadata
import
SamplingMetadata
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
(
MultiModalDataDict
,
MultiModalFieldConfig
,
MultiModalInputs
V2
,
MultiModalKwargs
,
MultiModalInputs
,
MultiModalKwargs
,
NestedTensors
,
PlaceholderRange
)
from
vllm.multimodal.parse
import
(
ImageEmbeddingItems
,
ImageProcessorItems
,
ImageSize
,
MultiModalDataItems
)
...
...
@@ -484,7 +484,7 @@ class Phi3VMultiModalProcessor(BaseMultiModalProcessor[Phi3VProcessingInfo]):
prompt
:
Union
[
str
,
list
[
int
]],
mm_data
:
MultiModalDataDict
,
hf_processor_mm_kwargs
:
Mapping
[
str
,
object
],
)
->
MultiModalInputs
V2
:
)
->
MultiModalInputs
:
result
=
super
().
apply
(
prompt
,
mm_data
,
hf_processor_mm_kwargs
)
# Only <|image|> tokens should be considered as placeholders,
...
...
vllm/model_executor/models/qwen2_audio.py
View file @
96912550
...
...
@@ -37,7 +37,7 @@ from vllm.model_executor.layers.sampler import SamplerOutput, get_sampler
from
vllm.model_executor.sampling_metadata
import
SamplingMetadata
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
(
MultiModalDataDict
,
MultiModalFieldConfig
,
MultiModalInputs
V2
,
MultiModalKwargs
,
MultiModalInputs
,
MultiModalKwargs
,
NestedTensors
,
PlaceholderRange
)
from
vllm.multimodal.parse
import
(
AudioProcessorItems
,
MultiModalDataItems
,
MultiModalDataParser
)
...
...
@@ -245,7 +245,7 @@ class Qwen2AudioMultiModalProcessor(
prompt
:
Union
[
str
,
list
[
int
]],
mm_data
:
MultiModalDataDict
,
hf_processor_mm_kwargs
:
Mapping
[
str
,
object
],
)
->
MultiModalInputs
V2
:
)
->
MultiModalInputs
:
result
=
super
().
apply
(
prompt
,
mm_data
,
hf_processor_mm_kwargs
)
# Only <|AUDIO|> tokens should be considered as placeholders,
...
...
vllm/multimodal/inputs.py
View file @
96912550
...
...
@@ -491,7 +491,7 @@ A dictionary containing placeholder ranges for each modality.
"""
class
MultiModalInputs
V2
(
TypedDict
):
class
MultiModalInputs
(
TypedDict
):
"""
Represents the outputs of
:class:`vllm.multimodal.processing.BaseMultiModalProcessor`,
...
...
vllm/multimodal/processing.py
View file @
96912550
...
...
@@ -18,8 +18,8 @@ from vllm.utils import LRUCache, flatten_2d_lists, full_groupby
from
.hasher
import
MultiModalHasher
from
.inputs
import
(
MultiModalDataDict
,
MultiModalFieldConfig
,
MultiModalInputs
V2
,
MultiModalKwargs
,
MultiModalKwargsItem
,
PlaceholderRange
)
MultiModalInputs
,
MultiModalKwargs
,
MultiModalKwargsItem
,
PlaceholderRange
)
from
.parse
import
MultiModalDataItems
,
MultiModalDataParser
if
TYPE_CHECKING
:
...
...
@@ -609,7 +609,7 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
prompt
:
str
,
mm_data
:
MultiModalDataDict
,
hf_processor_mm_kwargs
:
Mapping
[
str
,
object
],
)
->
MultiModalInputs
V2
:
)
->
MultiModalInputs
:
return
self
.
apply
(
prompt
,
mm_data
,
hf_processor_mm_kwargs
)
def
_get_data_parser
(
self
)
->
MultiModalDataParser
:
...
...
@@ -1067,7 +1067,7 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
prompt
:
Union
[
str
,
list
[
int
]],
mm_data
:
MultiModalDataDict
,
hf_processor_mm_kwargs
:
Mapping
[
str
,
object
],
)
->
MultiModalInputs
V2
:
)
->
MultiModalInputs
:
"""
Process multi-modal inputs to be used in vLLM.
...
...
@@ -1169,7 +1169,7 @@ class BaseMultiModalProcessor(ABC, Generic[_I]):
for
modality
,
placeholders
in
mm_placeholders
.
items
()
}
return
MultiModalInputs
V2
(
return
MultiModalInputs
(
type
=
"multimodal"
,
prompt
=
prompt
,
prompt_token_ids
=
prompt_ids
,
...
...
vllm/multimodal/profiling.py
View file @
96912550
...
...
@@ -11,7 +11,7 @@ import vllm.envs as envs
from
vllm.inputs
import
DummyData
from
vllm.logger
import
init_logger
from
.inputs
import
MultiModalDataDict
,
MultiModalInputs
V2
from
.inputs
import
MultiModalDataDict
,
MultiModalInputs
from
.processing
import
BaseMultiModalProcessor
,
BaseProcessingInfo
logger
=
init_logger
(
__name__
)
...
...
@@ -131,7 +131,7 @@ class MultiModalProfiler(Generic[_I]):
self
,
seq_len
:
int
,
mm_counts
:
Mapping
[
str
,
int
],
)
->
MultiModalInputs
V2
:
)
->
MultiModalInputs
:
factory
=
self
.
dummy_inputs
processor_inputs
=
factory
.
get_dummy_processor_inputs
(
seq_len
,
mm_counts
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment