Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
ba2f0acc
Unverified
Commit
ba2f0acc
authored
Mar 26, 2026
by
Cyrus Leung
Committed by
GitHub
Mar 25, 2026
Browse files
[Misc] Reorganize inputs (#35182)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
678b3c99
Changes
141
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
28 additions
and
43 deletions
+28
-43
vllm/model_executor/models/ovis2_5.py
vllm/model_executor/models/ovis2_5.py
+1
-1
vllm/model_executor/models/paddleocr_vl.py
vllm/model_executor/models/paddleocr_vl.py
+1
-1
vllm/model_executor/models/paligemma.py
vllm/model_executor/models/paligemma.py
+2
-3
vllm/model_executor/models/phi3v.py
vllm/model_executor/models/phi3v.py
+1
-1
vllm/model_executor/models/phi4mm.py
vllm/model_executor/models/phi4mm.py
+1
-1
vllm/model_executor/models/pixtral.py
vllm/model_executor/models/pixtral.py
+1
-1
vllm/model_executor/models/qwen2_5_omni_thinker.py
vllm/model_executor/models/qwen2_5_omni_thinker.py
+1
-2
vllm/model_executor/models/qwen2_audio.py
vllm/model_executor/models/qwen2_audio.py
+1
-2
vllm/model_executor/models/qwen2_vl.py
vllm/model_executor/models/qwen2_vl.py
+1
-2
vllm/model_executor/models/qwen3_asr.py
vllm/model_executor/models/qwen3_asr.py
+1
-3
vllm/model_executor/models/qwen3_asr_realtime.py
vllm/model_executor/models/qwen3_asr_realtime.py
+1
-1
vllm/model_executor/models/qwen3_omni_moe_thinker.py
vllm/model_executor/models/qwen3_omni_moe_thinker.py
+3
-6
vllm/model_executor/models/qwen3_vl.py
vllm/model_executor/models/qwen3_vl.py
+1
-1
vllm/model_executor/models/qwen_vl.py
vllm/model_executor/models/qwen_vl.py
+1
-1
vllm/model_executor/models/rvl.py
vllm/model_executor/models/rvl.py
+1
-1
vllm/model_executor/models/siglip.py
vllm/model_executor/models/siglip.py
+2
-3
vllm/model_executor/models/skyworkr1v.py
vllm/model_executor/models/skyworkr1v.py
+1
-1
vllm/model_executor/models/step3_vl.py
vllm/model_executor/models/step3_vl.py
+1
-1
vllm/model_executor/models/terratorch.py
vllm/model_executor/models/terratorch.py
+3
-6
vllm/model_executor/models/transformers/multimodal.py
vllm/model_executor/models/transformers/multimodal.py
+3
-5
No files found.
vllm/model_executor/models/ovis2_5.py
View file @
ba2f0acc
...
@@ -12,6 +12,7 @@ from transformers import BaseImageProcessor, BatchFeature, PretrainedConfig
...
@@ -12,6 +12,7 @@ from transformers import BaseImageProcessor, BatchFeature, PretrainedConfig
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.inputs
import
MultiModalDataDict
from
vllm.model_executor.layers.linear
import
ReplicatedLinear
from
vllm.model_executor.layers.linear
import
ReplicatedLinear
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.models.ovis
import
VisualEmbedding
from
vllm.model_executor.models.ovis
import
VisualEmbedding
...
@@ -24,7 +25,6 @@ from vllm.model_executor.models.utils import (
...
@@ -24,7 +25,6 @@ from vllm.model_executor.models.utils import (
)
)
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
(
from
vllm.multimodal.inputs
import
(
MultiModalDataDict
,
MultiModalFieldConfig
,
MultiModalFieldConfig
,
MultiModalKwargsItems
,
MultiModalKwargsItems
,
)
)
...
...
vllm/model_executor/models/paddleocr_vl.py
View file @
ba2f0acc
...
@@ -35,6 +35,7 @@ from vllm.config import VllmConfig
...
@@ -35,6 +35,7 @@ from vllm.config import VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.distributed
import
parallel_state
from
vllm.distributed
import
parallel_state
from
vllm.distributed
import
utils
as
dist_utils
from
vllm.distributed
import
utils
as
dist_utils
from
vllm.inputs
import
MultiModalDataDict
from
vllm.model_executor.layers.attention
import
(
from
vllm.model_executor.layers.attention
import
(
MMEncoderAttention
,
MMEncoderAttention
,
)
)
...
@@ -53,7 +54,6 @@ from vllm.model_executor.model_loader.weight_utils import (
...
@@ -53,7 +54,6 @@ from vllm.model_executor.model_loader.weight_utils import (
)
)
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
(
from
vllm.multimodal.inputs
import
(
MultiModalDataDict
,
MultiModalFeatureSpec
,
MultiModalFeatureSpec
,
MultiModalFieldConfig
,
MultiModalFieldConfig
,
MultiModalKwargsItems
,
MultiModalKwargsItems
,
...
...
vllm/model_executor/models/paligemma.py
View file @
ba2f0acc
...
@@ -9,12 +9,11 @@ from transformers import BatchFeature, PaliGemmaConfig
...
@@ -9,12 +9,11 @@ from transformers import BatchFeature, PaliGemmaConfig
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.inputs
import
MultiModalDataDict
,
MultiModalInput
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
(
from
vllm.multimodal.inputs
import
(
MultiModalDataDict
,
MultiModalFieldConfig
,
MultiModalFieldConfig
,
MultiModalInputs
,
MultiModalKwargsItems
,
MultiModalKwargsItems
,
)
)
from
vllm.multimodal.parse
import
(
from
vllm.multimodal.parse
import
(
...
@@ -231,7 +230,7 @@ class PaliGemmaMultiModalProcessor(BaseMultiModalProcessor[PaliGemmaProcessingIn
...
@@ -231,7 +230,7 @@ class PaliGemmaMultiModalProcessor(BaseMultiModalProcessor[PaliGemmaProcessingIn
self
,
self
,
inputs
:
ProcessorInputs
,
inputs
:
ProcessorInputs
,
timing_ctx
:
TimingContext
,
timing_ctx
:
TimingContext
,
)
->
MultiModalInput
s
:
)
->
MultiModalInput
:
mm_inputs
=
super
().
apply
(
inputs
,
timing_ctx
)
mm_inputs
=
super
().
apply
(
inputs
,
timing_ctx
)
prompt_token_ids
=
mm_inputs
[
"prompt_token_ids"
]
prompt_token_ids
=
mm_inputs
[
"prompt_token_ids"
]
...
...
vllm/model_executor/models/phi3v.py
View file @
ba2f0acc
...
@@ -30,12 +30,12 @@ from transformers import (
...
@@ -30,12 +30,12 @@ from transformers import (
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.inputs
import
MultiModalDataDict
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.vocab_parallel_embedding
import
VocabParallelEmbedding
from
vllm.model_executor.layers.vocab_parallel_embedding
import
VocabParallelEmbedding
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
(
from
vllm.multimodal.inputs
import
(
MultiModalDataDict
,
MultiModalFieldConfig
,
MultiModalFieldConfig
,
MultiModalKwargsItems
,
MultiModalKwargsItems
,
)
)
...
...
vllm/model_executor/models/phi4mm.py
View file @
ba2f0acc
...
@@ -18,6 +18,7 @@ from transformers import (
...
@@ -18,6 +18,7 @@ from transformers import (
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.distributed
import
get_pp_group
from
vllm.distributed
import
get_pp_group
from
vllm.inputs
import
MultiModalDataDict
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.layers.logits_processor
import
LogitsProcessor
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
from
vllm.model_executor.layers.vocab_parallel_embedding
import
(
...
@@ -27,7 +28,6 @@ from vllm.model_executor.models.llama import LlamaModel
...
@@ -27,7 +28,6 @@ from vllm.model_executor.models.llama import LlamaModel
from
vllm.model_executor.models.module_mapping
import
MultiModelKeys
from
vllm.model_executor.models.module_mapping
import
MultiModelKeys
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
(
from
vllm.multimodal.inputs
import
(
MultiModalDataDict
,
MultiModalFieldConfig
,
MultiModalFieldConfig
,
MultiModalKwargsItems
,
MultiModalKwargsItems
,
NestedTensors
,
NestedTensors
,
...
...
vllm/model_executor/models/pixtral.py
View file @
ba2f0acc
...
@@ -25,6 +25,7 @@ from transformers.models.pixtral.modeling_pixtral import (
...
@@ -25,6 +25,7 @@ from transformers.models.pixtral.modeling_pixtral import (
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.distributed
import
divide
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
divide
,
get_tensor_model_parallel_world_size
from
vllm.inputs
import
MultiModalDataDict
from
vllm.model_executor.layers.activation
import
get_act_and_mul_fn
from
vllm.model_executor.layers.activation
import
get_act_and_mul_fn
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
...
@@ -37,7 +38,6 @@ from vllm.model_executor.layers.quantization import QuantizationConfig
...
@@ -37,7 +38,6 @@ from vllm.model_executor.layers.quantization import QuantizationConfig
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
,
MultiModalKwargsItems
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
,
MultiModalKwargsItems
from
vllm.multimodal.inputs
import
(
from
vllm.multimodal.inputs
import
(
MultiModalDataDict
,
MultiModalFieldConfig
,
MultiModalFieldConfig
,
NestedTensors
,
NestedTensors
,
)
)
...
...
vllm/model_executor/models/qwen2_5_omni_thinker.py
View file @
ba2f0acc
...
@@ -46,6 +46,7 @@ from transformers.models.whisper import WhisperFeatureExtractor
...
@@ -46,6 +46,7 @@ from transformers.models.whisper import WhisperFeatureExtractor
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.forward_context
import
set_forward_context
from
vllm.forward_context
import
set_forward_context
from
vllm.inputs
import
ModalityData
,
MultiModalDataDict
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.models.module_mapping
import
MultiModelKeys
from
vllm.model_executor.models.module_mapping
import
MultiModelKeys
from
vllm.model_executor.models.qwen2_5_vl
import
(
from
vllm.model_executor.models.qwen2_5_vl
import
(
...
@@ -66,8 +67,6 @@ from vllm.model_executor.models.qwen2_vl import Qwen2VLMultiModalDataParser
...
@@ -66,8 +67,6 @@ from vllm.model_executor.models.qwen2_vl import Qwen2VLMultiModalDataParser
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
(
from
vllm.multimodal.inputs
import
(
ImageItem
,
ImageItem
,
ModalityData
,
MultiModalDataDict
,
MultiModalFeatureSpec
,
MultiModalFeatureSpec
,
MultiModalFieldConfig
,
MultiModalFieldConfig
,
MultiModalKwargsItems
,
MultiModalKwargsItems
,
...
...
vllm/model_executor/models/qwen2_audio.py
View file @
ba2f0acc
...
@@ -38,11 +38,10 @@ from transformers.models.whisper import WhisperFeatureExtractor
...
@@ -38,11 +38,10 @@ from transformers.models.whisper import WhisperFeatureExtractor
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.inputs
import
ModalityData
,
MultiModalDataDict
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
(
from
vllm.multimodal.inputs
import
(
AudioItem
,
AudioItem
,
ModalityData
,
MultiModalDataDict
,
MultiModalFieldConfig
,
MultiModalFieldConfig
,
MultiModalKwargsItems
,
MultiModalKwargsItems
,
)
)
...
...
vllm/model_executor/models/qwen2_vl.py
View file @
ba2f0acc
...
@@ -47,6 +47,7 @@ from vllm.config import VllmConfig
...
@@ -47,6 +47,7 @@ from vllm.config import VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.distributed
import
parallel_state
,
tensor_model_parallel_all_gather
from
vllm.distributed
import
parallel_state
,
tensor_model_parallel_all_gather
from
vllm.distributed
import
utils
as
dist_utils
from
vllm.distributed
import
utils
as
dist_utils
from
vllm.inputs
import
ModalityData
,
MultiModalDataDict
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
QuickGELU
from
vllm.model_executor.layers.activation
import
QuickGELU
from
vllm.model_executor.layers.attention
import
MMEncoderAttention
from
vllm.model_executor.layers.attention
import
MMEncoderAttention
...
@@ -65,8 +66,6 @@ from vllm.model_executor.models.module_mapping import MultiModelKeys
...
@@ -65,8 +66,6 @@ from vllm.model_executor.models.module_mapping import MultiModelKeys
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
(
from
vllm.multimodal.inputs
import
(
ImageItem
,
ImageItem
,
ModalityData
,
MultiModalDataDict
,
MultiModalFeatureSpec
,
MultiModalFeatureSpec
,
MultiModalFieldConfig
,
MultiModalFieldConfig
,
MultiModalKwargsItems
,
MultiModalKwargsItems
,
...
...
vllm/model_executor/models/qwen3_asr.py
View file @
ba2f0acc
...
@@ -33,7 +33,7 @@ from transformers.models.whisper import WhisperFeatureExtractor
...
@@ -33,7 +33,7 @@ from transformers.models.whisper import WhisperFeatureExtractor
from
vllm.config
import
ModelConfig
,
SpeechToTextConfig
,
VllmConfig
from
vllm.config
import
ModelConfig
,
SpeechToTextConfig
,
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.inputs
.data
import
PromptType
,
TokensPrompt
from
vllm.inputs
import
ModalityData
,
MultiModalDataDict
,
PromptType
,
TokensPrompt
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.models.interfaces
import
(
from
vllm.model_executor.models.interfaces
import
(
MultiModalEmbeddings
,
MultiModalEmbeddings
,
...
@@ -59,8 +59,6 @@ from vllm.model_executor.models.whisper import ISO639_1_SUPPORTED_LANGS
...
@@ -59,8 +59,6 @@ from vllm.model_executor.models.whisper import ISO639_1_SUPPORTED_LANGS
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
(
from
vllm.multimodal.inputs
import
(
AudioItem
,
AudioItem
,
ModalityData
,
MultiModalDataDict
,
MultiModalFeatureSpec
,
MultiModalFeatureSpec
,
MultiModalFieldConfig
,
MultiModalFieldConfig
,
MultiModalKwargsItems
,
MultiModalKwargsItems
,
...
...
vllm/model_executor/models/qwen3_asr_realtime.py
View file @
ba2f0acc
...
@@ -23,7 +23,7 @@ import numpy as np
...
@@ -23,7 +23,7 @@ import numpy as np
import
torch
import
torch
from
vllm.config
import
ModelConfig
,
SpeechToTextConfig
,
VllmConfig
from
vllm.config
import
ModelConfig
,
SpeechToTextConfig
,
VllmConfig
from
vllm.inputs
.data
import
PromptType
,
TokensPrompt
from
vllm.inputs
import
PromptType
,
TokensPrompt
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.models.interfaces
import
(
from
vllm.model_executor.models.interfaces
import
(
SupportsRealtime
,
SupportsRealtime
,
...
...
vllm/model_executor/models/qwen3_omni_moe_thinker.py
View file @
ba2f0acc
...
@@ -31,6 +31,8 @@ import torch
...
@@ -31,6 +31,8 @@ import torch
import
torch.nn
as
nn
import
torch.nn
as
nn
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
packaging.version
import
Version
from
packaging.version
import
Version
from
transformers
import
PretrainedConfig
from
transformers
import
__version__
as
TRANSFORMERS_VERSION
from
transformers.feature_extraction_utils
import
BatchFeature
from
transformers.feature_extraction_utils
import
BatchFeature
from
transformers.models.qwen3_omni_moe.configuration_qwen3_omni_moe
import
(
from
transformers.models.qwen3_omni_moe.configuration_qwen3_omni_moe
import
(
Qwen3OmniMoeAudioEncoderConfig
,
Qwen3OmniMoeAudioEncoderConfig
,
...
@@ -42,15 +44,10 @@ from transformers.models.qwen3_omni_moe.processing_qwen3_omni_moe import (
...
@@ -42,15 +44,10 @@ from transformers.models.qwen3_omni_moe.processing_qwen3_omni_moe import (
)
)
from
transformers.models.whisper
import
WhisperFeatureExtractor
from
transformers.models.whisper
import
WhisperFeatureExtractor
# isort: off
from
transformers
import
PretrainedConfig
from
transformers
import
__version__
as
TRANSFORMERS_VERSION
# isort: on
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
ModelConfig
,
SpeechToTextConfig
,
VllmConfig
from
vllm.config
import
ModelConfig
,
SpeechToTextConfig
,
VllmConfig
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_pp_group
,
get_tensor_model_parallel_world_size
from
vllm.inputs
.data
import
PromptType
from
vllm.inputs
import
PromptType
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
_ACTIVATION_REGISTRY
from
vllm.model_executor.layers.activation
import
_ACTIVATION_REGISTRY
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
(
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
(
...
...
vllm/model_executor/models/qwen3_vl.py
View file @
ba2f0acc
...
@@ -52,6 +52,7 @@ from vllm.compilation.decorators import support_torch_compile
...
@@ -52,6 +52,7 @@ from vllm.compilation.decorators import support_torch_compile
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
,
VideoDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
,
VideoDummyOptions
from
vllm.distributed
import
get_pp_group
,
parallel_state
from
vllm.distributed
import
get_pp_group
,
parallel_state
from
vllm.inputs
import
MultiModalDataDict
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.activation
import
_ACTIVATION_REGISTRY
from
vllm.model_executor.layers.activation
import
_ACTIVATION_REGISTRY
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
(
from
vllm.model_executor.layers.attention.mm_encoder_attention
import
(
...
@@ -76,7 +77,6 @@ from vllm.multimodal.evs import (
...
@@ -76,7 +77,6 @@ from vllm.multimodal.evs import (
recompute_mrope_positions
,
recompute_mrope_positions
,
)
)
from
vllm.multimodal.inputs
import
(
from
vllm.multimodal.inputs
import
(
MultiModalDataDict
,
MultiModalFeatureSpec
,
MultiModalFeatureSpec
,
MultiModalFieldConfig
,
MultiModalFieldConfig
,
MultiModalFieldElem
,
MultiModalFieldElem
,
...
...
vllm/model_executor/models/qwen_vl.py
View file @
ba2f0acc
...
@@ -18,6 +18,7 @@ from transformers import BatchFeature
...
@@ -18,6 +18,7 @@ from transformers import BatchFeature
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.inputs
import
MultiModalDataDict
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
...
@@ -30,7 +31,6 @@ from vllm.model_executor.layers.resampler import Resampler2, get_abs_pos
...
@@ -30,7 +31,6 @@ from vllm.model_executor.layers.resampler import Resampler2, get_abs_pos
from
vllm.model_executor.models.module_mapping
import
MultiModelKeys
from
vllm.model_executor.models.module_mapping
import
MultiModelKeys
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
(
from
vllm.multimodal.inputs
import
(
MultiModalDataDict
,
MultiModalFieldConfig
,
MultiModalFieldConfig
,
MultiModalKwargsItems
,
MultiModalKwargsItems
,
)
)
...
...
vllm/model_executor/models/rvl.py
View file @
ba2f0acc
...
@@ -9,8 +9,8 @@ from transformers.activations import GELUActivation
...
@@ -9,8 +9,8 @@ from transformers.activations import GELUActivation
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.inputs
import
MultiModalDataDict
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
MultiModalDataDict
from
.llava_next
import
(
from
.llava_next
import
(
LlavaDummyInputsBuilder
,
LlavaDummyInputsBuilder
,
...
...
vllm/model_executor/models/siglip.py
View file @
ba2f0acc
...
@@ -18,6 +18,7 @@ from transformers import (
...
@@ -18,6 +18,7 @@ from transformers import (
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.distributed
import
divide
,
get_tensor_model_parallel_world_size
from
vllm.distributed
import
divide
,
get_tensor_model_parallel_world_size
from
vllm.inputs
import
MultiModalDataDict
,
MultiModalInput
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
import
(
from
vllm.model_executor.layers.attention
import
(
EncoderOnlyAttention
,
EncoderOnlyAttention
,
...
@@ -38,9 +39,7 @@ from vllm.model_executor.model_loader.weight_utils import (
...
@@ -38,9 +39,7 @@ from vllm.model_executor.model_loader.weight_utils import (
)
)
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
(
from
vllm.multimodal.inputs
import
(
MultiModalDataDict
,
MultiModalFieldConfig
,
MultiModalFieldConfig
,
MultiModalInputs
,
MultiModalKwargsItems
,
MultiModalKwargsItems
,
)
)
from
vllm.multimodal.parse
import
(
from
vllm.multimodal.parse
import
(
...
@@ -193,7 +192,7 @@ class SiglipMultiModalProcessor(BaseMultiModalProcessor[SiglipProcessingInfo]):
...
@@ -193,7 +192,7 @@ class SiglipMultiModalProcessor(BaseMultiModalProcessor[SiglipProcessingInfo]):
self
,
self
,
inputs
:
ProcessorInputs
,
inputs
:
ProcessorInputs
,
timing_ctx
:
TimingContext
,
timing_ctx
:
TimingContext
,
)
->
MultiModalInput
s
:
)
->
MultiModalInput
:
if
inputs
.
mm_data_items
:
if
inputs
.
mm_data_items
:
if
isinstance
(
inputs
.
prompt
,
str
):
if
isinstance
(
inputs
.
prompt
,
str
):
if
len
(
inputs
.
prompt
)
>
0
:
if
len
(
inputs
.
prompt
)
>
0
:
...
...
vllm/model_executor/models/skyworkr1v.py
View file @
ba2f0acc
...
@@ -16,6 +16,7 @@ from transformers import PretrainedConfig
...
@@ -16,6 +16,7 @@ from transformers import PretrainedConfig
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.inputs
import
MultiModalDataDict
from
vllm.model_executor.layers.linear
import
ReplicatedLinear
from
vllm.model_executor.layers.linear
import
ReplicatedLinear
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.quantization.awq
import
AWQConfig
from
vllm.model_executor.layers.quantization.awq
import
AWQConfig
...
@@ -24,7 +25,6 @@ from vllm.model_executor.models.intern_vit import (
...
@@ -24,7 +25,6 @@ from vllm.model_executor.models.intern_vit import (
InternVisionPatchModel
,
InternVisionPatchModel
,
)
)
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
MultiModalDataDict
from
vllm.multimodal.processing
import
BaseDummyInputsBuilder
from
vllm.multimodal.processing
import
BaseDummyInputsBuilder
from
vllm.sequence
import
IntermediateTensors
from
vllm.sequence
import
IntermediateTensors
from
vllm.transformers_utils.processors.internvl
import
(
from
vllm.transformers_utils.processors.internvl
import
(
...
...
vllm/model_executor/models/step3_vl.py
View file @
ba2f0acc
...
@@ -13,6 +13,7 @@ from transformers import BatchFeature
...
@@ -13,6 +13,7 @@ from transformers import BatchFeature
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.distributed
import
get_tensor_model_parallel_world_size
from
vllm.inputs
import
MultiModalDataDict
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.activation
import
get_act_fn
from
vllm.model_executor.layers.attention
import
MMEncoderAttention
from
vllm.model_executor.layers.attention
import
MMEncoderAttention
from
vllm.model_executor.layers.conv
import
Conv2dLayer
from
vllm.model_executor.layers.conv
import
Conv2dLayer
...
@@ -24,7 +25,6 @@ from vllm.model_executor.layers.linear import (
...
@@ -24,7 +25,6 @@ from vllm.model_executor.layers.linear import (
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.model_executor.layers.quantization
import
QuantizationConfig
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
(
from
vllm.multimodal.inputs
import
(
MultiModalDataDict
,
MultiModalFieldConfig
,
MultiModalFieldConfig
,
MultiModalKwargsItems
,
MultiModalKwargsItems
,
)
)
...
...
vllm/model_executor/models/terratorch.py
View file @
ba2f0acc
...
@@ -34,6 +34,7 @@ from transformers import BatchFeature
...
@@ -34,6 +34,7 @@ from transformers import BatchFeature
from
vllm.config
import
VllmConfig
from
vllm.config
import
VllmConfig
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.config.multimodal
import
BaseDummyOptions
from
vllm.inputs
import
ModalityData
,
MultiModalDataDict
,
MultiModalInput
,
mm_input
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.layers.pooler
import
IdentityPooler
from
vllm.model_executor.layers.pooler
import
IdentityPooler
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
from
vllm.model_executor.model_loader.weight_utils
import
default_weight_loader
...
@@ -41,13 +42,9 @@ from vllm.model_executor.models.utils import AutoWeightsLoader
...
@@ -41,13 +42,9 @@ from vllm.model_executor.models.utils import AutoWeightsLoader
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.inputs
import
(
from
vllm.multimodal.inputs
import
(
ImageItem
,
ImageItem
,
ModalityData
,
MultiModalDataDict
,
MultiModalFieldConfig
,
MultiModalFieldConfig
,
MultiModalInputs
,
MultiModalKwargsItems
,
MultiModalKwargsItems
,
PlaceholderRange
,
PlaceholderRange
,
mm_inputs
,
)
)
from
vllm.multimodal.parse
import
(
from
vllm.multimodal.parse
import
(
DictEmbeddingItems
,
DictEmbeddingItems
,
...
@@ -196,7 +193,7 @@ class TerratorchMultiModalProcessor(BaseMultiModalProcessor[TerratorchProcessing
...
@@ -196,7 +193,7 @@ class TerratorchMultiModalProcessor(BaseMultiModalProcessor[TerratorchProcessing
self
,
self
,
inputs
:
ProcessorInputs
,
inputs
:
ProcessorInputs
,
timing_ctx
:
TimingContext
,
timing_ctx
:
TimingContext
,
)
->
MultiModalInput
s
:
)
->
MultiModalInput
:
mm_items
=
inputs
.
mm_data_items
mm_items
=
inputs
.
mm_data_items
hf_processor_mm_kwargs
=
inputs
.
hf_processor_mm_kwargs
hf_processor_mm_kwargs
=
inputs
.
hf_processor_mm_kwargs
...
@@ -224,7 +221,7 @@ class TerratorchMultiModalProcessor(BaseMultiModalProcessor[TerratorchProcessing
...
@@ -224,7 +221,7 @@ class TerratorchMultiModalProcessor(BaseMultiModalProcessor[TerratorchProcessing
mm_placeholders
=
{
"image"
:
[
PlaceholderRange
(
offset
=
0
,
length
=
0
)]}
mm_placeholders
=
{
"image"
:
[
PlaceholderRange
(
offset
=
0
,
length
=
0
)]}
return
mm_input
s
(
return
mm_input
(
prompt_token_ids
=
[
1
],
prompt_token_ids
=
[
1
],
mm_kwargs
=
mm_kwargs
,
mm_kwargs
=
mm_kwargs
,
mm_hashes
=
mm_hashes
,
mm_hashes
=
mm_hashes
,
...
...
vllm/model_executor/models/transformers/multimodal.py
View file @
ba2f0acc
...
@@ -22,16 +22,14 @@ from typing import TYPE_CHECKING
...
@@ -22,16 +22,14 @@ from typing import TYPE_CHECKING
import
torch
import
torch
from
vllm.config.utils
import
getattr_iter
from
vllm.config.utils
import
getattr_iter
from
vllm.inputs
import
MultiModalDataDict
,
MultiModalInput
,
mm_input
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
from
vllm.model_executor.models.interfaces
import
SupportsMRoPE
,
SupportsMultiModal
from
vllm.model_executor.models.interfaces
import
SupportsMRoPE
,
SupportsMultiModal
from
vllm.multimodal
import
MultiModalKwargsItems
from
vllm.multimodal
import
MultiModalKwargsItems
from
vllm.multimodal.inputs
import
(
from
vllm.multimodal.inputs
import
(
MultiModalDataDict
,
MultiModalFeatureSpec
,
MultiModalFeatureSpec
,
MultiModalFieldConfig
,
MultiModalFieldConfig
,
MultiModalInputs
,
PlaceholderRange
,
PlaceholderRange
,
mm_inputs
,
)
)
from
vllm.multimodal.parse
import
(
from
vllm.multimodal.parse
import
(
ImageProcessorItems
,
ImageProcessorItems
,
...
@@ -179,7 +177,7 @@ class MultiModalProcessor(BaseMultiModalProcessor[MultiModalProcessingInfo]):
...
@@ -179,7 +177,7 @@ class MultiModalProcessor(BaseMultiModalProcessor[MultiModalProcessingInfo]):
self
,
self
,
inputs
:
ProcessorInputs
,
inputs
:
ProcessorInputs
,
timing_ctx
:
TimingContext
,
timing_ctx
:
TimingContext
,
)
->
MultiModalInput
s
:
)
->
MultiModalInput
:
"""
"""
Process multi-modal inputs to be used in vLLM.
Process multi-modal inputs to be used in vLLM.
...
@@ -261,7 +259,7 @@ class MultiModalProcessor(BaseMultiModalProcessor[MultiModalProcessingInfo]):
...
@@ -261,7 +259,7 @@ class MultiModalProcessor(BaseMultiModalProcessor[MultiModalProcessingInfo]):
with
timing_ctx
.
record
(
"get_mm_hashes"
):
with
timing_ctx
.
record
(
"get_mm_hashes"
):
mm_hashes
=
inputs
.
get_mm_hashes
(
self
.
info
.
model_id
)
mm_hashes
=
inputs
.
get_mm_hashes
(
self
.
info
.
model_id
)
return
mm_input
s
(
return
mm_input
(
prompt_token_ids
=
prompt_ids
,
prompt_token_ids
=
prompt_ids
,
mm_kwargs
=
mm_kwargs
,
mm_kwargs
=
mm_kwargs
,
mm_hashes
=
mm_hashes
,
mm_hashes
=
mm_hashes
,
...
...
Prev
1
2
3
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment