Unverified commit ba2f0acc, authored by Cyrus Leung, committed by GitHub
Browse files

[Misc] Reorganize inputs (#35182)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 678b3c99
......@@ -19,6 +19,7 @@ from transformers.utils import torch_int
from vllm.config import VllmConfig
from vllm.config.multimodal import BaseDummyOptions
from vllm.distributed import get_tensor_model_parallel_world_size
from vllm.inputs import ModalityData, MultiModalDataDict
from vllm.logger import init_logger
from vllm.model_executor.layers.attention import (
MMEncoderAttention,
......@@ -41,8 +42,6 @@ from vllm.model_executor.models.module_mapping import MultiModelKeys
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (
ImageItem,
ModalityData,
MultiModalDataDict,
MultiModalFeatureSpec,
MultiModalFieldConfig,
MultiModalKwargsItems,
......
......@@ -14,13 +14,13 @@ from transformers.activations import GELUActivation
from transformers.feature_extraction_utils import BatchFeature
from vllm.config import VllmConfig
from vllm.inputs import ModalityData
from vllm.logger import init_logger
from vllm.model_executor.layers.linear import ColumnParallelLinear, RowParallelLinear
from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (
ImageItem,
ModalityData,
MultiModalFeatureSpec,
MultiModalFieldConfig,
MultiModalKwargsItems,
......
......@@ -14,7 +14,7 @@ from transformers import WhisperConfig as HFWhisperConfig
from vllm.config import ModelConfig, SpeechToTextConfig, VllmConfig
from vllm.config.multimodal import BaseDummyOptions
from vllm.inputs.data import PromptType, TokensPrompt
from vllm.inputs import PromptType, TokensPrompt
from vllm.model_executor.model_loader import DefaultModelLoader
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.models.interfaces import (
......
......@@ -16,6 +16,7 @@ from transformers import BatchFeature
from vllm.config import VllmConfig
from vllm.config.multimodal import BaseDummyOptions
from vllm.inputs import MultiModalDataDict
from vllm.logger import init_logger
from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.model_executor.layers.quantization.compressed_tensors import (
......@@ -35,7 +36,6 @@ from vllm.model_executor.models.kimi_k25_vit import (
)
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (
MultiModalDataDict,
MultiModalFieldConfig,
MultiModalKwargsItems,
NestedTensors,
......
......@@ -54,12 +54,12 @@ from transformers.activations import GELUActivation
from vllm.config import VllmConfig
from vllm.config.multimodal import BaseDummyOptions
from vllm.inputs import MultiModalDataDict
from vllm.model_executor.layers.linear import ReplicatedLinear
from vllm.model_executor.models.interfaces import SupportsMultiModal, SupportsPP
from vllm.model_executor.models.moonvit import MoonVitPretrainedModel
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (
MultiModalDataDict,
MultiModalFieldConfig,
MultiModalKwargsItems,
NestedTensors,
......
......@@ -21,6 +21,7 @@ from transformers.models.lfm2_vl.image_processing_lfm2_vl_fast import (
from vllm.config import VllmConfig
from vllm.config.multimodal import BaseDummyOptions
from vllm.forward_context import set_forward_context
from vllm.inputs import MultiModalDataDict
from vllm.model_executor.layers.mamba.mamba_utils import (
MambaStateCopyFunc,
MambaStateCopyFuncCalculator,
......@@ -30,7 +31,6 @@ from vllm.model_executor.layers.mamba.mamba_utils import (
from vllm.model_executor.models.module_mapping import MultiModelKeys
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (
MultiModalDataDict,
MultiModalFieldConfig,
MultiModalKwargsItems,
)
......
......@@ -20,17 +20,15 @@ from transformers.models.pixtral import PixtralProcessor
from vllm.config import VllmConfig
from vllm.config.multimodal import BaseDummyOptions
from vllm.inputs import MultiModalDataDict, MultiModalInput, mm_input
from vllm.model_executor.layers.activation import get_act_fn
from vllm.model_executor.layers.linear import ColumnParallelLinear, RowParallelLinear
from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.cache import BaseMultiModalProcessorCache
from vllm.multimodal.inputs import (
MultiModalDataDict,
MultiModalFieldConfig,
MultiModalInputs,
MultiModalKwargsItems,
mm_inputs,
)
from vllm.multimodal.parse import (
ImageEmbeddingItems,
......@@ -777,7 +775,7 @@ class MantisMultiModalProcessor(LlavaMultiModalProcessor):
self,
inputs: ProcessorInputs,
timing_ctx: TimingContext,
) -> MultiModalInputs:
) -> MultiModalInput:
hf_config = self.info.get_hf_config()
image_token_id = hf_config.image_token_index
......@@ -833,7 +831,7 @@ class MantisMultiModalProcessor(LlavaMultiModalProcessor):
for modality, placeholders in mm_placeholders.items()
}
return mm_inputs(
return mm_input(
prompt_token_ids=prompt_ids,
mm_kwargs=mm_kwargs,
mm_hashes=mm_hashes,
......
......@@ -11,11 +11,11 @@ from transformers import BatchFeature, LlavaNextVideoConfig, LlavaNextVideoProce
from vllm.config import VllmConfig
from vllm.config.multimodal import BaseDummyOptions
from vllm.inputs import MultiModalDataDict
from vllm.model_executor.layers.activation import get_act_fn
from vllm.model_executor.models.clip import CLIPVisionModel
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (
MultiModalDataDict,
MultiModalFieldConfig,
MultiModalKwargsItems,
)
......
......@@ -15,10 +15,10 @@ from transformers.models.llava_onevision.modeling_llava_onevision import (
from vllm.config import VllmConfig
from vllm.config.multimodal import BaseDummyOptions
from vllm.inputs import MultiModalDataDict
from vllm.model_executor.layers.activation import get_act_fn
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (
MultiModalDataDict,
MultiModalFieldConfig,
MultiModalKwargsItems,
)
......
......@@ -38,6 +38,7 @@ from transformers import BatchFeature
from vllm.config import VllmConfig
from vllm.config.multimodal import BaseDummyOptions
from vllm.distributed import get_tensor_model_parallel_world_size
from vllm.inputs import MultiModalDataDict
from vllm.model_executor.layers.activation import get_act_fn
from vllm.model_executor.layers.conv import Conv2dLayer
from vllm.model_executor.layers.linear import (
......@@ -48,7 +49,6 @@ from vllm.model_executor.layers.linear import (
from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (
MultiModalDataDict,
MultiModalFieldConfig,
MultiModalKwargsItems,
)
......
......@@ -41,9 +41,9 @@ from transformers.models.whisper.modeling_whisper import (
from vllm.config import VllmConfig
from vllm.config.multimodal import BaseDummyOptions
from vllm.inputs import ModalityData, MultiModalDataDict
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalKwargsItems
from vllm.multimodal.inputs import (
MultiModalDataDict,
MultiModalFieldConfig,
NestedTensors,
)
......@@ -51,7 +51,6 @@ from vllm.multimodal.parse import (
AudioItem,
AudioProcessorItems,
DictEmbeddingItems,
ModalityData,
ModalityDataItems,
MultiModalDataItems,
)
......
......@@ -41,6 +41,7 @@ from typing_extensions import TypeVar
from vllm.config import VllmConfig
from vllm.config.multimodal import BaseDummyOptions
from vllm.inputs import ModalityData, MultiModalDataDict
from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.model_executor.layers.resampler import (
BaseResampler,
......@@ -54,7 +55,6 @@ from vllm.model_executor.models.qwen2 import Qwen2ForCausalLM
from vllm.model_executor.models.qwen3 import Qwen3ForCausalLM
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (
MultiModalDataDict,
MultiModalFieldConfig,
MultiModalKwargsItems,
NestedTensors,
......@@ -64,7 +64,6 @@ from vllm.multimodal.parse import (
ImageItem,
ImageProcessorItems,
ImageSize,
ModalityData,
ModalityDataItems,
MultiModalDataItems,
MultiModalDataParser,
......
......@@ -11,6 +11,7 @@ from transformers.models.pixtral import PixtralProcessor
from vllm.config import VllmConfig
from vllm.config.multimodal import BaseDummyOptions
from vllm.inputs import MultiModalDataDict
from vllm.model_executor.layers.activation import get_act_fn
from vllm.model_executor.layers.layernorm import RMSNorm
from vllm.model_executor.layers.linear import ColumnParallelLinear, RowParallelLinear
......@@ -18,7 +19,6 @@ from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.model_executor.models.module_mapping import MultiModelKeys
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (
MultiModalDataDict,
MultiModalFieldConfig,
MultiModalKwargsItems,
)
......
......@@ -38,6 +38,7 @@ from vllm.compilation.decorators import (
from vllm.config import VllmConfig, set_current_vllm_config
from vllm.config.multimodal import BaseDummyOptions
from vllm.distributed import get_tensor_model_parallel_world_size
from vllm.inputs import MultiModalDataDict
from vllm.model_executor.layers.attention import MMEncoderAttention
from vllm.model_executor.layers.fused_moe import FusedMoE
from vllm.model_executor.layers.linear import (
......@@ -53,7 +54,6 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.models.module_mapping import MultiModelKeys
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (
MultiModalDataDict,
MultiModalFieldConfig,
MultiModalKwargsItems,
)
......
......@@ -29,6 +29,7 @@ from vllm.distributed import (
split_tensor_along_last_dim,
tensor_model_parallel_all_gather,
)
from vllm.inputs import MultiModalDataDict
from vllm.model_executor.layers.activation import MulAndSilu, QuickGELU, SiluAndMul
from vllm.model_executor.layers.attention import Attention, MMEncoderAttention
from vllm.model_executor.layers.layernorm import RMSNorm
......@@ -49,7 +50,6 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.models.module_mapping import MultiModelKeys
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (
MultiModalDataDict,
MultiModalFieldConfig,
MultiModalKwargsItems,
)
......
......@@ -33,6 +33,7 @@ from vllm.distributed import (
split_tensor_along_last_dim,
tensor_model_parallel_all_gather,
)
from vllm.inputs import MultiModalDataDict
from vllm.logger import init_logger
from vllm.model_executor.layers.activation import MulAndSilu, SiluAndMul, get_act_fn
from vllm.model_executor.layers.attention import Attention, MMEncoderAttention
......@@ -54,7 +55,6 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.model_executor.models.module_mapping import MultiModelKeys
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (
MultiModalDataDict,
MultiModalFieldConfig,
MultiModalKwargsItems,
VideoItem,
......
......@@ -21,6 +21,7 @@ from transformers import BatchFeature
from vllm.config import VllmConfig
from vllm.config.multimodal import BaseDummyOptions, VideoDummyOptions
from vllm.inputs import MultiModalDataDict, MultiModalInput
from vllm.logger import init_logger
from vllm.model_executor.layers.activation import ReLUSquaredActivation
from vllm.model_executor.layers.layernorm import RMSNorm
......@@ -48,9 +49,7 @@ from vllm.multimodal.evs import (
from vllm.multimodal.inputs import (
AudioItem,
BatchedTensorInputs,
MultiModalDataDict,
MultiModalFieldConfig,
MultiModalInputs,
MultiModalKwargsItems,
VideoItem,
)
......@@ -576,7 +575,7 @@ class NanoNemotronVLMultiModalProcessor(
self,
inputs: ProcessorInputs,
timing_ctx: TimingContext,
) -> MultiModalInputs:
) -> MultiModalInput:
use_audio_in_video = bool(
inputs.hf_processor_mm_kwargs.get("use_audio_in_video", False)
)
......@@ -632,7 +631,7 @@ class NanoNemotronVLMultiModalProcessor(
for modality, placeholders in mm_placeholders.items()
}
return MultiModalInputs(
return MultiModalInput(
type="multimodal",
prompt_token_ids=prompt_ids,
mm_kwargs=mm_info.kwargs,
......
......@@ -23,6 +23,7 @@ from transformers import (
from vllm.config import CacheConfig, VllmConfig
from vllm.config.lora import LoRAConfig
from vllm.config.multimodal import BaseDummyOptions
from vllm.inputs import MultiModalDataDict
from vllm.logger import init_logger
from vllm.model_executor.layers.activation import get_act_fn
from vllm.model_executor.layers.linear import ColumnParallelLinear, RowParallelLinear
......@@ -41,7 +42,6 @@ from vllm.model_executor.models.radio import RadioModel
from vllm.model_executor.models.whisper import WhisperAttention, WhisperCrossAttention
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (
MultiModalDataDict,
MultiModalFieldConfig,
MultiModalKwargsItems,
)
......
......@@ -14,12 +14,10 @@ import torch.nn as nn
from transformers import PretrainedConfig
from vllm.config.multimodal import BaseDummyOptions
from vllm.inputs import MultiModalDataDict
from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (
BatchedTensorInputs,
MultiModalDataDict,
)
from vllm.multimodal.inputs import BatchedTensorInputs
from vllm.multimodal.parse import (
ImageEmbeddingItems,
ImageProcessorItems,
......
......@@ -30,6 +30,7 @@ from transformers import BatchFeature, PretrainedConfig
from vllm.config import VllmConfig
from vllm.config.multimodal import BaseDummyOptions
from vllm.inputs import MultiModalDataDict
from vllm.model_executor.layers.linear import ReplicatedLinear
from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.model_executor.models.aimv2 import AIMv2Model
......@@ -42,7 +43,6 @@ from vllm.model_executor.models.utils import (
)
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import (
MultiModalDataDict,
MultiModalFieldConfig,
MultiModalKwargsItems,
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment