Unverified commit c46b932d, authored by Cyrus Leung and committed by GitHub
Browse files

[Chore] Deprecate `SupportsMultiModal.merge_by_field_config` (#30170)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 64763823
......@@ -251,8 +251,6 @@ class PaliGemmaMultiModalProcessor(BaseMultiModalProcessor[PaliGemmaProcessingIn
dummy_inputs=PaliGemmaDummyInputsBuilder,
)
class PaliGemmaForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True
packed_modules_mapping = {
"qkv_proj": [
"q_proj",
......
......@@ -562,8 +562,6 @@ class Phi3VMultiModalProcessor(BaseMultiModalProcessor[Phi3VProcessingInfo]):
dummy_inputs=Phi3VDummyInputsBuilder,
)
class Phi3VForCausalLM(nn.Module, SupportsMultiModal, SupportsPP, SupportsQuant):
merge_by_field_config = True
hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={
"model.vision_embed_tokens.wte": "embed_tokens",
......
......@@ -984,8 +984,6 @@ class Phi4MMForCausalLM(nn.Module, SupportsLoRA, SupportsMultiModal):
Implements the Phi-4-multimodal-instruct model in vLLM.
"""
merge_by_field_config = True
packed_modules_mapping = {
"qkv_proj": [
"qkv_proj",
......
......@@ -365,8 +365,6 @@ class PixtralMultiModalProcessor(BaseMultiModalProcessor[PixtralProcessingInfo])
dummy_inputs=PixtralDummyInputsBuilder,
)
class PixtralForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True
@classmethod
def get_placeholder_str(cls, modality: str, i: int) -> str | None:
if modality.startswith("image"):
......
......@@ -773,8 +773,6 @@ class Qwen2_5OmniThinkerForConditionalGeneration(
SupportsMRoPE,
Qwen2_5OmniConditionalGenerationMixin,
):
merge_by_field_config = True
hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={
"thinker.lm_head.": "language_model.lm_head.",
......
......@@ -1039,7 +1039,6 @@ class Qwen2_5_VLForConditionalGeneration(
SupportsMultiModalPruning,
SupportsMRoPE,
):
merge_by_field_config = True
multimodal_cpu_fields = {"image_grid_thw", "video_grid_thw"}
packed_modules_mapping = {
......
......@@ -313,8 +313,6 @@ class Qwen2AudioMultiModalProcessor(BaseMultiModalProcessor[Qwen2AudioProcessing
dummy_inputs=Qwen2AudioDummyInputsBuilder,
)
class Qwen2AudioForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True
@classmethod
def get_placeholder_str(cls, modality: str, i: int) -> str | None:
if modality.startswith("audio"):
......
......@@ -1131,7 +1131,6 @@ class Qwen2VLMultiModalProcessor(BaseMultiModalProcessor[Qwen2VLProcessingInfo])
class Qwen2VLForConditionalGeneration(
nn.Module, SupportsMultiModal, SupportsLoRA, SupportsPP, SupportsMRoPE
):
merge_by_field_config = True
multimodal_cpu_fields = {"image_grid_thw", "video_grid_thw"}
# To ensure correct weight loading and mapping.
......
......@@ -1131,8 +1131,6 @@ class Qwen3OmniMoeThinkerForConditionalGeneration(
SupportsMRoPE,
Qwen3OmniMoeConditionalGenerationMixin,
):
merge_by_field_config = True
hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={
"thinker.lm_head.": "language_model.lm_head.",
......
......@@ -1190,7 +1190,6 @@ class Qwen3VLForConditionalGeneration(
SupportsMRoPE,
SupportsEagle3,
):
merge_by_field_config = True
multimodal_cpu_fields = {"image_grid_thw", "video_grid_thw"}
packed_modules_mapping = {
......
......@@ -703,8 +703,6 @@ class QwenVLMultiModalProcessor(BaseMultiModalProcessor[QwenVLProcessingInfo]):
class QwenVLForConditionalGeneration(
QWenBaseModel, SupportsPP, SupportsLoRA, SupportsMultiModal
):
merge_by_field_config = True
packed_modules_mapping = {
"c_attn": ["c_attn"],
"gate_up_proj": [
......
......@@ -989,7 +989,6 @@ class SiglipEmbeddingModel(nn.Module, SupportsMultiModal, SupportsQuant):
is_pooling_model = True
packed_modules_mapping = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]}
merge_by_field_config = True
@classmethod
def get_placeholder_str(cls, modality: str, i: int) -> str | None:
......
......@@ -647,8 +647,6 @@ class SkyworkR1VMultiModalProcessor(BaseMultiModalProcessor[SkyworkR1VProcessing
dummy_inputs=SkyworkR1VDummyInputsBuilder,
)
class SkyworkR1VChatModel(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True
@classmethod
def get_placeholder_str(cls, modality: str, i: int) -> str | None:
if modality.startswith("image"):
......
......@@ -916,8 +916,6 @@ class Step3VisionTransformer(nn.Module):
dummy_inputs=Step3VLDummyInputsBuilder,
)
class Step3VLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True
hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={
"model.": "language_model.model.",
......
......@@ -400,8 +400,6 @@ def init_vision_tower_for_tarsier(
dummy_inputs=TarsierDummyInputsBuilder,
)
class TarsierForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True
packed_modules_mapping = {
"qkv_proj": ["q_proj", "k_proj", "v_proj"],
"gate_up_proj": ["gate_proj", "up_proj"],
......
......@@ -227,7 +227,6 @@ class TerratorchMultiModalProcessor(BaseMultiModalProcessor):
dummy_inputs=TerratorchInputBuilder,
)
class Terratorch(nn.Module, IsAttentionFree, SupportsMultiModal):
merge_by_field_config = True
supports_multimodal_raw_input_only = True
is_pooling_model = True
......
......@@ -264,7 +264,7 @@ class MultiModalProcessor(BaseMultiModalProcessor[MultiModalProcessingInfo]):
class MultiModalMixin(SupportsMultiModal, SupportsMRoPE):
supports_multimodal_raw_input_only = True
merge_by_field_config = True
# Backwards compatibility for prev released models. State dicts back then
# had different formats and cannot be loaded with `AutoModel` mapping as is
hf_to_vllm_mapper = WeightsMapper(
......
......@@ -498,8 +498,6 @@ class ModifiedWhisperEncoder(WhisperEncoder):
dummy_inputs=UltravoxDummyInputsBuilder,
)
class UltravoxModel(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA):
merge_by_field_config = True
packed_modules_mapping = {
"qkv_proj": ["q_proj", "k_proj", "v_proj"],
"gate_up_proj": ["gate_proj", "up_proj"],
......
......@@ -330,8 +330,6 @@ class VoxtralMultiModalProcessor(BaseMultiModalProcessor[VoxtralProcessingInfo])
class VoxtralForConditionalGeneration(
nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA, SupportsTranscription
):
merge_by_field_config = True
supported_languages = ISO639_1_SUPPORTED_LANGS
packed_modules_mapping = {
......
......@@ -775,7 +775,6 @@ class WhisperMultiModalProcessor(EncDecMultiModalProcessor[WhisperProcessingInfo
class WhisperForConditionalGeneration(
nn.Module, SupportsTranscription, SupportsMultiModal
):
merge_by_field_config = True
packed_modules_mapping = {
"self_attn.qkv_proj": [
"self_attn.q_proj",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment