Unverified commit c46b932d, authored by Cyrus Leung and committed by GitHub
Browse files

[Chore] Deprecate `SupportsMultiModal.merge_by_field_config` (#30170)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 64763823
...@@ -509,8 +509,6 @@ class InternS1MultiModalProcessor(BaseMultiModalProcessor[InternS1ProcessingInfo ...@@ -509,8 +509,6 @@ class InternS1MultiModalProcessor(BaseMultiModalProcessor[InternS1ProcessingInfo
class InternS1ForConditionalGeneration( class InternS1ForConditionalGeneration(
nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA
): ):
merge_by_field_config = True
# To ensure correct weight loading and mapping. # To ensure correct weight loading and mapping.
hf_to_vllm_mapper = WeightsMapper( hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={ orig_to_new_prefix={
......
...@@ -1074,8 +1074,6 @@ class InternVLMultiModalProcessor( ...@@ -1074,8 +1074,6 @@ class InternVLMultiModalProcessor(
dummy_inputs=InternVLDummyInputsBuilder, dummy_inputs=InternVLDummyInputsBuilder,
) )
class InternVLChatModel(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA): class InternVLChatModel(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA):
merge_by_field_config = True
supports_encoder_tp_data = True supports_encoder_tp_data = True
@classmethod @classmethod
......
...@@ -1292,8 +1292,6 @@ class KeyeMultiModalProcessor(BaseMultiModalProcessor[KeyeProcessingInfo]): ...@@ -1292,8 +1292,6 @@ class KeyeMultiModalProcessor(BaseMultiModalProcessor[KeyeProcessingInfo]):
class BaseKeyeModule(nn.Module): class BaseKeyeModule(nn.Module):
merge_by_field_config = True
packed_modules_mapping = { packed_modules_mapping = {
"qkv_proj": [ "qkv_proj": [
"q_proj", "q_proj",
......
...@@ -298,8 +298,6 @@ class KimiVLMultiModalProcessor(BaseMultiModalProcessor[KimiVLProcessingInfo]): ...@@ -298,8 +298,6 @@ class KimiVLMultiModalProcessor(BaseMultiModalProcessor[KimiVLProcessingInfo]):
dummy_inputs=KimiVLDummyInputsBuilder, dummy_inputs=KimiVLDummyInputsBuilder,
) )
class KimiVLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP): class KimiVLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True
supports_encoder_tp_data = True supports_encoder_tp_data = True
@classmethod @classmethod
......
...@@ -506,8 +506,6 @@ def init_vision_tower_for_llava( ...@@ -506,8 +506,6 @@ def init_vision_tower_for_llava(
dummy_inputs=LlavaDummyInputsBuilder, dummy_inputs=LlavaDummyInputsBuilder,
) )
class LlavaForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP): class LlavaForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True
packed_modules_mapping = { packed_modules_mapping = {
"qkv_proj": ["q_proj", "k_proj", "v_proj"], "qkv_proj": ["q_proj", "k_proj", "v_proj"],
"gate_up_proj": ["gate_proj", "up_proj"], "gate_up_proj": ["gate_proj", "up_proj"],
......
...@@ -223,8 +223,6 @@ class LlavaNextMultiModalProcessor( ...@@ -223,8 +223,6 @@ class LlavaNextMultiModalProcessor(
dummy_inputs=LlavaDummyInputsBuilder, dummy_inputs=LlavaDummyInputsBuilder,
) )
class LlavaNextForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP): class LlavaNextForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True
hf_to_vllm_mapper = WeightsMapper( hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={ orig_to_new_prefix={
# mapping for new names in checkpoint saved after transformers v4.52 # mapping for new names in checkpoint saved after transformers v4.52
......
...@@ -299,8 +299,6 @@ class LlavaNextMultiModalProjector(nn.Module): ...@@ -299,8 +299,6 @@ class LlavaNextMultiModalProjector(nn.Module):
dummy_inputs=LlavaNextVideoDummyInputsBuilder, dummy_inputs=LlavaNextVideoDummyInputsBuilder,
) )
class LlavaNextVideoForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP): class LlavaNextVideoForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True
hf_to_vllm_mapper = WeightsMapper( hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={ orig_to_new_prefix={
# mapping for new names in checkpoint saved after transformers v4.52 # mapping for new names in checkpoint saved after transformers v4.52
......
...@@ -479,8 +479,6 @@ class LlavaOnevisionMultiModalProjector(nn.Module): ...@@ -479,8 +479,6 @@ class LlavaOnevisionMultiModalProjector(nn.Module):
dummy_inputs=LlavaOnevisionDummyInputsBuilder, dummy_inputs=LlavaOnevisionDummyInputsBuilder,
) )
class LlavaOnevisionForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP): class LlavaOnevisionForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True
hf_to_vllm_mapper = WeightsMapper( hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={ orig_to_new_prefix={
# mapping for new names in checkpoint saved after transformers v4.52 # mapping for new names in checkpoint saved after transformers v4.52
......
...@@ -683,8 +683,6 @@ class MiDashengLMMultiModalProcessor( ...@@ -683,8 +683,6 @@ class MiDashengLMMultiModalProcessor(
dummy_inputs=MiDashengLMDummyInputsBuilder, dummy_inputs=MiDashengLMDummyInputsBuilder,
) )
class MiDashengLMModel(nn.Module, SupportsMultiModal, SupportsPP): class MiDashengLMModel(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True
packed_modules_mapping = { packed_modules_mapping = {
"qkv_proj": [ "qkv_proj": [
"q_proj", "q_proj",
......
...@@ -1003,8 +1003,6 @@ class MiniCPMVBaseModel(nn.Module, SupportsMultiModal, SupportsPP): ...@@ -1003,8 +1003,6 @@ class MiniCPMVBaseModel(nn.Module, SupportsMultiModal, SupportsPP):
instantiated. instantiated.
""" """
merge_by_field_config = True
supports_encoder_tp_data = True supports_encoder_tp_data = True
@classmethod @classmethod
......
...@@ -179,8 +179,6 @@ class MiniMaxVL01MultiModalProcessor( ...@@ -179,8 +179,6 @@ class MiniMaxVL01MultiModalProcessor(
dummy_inputs=MiniMaxVL01DummyInputsBuilder, dummy_inputs=MiniMaxVL01DummyInputsBuilder,
) )
class MiniMaxVL01ForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP): class MiniMaxVL01ForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True
packed_modules_mapping = { packed_modules_mapping = {
"qkv_proj": ["q_proj", "k_proj", "v_proj"], "qkv_proj": ["q_proj", "k_proj", "v_proj"],
"gate_up_proj": ["gate_proj", "up_proj"], "gate_up_proj": ["gate_proj", "up_proj"],
......
...@@ -423,8 +423,6 @@ def init_vision_tower_for_llava( ...@@ -423,8 +423,6 @@ def init_vision_tower_for_llava(
class Mistral3ForConditionalGeneration( class Mistral3ForConditionalGeneration(
nn.Module, SupportsLoRA, SupportsMultiModal, SupportsPP nn.Module, SupportsLoRA, SupportsMultiModal, SupportsPP
): ):
merge_by_field_config = True
packed_modules_mapping = { packed_modules_mapping = {
"qkv_proj": ["q_proj", "k_proj", "v_proj"], "qkv_proj": ["q_proj", "k_proj", "v_proj"],
"gate_up_proj": ["gate_proj", "up_proj"], "gate_up_proj": ["gate_proj", "up_proj"],
......
...@@ -741,8 +741,6 @@ class Llama4ForConditionalGeneration( ...@@ -741,8 +741,6 @@ class Llama4ForConditionalGeneration(
SupportsEagle3, SupportsEagle3,
SupportsLoRA, SupportsLoRA,
): ):
merge_by_field_config = True
packed_modules_mapping = { packed_modules_mapping = {
"qkv_proj": ["q_proj", "k_proj", "v_proj"], "qkv_proj": ["q_proj", "k_proj", "v_proj"],
"gate_up_proj": ["gate_proj", "up_proj"], "gate_up_proj": ["gate_proj", "up_proj"],
......
...@@ -1354,8 +1354,6 @@ class MolmoMultiModalProcessor(BaseMultiModalProcessor[MolmoProcessingInfo]): ...@@ -1354,8 +1354,6 @@ class MolmoMultiModalProcessor(BaseMultiModalProcessor[MolmoProcessingInfo]):
class MolmoForCausalLM( class MolmoForCausalLM(
nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA, SupportsQuant nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA, SupportsQuant
): ):
merge_by_field_config = True
hf_to_vllm_mapper = WeightsMapper( hf_to_vllm_mapper = WeightsMapper(
orig_to_new_substr={ orig_to_new_substr={
# vision backbone mapping # vision backbone mapping
......
...@@ -1116,8 +1116,6 @@ class NanoNemotronVLDummyInputsBuilder( ...@@ -1116,8 +1116,6 @@ class NanoNemotronVLDummyInputsBuilder(
class NemotronH_Nano_VL_V2( class NemotronH_Nano_VL_V2(
nn.Module, HasInnerState, IsHybrid, SupportsMultiModal, SupportsMultiModalPruning nn.Module, HasInnerState, IsHybrid, SupportsMultiModal, SupportsMultiModalPruning
): ):
merge_by_field_config = True
@classmethod @classmethod
def get_placeholder_str(cls, modality: str, i: int) -> str | None: def get_placeholder_str(cls, modality: str, i: int) -> str | None:
if modality.startswith("image"): if modality.startswith("image"):
......
...@@ -358,8 +358,6 @@ class NemotronVLProcessingInfo(BaseInternVLProcessingInfo): ...@@ -358,8 +358,6 @@ class NemotronVLProcessingInfo(BaseInternVLProcessingInfo):
dummy_inputs=BaseInternVLDummyInputsBuilder[NemotronVLProcessingInfo], dummy_inputs=BaseInternVLDummyInputsBuilder[NemotronVLProcessingInfo],
) )
class LlamaNemotronVLChatModel(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA): class LlamaNemotronVLChatModel(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA):
merge_by_field_config = True
@classmethod @classmethod
def get_placeholder_str(cls, modality: str, i: int) -> str | None: def get_placeholder_str(cls, modality: str, i: int) -> str | None:
if modality.startswith("image"): if modality.startswith("image"):
......
...@@ -201,7 +201,6 @@ class OpenCUADummyInputsBuilder(Qwen2VLDummyInputsBuilder): ...@@ -201,7 +201,6 @@ class OpenCUADummyInputsBuilder(Qwen2VLDummyInputsBuilder):
dummy_inputs=OpenCUADummyInputsBuilder, dummy_inputs=OpenCUADummyInputsBuilder,
) )
class OpenCUAForConditionalGeneration(Qwen2_5_VLForConditionalGeneration): class OpenCUAForConditionalGeneration(Qwen2_5_VLForConditionalGeneration):
merge_by_field_config = True
multimodal_cpu_fields = {"image_grid_thw"} multimodal_cpu_fields = {"image_grid_thw"}
packed_modules_mapping = { packed_modules_mapping = {
......
...@@ -414,8 +414,6 @@ class OvisMultiModalProcessor(BaseMultiModalProcessor[OvisProcessingInfo]): ...@@ -414,8 +414,6 @@ class OvisMultiModalProcessor(BaseMultiModalProcessor[OvisProcessingInfo]):
dummy_inputs=OvisDummyInputsBuilder, dummy_inputs=OvisDummyInputsBuilder,
) )
class Ovis(nn.Module, SupportsMultiModal, SupportsPP): class Ovis(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True
@classmethod @classmethod
def get_placeholder_str(cls, modality: str, i: int) -> str | None: def get_placeholder_str(cls, modality: str, i: int) -> str | None:
if modality.startswith("image"): if modality.startswith("image"):
......
...@@ -456,8 +456,6 @@ class Ovis2_5MultiModalProcessor(BaseMultiModalProcessor[Ovis2_5ProcessingInfo]) ...@@ -456,8 +456,6 @@ class Ovis2_5MultiModalProcessor(BaseMultiModalProcessor[Ovis2_5ProcessingInfo])
dummy_inputs=Ovis2_5DummyInputsBuilder, dummy_inputs=Ovis2_5DummyInputsBuilder,
) )
class Ovis2_5(nn.Module, SupportsMultiModal, SupportsPP): class Ovis2_5(nn.Module, SupportsMultiModal, SupportsPP):
merge_by_field_config = True
def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__() super().__init__()
config = vllm_config.model_config.hf_config config = vllm_config.model_config.hf_config
......
...@@ -1103,8 +1103,6 @@ class SiglipVisionModel(nn.Module): ...@@ -1103,8 +1103,6 @@ class SiglipVisionModel(nn.Module):
dummy_inputs=PaddleOCRVLDummyInputsBuilder, dummy_inputs=PaddleOCRVLDummyInputsBuilder,
) )
class PaddleOCRVLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsMRoPE): class PaddleOCRVLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsMRoPE):
merge_by_field_config = True
hf_to_vllm_mapper = WeightsMapper( hf_to_vllm_mapper = WeightsMapper(
orig_to_new_prefix={ orig_to_new_prefix={
"model.": "language_model.model.", "model.": "language_model.model.",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment