Merge branch 'v0.9.2-dev-tc_opt' into 'v0.9.2-dev'

feat：为 GLM4 和 Llama 模型新增 MultiModalConfigProxy，以支持扁平配置与嵌套的多模态配置（text_config） See merge request dcutoolkit/deeplearing/vllm!336

Merge branch 'v0.9.2-dev-tc_opt' into 'v0.9.2-dev'
feat：为 GLM4 和 Llama 模型新增 MultiModalConfigProxy，以支持扁平配置与嵌套的多模态配置（text_config） See merge request dcutoolkit/deeplearing/vllm!336
ffc00331 · zhuwenwen · d916e714 · 952f0347 · ffc00331 · ffc00331
Commit ffc00331 authored Jan 05, 2026 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing with 101 additions and 8 deletions

vllm/model_executor/models/glm4.py vllm/model_executor/models/glm4.py +51 -8

vllm/model_executor/models/llama.py vllm/model_executor/models/llama.py +50 -0

No files found.
--- a/vllm/model_executor/models/glm4.py
+++ b/vllm/model_executor/models/glm4.py
@@ -32,6 +32,46 @@ import torch
 from torch import nn
 from transformers import Glm4Config

+
+class MultiModalConfigProxy:
+    """Proxy over a flat config (e.g., Glm4Config) or a nested multimodal
+    config (e.g., Glm4vConfig with text_config).
+
+    Reads try the wrapped config first and fall back to its ``text_config``
+    when it has one; writes always land on the wrapped config's top level.
+    ``hasattr``/``getattr`` on the proxy are served by ``__getattr__``.
+    """
+
+    def __init__(self, config):
+        # Bypass our own __setattr__, which forwards writes to the config.
+        object.__setattr__(self, '_config', config)
+
+    def __getattr__(self, name):
+        # Refuse '_config' and dunder probes: copy/pickle query instances built
+        # without __init__, where reading self._config would recurse forever.
+        if name == '_config' or name[:2] == name[-2:] == '__':
+            raise AttributeError(name)
+        try:
+            return getattr(self._config, name)
+        except AttributeError:
+            if hasattr(self._config, 'text_config'):
+                return getattr(self._config.text_config, name)
+            raise  # neither level has it: surface the config's own error
+
+    def __setattr__(self, name, value):
+        # '_config' lives on the proxy; everything else on the wrapped config.
+        if name == '_config':
+            object.__setattr__(self, name, value)
+        else:
+            setattr(self._config, name, value)
+
+    def __hasattr__(self, name):
+        # Not a Python protocol (hasattr() never calls it); explicit use only.
+        return hasattr(self._config, name) or (
+            hasattr(self._config, 'text_config') and
+            hasattr(self._config.text_config, name)
+        )
+
 from vllm.attention import Attention, AttentionType
 from vllm.compilation.decorators import support_torch_compile
 from vllm.config import CacheConfig, VllmConfig
@@ -151,6 +191,9 @@ class Glm4DecoderLayer(nn.Module):
        prefix: str = "",
    ) -> None:
        super().__init__()
+        # Wrap config to handle both flat and nested multimodal configs
+        config = MultiModalConfigProxy(config)
+        
        self.hidden_size = config.hidden_size
        rope_theta = getattr(config, "rope_theta", 1000000)
        rope_scaling = getattr(config, "rope_scaling", None)
@@ -177,14 +220,11 @@ class Glm4DecoderLayer(nn.Module):
            quant_config=quant_config,
            prefix=f"{prefix}.mlp",
        )
-        self.input_layernorm = RMSNorm(config.hidden_size,
-                                       eps=config.rms_norm_eps)
-        self.post_attention_layernorm = RMSNorm(config.hidden_size,
-                                                eps=config.rms_norm_eps)
-        self.post_self_attn_layernorm = RMSNorm(config.hidden_size,
-                                                eps=config.rms_norm_eps)
-        self.post_mlp_layernorm = RMSNorm(config.hidden_size,
-                                          eps=config.rms_norm_eps)
+        rms_norm_eps = getattr(config, 'rms_norm_eps', 1e-5)
+        self.input_layernorm = RMSNorm(config.hidden_size, eps=rms_norm_eps)
+        self.post_attention_layernorm = RMSNorm(config.hidden_size, eps=rms_norm_eps)
+        self.post_self_attn_layernorm = RMSNorm(self.hidden_size, eps=rms_norm_eps)
+        self.post_mlp_layernorm = RMSNorm(self.hidden_size, eps=rms_norm_eps)

    def forward(
        self,
@@ -254,6 +294,9 @@ class Glm4ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
        quant_config = vllm_config.quant_config
        lora_config = vllm_config.lora_config

+        # Wrap config to handle both flat and nested multimodal configs
+        config = MultiModalConfigProxy(config)
+        
        self.config = config
        self.lora_config = lora_config


--- a/vllm/model_executor/models/llama.py
+++ b/vllm/model_executor/models/llama.py
@@ -33,6 +33,46 @@ from transformers import LlamaConfig
 import os
 import re

+
+class MultiModalConfigProxy:
+    """Proxy over a flat config (e.g., LlamaConfig) or a nested multimodal
+    config (e.g., Glm4vConfig with text_config).
+
+    Reads try the wrapped config first and fall back to its ``text_config``
+    when it has one; writes always land on the wrapped config's top level.
+    ``hasattr``/``getattr`` on the proxy are served by ``__getattr__``.
+    """
+
+    def __init__(self, config):
+        # Bypass our own __setattr__, which forwards writes to the config.
+        object.__setattr__(self, '_config', config)
+
+    def __getattr__(self, name):
+        # Refuse '_config' and dunder probes: copy/pickle query instances built
+        # without __init__, where reading self._config would recurse forever.
+        if name == '_config' or name[:2] == name[-2:] == '__':
+            raise AttributeError(name)
+        try:
+            return getattr(self._config, name)
+        except AttributeError:
+            if hasattr(self._config, 'text_config'):
+                return getattr(self._config.text_config, name)
+            raise  # neither level has it: surface the config's own error
+
+    def __setattr__(self, name, value):
+        # '_config' lives on the proxy; everything else on the wrapped config.
+        if name == '_config':
+            object.__setattr__(self, name, value)
+        else:
+            setattr(self._config, name, value)
+
+    def __hasattr__(self, name):
+        # Not a Python protocol (hasattr() never calls it); explicit use only.
+        return hasattr(self._config, name) or (
+            hasattr(self._config, 'text_config') and
+            hasattr(self._config.text_config, name)
+        )
+
 from vllm.attention import Attention, AttentionType
 from vllm.compilation.decorators import support_torch_compile
 from vllm.config import CacheConfig, VllmConfig
@@ -246,6 +286,9 @@ class LlamaDecoderLayer(nn.Module):
        prefix: str = "",
    ) -> None:
        super().__init__()
+        # Wrap config to handle both flat and nested multimodal configs
+        config = MultiModalConfigProxy(config)
+        
        self.hidden_size = config.hidden_size
        rope_theta = getattr(config, "rope_theta", 10000)
        rope_scaling = getattr(config, "rope_scaling", None)
@@ -340,6 +383,9 @@ class LlamaModel(nn.Module):
        quant_config = vllm_config.quant_config
        lora_config = vllm_config.lora_config

+        # Wrap config to handle both flat and nested multimodal configs
+        config = MultiModalConfigProxy(config)
+        
        self.config = config
        self.quant_config = quant_config
        lora_vocab = (lora_config.lora_extra_vocab_size *
@@ -587,6 +633,10 @@ class LlamaForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
        config = vllm_config.model_config.hf_config
        quant_config = vllm_config.quant_config
        lora_config = vllm_config.lora_config
+        
+        # Wrap config to handle both flat and nested multimodal configs
+        config = MultiModalConfigProxy(config)
+        
        self.config = config
        self.lora_config = lora_config
        self.model = self._init_model(vllm_config=vllm_config,