Commit ffc00331 authored by zhuwenwen
Browse files

Merge branch 'v0.9.2-dev-tc_opt' into 'v0.9.2-dev'

feat:为 GLM4 和 Llama 模型新增 MultiModalConfigProxy,以支持扁平配置与嵌套的多模态配置(text_config)

See merge request dcutoolkit/deeplearing/vllm!336
parents d916e714 952f0347
......@@ -32,6 +32,46 @@ import torch
from torch import nn
from transformers import Glm4Config
class MultiModalConfigProxy:
    """Attribute proxy over a flat or a nested (multimodal) config object.

    Handles both flat configs (e.g., Glm4Config) and nested multimodal
    configs (e.g., Glm4vConfig with text_config).

    Reads are served from the wrapped config first. If the attribute is
    missing there and the wrapped config exposes ``text_config``, the lookup
    falls back to ``text_config``. Writes always go to the wrapped top-level
    config, never to ``text_config``.

    Since reads are implemented in ``__getattr__``, the builtins
    ``getattr(proxy, name, default)`` and ``hasattr(proxy, name)`` follow the
    same fallback rule.
    """

    def __init__(self, config):
        # Bypass our own __setattr__, which forwards every other name to the
        # wrapped config.
        object.__setattr__(self, '_config', config)

    def __getattr__(self, name):
        """Resolve ``name`` on the wrapped config, then on its text_config.

        Only called when normal instance/class lookup fails.

        Raises:
            AttributeError: if neither the wrapped config nor its
                ``text_config`` (when present) provides ``name``. When
                ``text_config`` exists, the error raised is the one produced
                by ``text_config`` itself.
        """
        # '_config' reaches here only when the instance was created without
        # __init__ (copy/pickle reconstruct objects that way). Fail with a
        # plain AttributeError instead of recursing through self._config.
        if name == '_config':
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{name}'"
            )
        config = self._config
        # First try the original config (works for flat configs).
        try:
            return getattr(config, name)
        except AttributeError:
            # Not found at the top level: try text_config when there is one.
            if hasattr(config, 'text_config'):
                return getattr(config.text_config, name)
            raise AttributeError(
                f"'{type(config).__name__}' object has no attribute '{name}'"
            )

    def __setattr__(self, name, value):
        """Store ``_config`` on the proxy; forward anything else to the config."""
        if name == '_config':
            object.__setattr__(self, name, value)
        else:
            setattr(self._config, name, value)

    def __hasattr__(self, name):
        """Return True if ``name`` exists on the config or its text_config.

        NOTE: this is not a Python protocol hook; the builtin ``hasattr()``
        never calls it (it goes through ``__getattr__``). It is kept as an
        explicitly callable helper for backward compatibility.
        """
        return hasattr(self._config, name) or (
            hasattr(self._config, 'text_config') and
            hasattr(self._config.text_config, name)
        )
from vllm.attention import Attention, AttentionType
from vllm.compilation.decorators import support_torch_compile
from vllm.config import CacheConfig, VllmConfig
......@@ -151,6 +191,9 @@ class Glm4DecoderLayer(nn.Module):
prefix: str = "",
) -> None:
super().__init__()
# Wrap config to handle both flat and nested multimodal configs
config = MultiModalConfigProxy(config)
self.hidden_size = config.hidden_size
rope_theta = getattr(config, "rope_theta", 1000000)
rope_scaling = getattr(config, "rope_scaling", None)
......@@ -177,14 +220,11 @@ class Glm4DecoderLayer(nn.Module):
quant_config=quant_config,
prefix=f"{prefix}.mlp",
)
self.input_layernorm = RMSNorm(config.hidden_size,
eps=config.rms_norm_eps)
self.post_attention_layernorm = RMSNorm(config.hidden_size,
eps=config.rms_norm_eps)
self.post_self_attn_layernorm = RMSNorm(config.hidden_size,
eps=config.rms_norm_eps)
self.post_mlp_layernorm = RMSNorm(config.hidden_size,
eps=config.rms_norm_eps)
rms_norm_eps = getattr(config, 'rms_norm_eps', 1e-5)
self.input_layernorm = RMSNorm(config.hidden_size, eps=rms_norm_eps)
self.post_attention_layernorm = RMSNorm(config.hidden_size, eps=rms_norm_eps)
self.post_self_attn_layernorm = RMSNorm(self.hidden_size, eps=rms_norm_eps)
self.post_mlp_layernorm = RMSNorm(self.hidden_size, eps=rms_norm_eps)
def forward(
self,
......@@ -254,6 +294,9 @@ class Glm4ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
quant_config = vllm_config.quant_config
lora_config = vllm_config.lora_config
# Wrap config to handle both flat and nested multimodal configs
config = MultiModalConfigProxy(config)
self.config = config
self.lora_config = lora_config
......
......@@ -33,6 +33,46 @@ from transformers import LlamaConfig
import os
import re
class MultiModalConfigProxy:
    """Attribute proxy over a flat or a nested (multimodal) config object.

    Handles both flat configs (e.g., LlamaConfig) and nested multimodal
    configs (e.g., Glm4vConfig with text_config).

    Reads are served from the wrapped config first. If the attribute is
    missing there and the wrapped config exposes ``text_config``, the lookup
    falls back to ``text_config``. Writes always go to the wrapped top-level
    config, never to ``text_config``.

    Since reads are implemented in ``__getattr__``, the builtins
    ``getattr(proxy, name, default)`` and ``hasattr(proxy, name)`` follow the
    same fallback rule.
    """

    def __init__(self, config):
        # Bypass our own __setattr__, which forwards every other name to the
        # wrapped config.
        object.__setattr__(self, '_config', config)

    def __getattr__(self, name):
        """Resolve ``name`` on the wrapped config, then on its text_config.

        Only called when normal instance/class lookup fails.

        Raises:
            AttributeError: if neither the wrapped config nor its
                ``text_config`` (when present) provides ``name``. When
                ``text_config`` exists, the error raised is the one produced
                by ``text_config`` itself.
        """
        # '_config' reaches here only when the instance was created without
        # __init__ (copy/pickle reconstruct objects that way). Fail with a
        # plain AttributeError instead of recursing through self._config.
        if name == '_config':
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{name}'"
            )
        config = self._config
        # First try the original config (works for flat configs).
        try:
            return getattr(config, name)
        except AttributeError:
            # Not found at the top level: try text_config when there is one.
            if hasattr(config, 'text_config'):
                return getattr(config.text_config, name)
            raise AttributeError(
                f"'{type(config).__name__}' object has no attribute '{name}'"
            )

    def __setattr__(self, name, value):
        """Store ``_config`` on the proxy; forward anything else to the config."""
        if name == '_config':
            object.__setattr__(self, name, value)
        else:
            setattr(self._config, name, value)

    def __hasattr__(self, name):
        """Return True if ``name`` exists on the config or its text_config.

        NOTE: this is not a Python protocol hook; the builtin ``hasattr()``
        never calls it (it goes through ``__getattr__``). It is kept as an
        explicitly callable helper for backward compatibility.
        """
        return hasattr(self._config, name) or (
            hasattr(self._config, 'text_config') and
            hasattr(self._config.text_config, name)
        )
from vllm.attention import Attention, AttentionType
from vllm.compilation.decorators import support_torch_compile
from vllm.config import CacheConfig, VllmConfig
......@@ -246,6 +286,9 @@ class LlamaDecoderLayer(nn.Module):
prefix: str = "",
) -> None:
super().__init__()
# Wrap config to handle both flat and nested multimodal configs
config = MultiModalConfigProxy(config)
self.hidden_size = config.hidden_size
rope_theta = getattr(config, "rope_theta", 10000)
rope_scaling = getattr(config, "rope_scaling", None)
......@@ -340,6 +383,9 @@ class LlamaModel(nn.Module):
quant_config = vllm_config.quant_config
lora_config = vllm_config.lora_config
# Wrap config to handle both flat and nested multimodal configs
config = MultiModalConfigProxy(config)
self.config = config
self.quant_config = quant_config
lora_vocab = (lora_config.lora_extra_vocab_size *
......@@ -587,6 +633,10 @@ class LlamaForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
config = vllm_config.model_config.hf_config
quant_config = vllm_config.quant_config
lora_config = vllm_config.lora_config
# Wrap config to handle both flat and nested multimodal configs
config = MultiModalConfigProxy(config)
self.config = config
self.lora_config = lora_config
self.model = self._init_model(vllm_config=vllm_config,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment