Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
952f0347
Commit
952f0347
authored
Jan 05, 2026
by
laibao
Browse files
feat:为 GLM4 和 Llama 模型新增 MultiModalConfigProxy,以支持扁平配置与嵌套的多模态配置(text_config)
parent
fbe8587a
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
101 additions
and
8 deletions
+101
-8
vllm/model_executor/models/glm4.py
vllm/model_executor/models/glm4.py
+51
-8
vllm/model_executor/models/llama.py
vllm/model_executor/models/llama.py
+50
-0
No files found.
vllm/model_executor/models/glm4.py
View file @
952f0347
...
@@ -32,6 +32,46 @@ import torch
...
@@ -32,6 +32,46 @@ import torch
from
torch
import
nn
from
torch
import
nn
from
transformers
import
Glm4Config
from
transformers
import
Glm4Config
class MultiModalConfigProxy:
    """
    Proxy class to handle both flat configs (e.g., Glm4Config) and
    nested multimodal configs (e.g., Glm4vConfig with text_config).

    Attribute reads are tried on the wrapped config first. When that raises
    ``AttributeError`` and the wrapped config exposes a ``text_config``
    attribute, the read is retried on ``text_config``. Attribute writes
    (other than the internal ``_config`` slot) are forwarded to the wrapped
    config itself, never to ``text_config``.
    """

    def __init__(self, config):
        # Bypass our own __setattr__: it forwards every other name to the
        # wrapped config, which is not stored on the instance yet.
        object.__setattr__(self, '_config', config)

    def __getattr__(self, name):
        """Resolve ``name`` on the wrapped config, then on its text_config.

        Only called by Python when normal attribute lookup on the proxy
        fails.

        Raises:
            AttributeError: if neither the wrapped config nor its
                ``text_config`` (when present) provides ``name``.
        """
        if name == '_config':
            # We only get here when '_config' is missing from the instance
            # dict, e.g. on an instance created through __new__ by
            # copy/pickle before its state is restored. Reading
            # self._config below would re-enter this method without end, so
            # fail with a regular AttributeError that hasattr() can absorb.
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{name}'")

        config = self._config
        # First try the original config (works for flat configs).
        try:
            return getattr(config, name)
        except AttributeError:
            # Not found at top level: fall back to the nested text_config.
            # If text_config lacks the attribute too, its own AttributeError
            # propagates to the caller.
            if hasattr(config, 'text_config'):
                return getattr(config.text_config, name)
            raise AttributeError(
                f"'{type(config).__name__}' object has no attribute '{name}'")

    def __setattr__(self, name, value):
        """Store ``_config`` on the proxy; forward any other name to it."""
        if name == '_config':
            object.__setattr__(self, name, value)
        else:
            setattr(self._config, name, value)

    def __hasattr__(self, name):
        """Return True if the wrapped config or its text_config has ``name``.

        NOTE: Python defines no ``__hasattr__`` hook; the built-in
        ``hasattr()`` goes through ``__getattr__`` and never calls this
        method. It is kept as an explicit helper for backward compatibility.
        """
        return hasattr(self._config, name) or (
            hasattr(self._config, 'text_config')
            and hasattr(self._config.text_config, name))
from
vllm.attention
import
Attention
,
AttentionType
from
vllm.attention
import
Attention
,
AttentionType
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
...
@@ -151,6 +191,9 @@ class Glm4DecoderLayer(nn.Module):
...
@@ -151,6 +191,9 @@ class Glm4DecoderLayer(nn.Module):
prefix
:
str
=
""
,
prefix
:
str
=
""
,
)
->
None
:
)
->
None
:
super
().
__init__
()
super
().
__init__
()
# Wrap config to handle both flat and nested multimodal configs
config
=
MultiModalConfigProxy
(
config
)
self
.
hidden_size
=
config
.
hidden_size
self
.
hidden_size
=
config
.
hidden_size
rope_theta
=
getattr
(
config
,
"rope_theta"
,
1000000
)
rope_theta
=
getattr
(
config
,
"rope_theta"
,
1000000
)
rope_scaling
=
getattr
(
config
,
"rope_scaling"
,
None
)
rope_scaling
=
getattr
(
config
,
"rope_scaling"
,
None
)
...
@@ -177,14 +220,11 @@ class Glm4DecoderLayer(nn.Module):
...
@@ -177,14 +220,11 @@ class Glm4DecoderLayer(nn.Module):
quant_config
=
quant_config
,
quant_config
=
quant_config
,
prefix
=
f
"
{
prefix
}
.mlp"
,
prefix
=
f
"
{
prefix
}
.mlp"
,
)
)
self
.
input_layernorm
=
RMSNorm
(
config
.
hidden_size
,
rms_norm_eps
=
getattr
(
config
,
'rms_norm_eps'
,
1e-5
)
eps
=
config
.
rms_norm_eps
)
self
.
input_layernorm
=
RMSNorm
(
config
.
hidden_size
,
eps
=
rms_norm_eps
)
self
.
post_attention_layernorm
=
RMSNorm
(
config
.
hidden_size
,
self
.
post_attention_layernorm
=
RMSNorm
(
config
.
hidden_size
,
eps
=
rms_norm_eps
)
eps
=
config
.
rms_norm_eps
)
self
.
post_self_attn_layernorm
=
RMSNorm
(
self
.
hidden_size
,
eps
=
rms_norm_eps
)
self
.
post_self_attn_layernorm
=
RMSNorm
(
config
.
hidden_size
,
self
.
post_mlp_layernorm
=
RMSNorm
(
self
.
hidden_size
,
eps
=
rms_norm_eps
)
eps
=
config
.
rms_norm_eps
)
self
.
post_mlp_layernorm
=
RMSNorm
(
config
.
hidden_size
,
eps
=
config
.
rms_norm_eps
)
def
forward
(
def
forward
(
self
,
self
,
...
@@ -254,6 +294,9 @@ class Glm4ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
...
@@ -254,6 +294,9 @@ class Glm4ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
quant_config
=
vllm_config
.
quant_config
quant_config
=
vllm_config
.
quant_config
lora_config
=
vllm_config
.
lora_config
lora_config
=
vllm_config
.
lora_config
# Wrap config to handle both flat and nested multimodal configs
config
=
MultiModalConfigProxy
(
config
)
self
.
config
=
config
self
.
config
=
config
self
.
lora_config
=
lora_config
self
.
lora_config
=
lora_config
...
...
vllm/model_executor/models/llama.py
View file @
952f0347
...
@@ -33,6 +33,46 @@ from transformers import LlamaConfig
...
@@ -33,6 +33,46 @@ from transformers import LlamaConfig
import
os
import
os
import
re
import
re
class MultiModalConfigProxy:
    """
    Proxy class to handle both flat configs (e.g., LlamaConfig) and
    nested multimodal configs (e.g., Glm4vConfig with text_config).

    Attribute reads are tried on the wrapped config first. When that raises
    ``AttributeError`` and the wrapped config exposes a ``text_config``
    attribute, the read is retried on ``text_config``. Attribute writes
    (other than the internal ``_config`` slot) are forwarded to the wrapped
    config itself, never to ``text_config``.
    """

    def __init__(self, config):
        # Bypass our own __setattr__: it forwards every other name to the
        # wrapped config, which is not stored on the instance yet.
        object.__setattr__(self, '_config', config)

    def __getattr__(self, name):
        """Resolve ``name`` on the wrapped config, then on its text_config.

        Only called by Python when normal attribute lookup on the proxy
        fails.

        Raises:
            AttributeError: if neither the wrapped config nor its
                ``text_config`` (when present) provides ``name``.
        """
        if name == '_config':
            # We only get here when '_config' is missing from the instance
            # dict, e.g. on an instance created through __new__ by
            # copy/pickle before its state is restored. Reading
            # self._config below would re-enter this method without end, so
            # fail with a regular AttributeError that hasattr() can absorb.
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{name}'")

        config = self._config
        # First try the original config (works for flat configs).
        try:
            return getattr(config, name)
        except AttributeError:
            # Not found at top level: fall back to the nested text_config.
            # If text_config lacks the attribute too, its own AttributeError
            # propagates to the caller.
            if hasattr(config, 'text_config'):
                return getattr(config.text_config, name)
            raise AttributeError(
                f"'{type(config).__name__}' object has no attribute '{name}'")

    def __setattr__(self, name, value):
        """Store ``_config`` on the proxy; forward any other name to it."""
        if name == '_config':
            object.__setattr__(self, name, value)
        else:
            setattr(self._config, name, value)

    def __hasattr__(self, name):
        """Return True if the wrapped config or its text_config has ``name``.

        NOTE: Python defines no ``__hasattr__`` hook; the built-in
        ``hasattr()`` goes through ``__getattr__`` and never calls this
        method. It is kept as an explicit helper for backward compatibility.
        """
        return hasattr(self._config, name) or (
            hasattr(self._config, 'text_config')
            and hasattr(self._config.text_config, name))
from
vllm.attention
import
Attention
,
AttentionType
from
vllm.attention
import
Attention
,
AttentionType
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.compilation.decorators
import
support_torch_compile
from
vllm.config
import
CacheConfig
,
VllmConfig
from
vllm.config
import
CacheConfig
,
VllmConfig
...
@@ -246,6 +286,9 @@ class LlamaDecoderLayer(nn.Module):
...
@@ -246,6 +286,9 @@ class LlamaDecoderLayer(nn.Module):
prefix
:
str
=
""
,
prefix
:
str
=
""
,
)
->
None
:
)
->
None
:
super
().
__init__
()
super
().
__init__
()
# Wrap config to handle both flat and nested multimodal configs
config
=
MultiModalConfigProxy
(
config
)
self
.
hidden_size
=
config
.
hidden_size
self
.
hidden_size
=
config
.
hidden_size
rope_theta
=
getattr
(
config
,
"rope_theta"
,
10000
)
rope_theta
=
getattr
(
config
,
"rope_theta"
,
10000
)
rope_scaling
=
getattr
(
config
,
"rope_scaling"
,
None
)
rope_scaling
=
getattr
(
config
,
"rope_scaling"
,
None
)
...
@@ -340,6 +383,9 @@ class LlamaModel(nn.Module):
...
@@ -340,6 +383,9 @@ class LlamaModel(nn.Module):
quant_config
=
vllm_config
.
quant_config
quant_config
=
vllm_config
.
quant_config
lora_config
=
vllm_config
.
lora_config
lora_config
=
vllm_config
.
lora_config
# Wrap config to handle both flat and nested multimodal configs
config
=
MultiModalConfigProxy
(
config
)
self
.
config
=
config
self
.
config
=
config
self
.
quant_config
=
quant_config
self
.
quant_config
=
quant_config
lora_vocab
=
(
lora_config
.
lora_extra_vocab_size
*
lora_vocab
=
(
lora_config
.
lora_extra_vocab_size
*
...
@@ -587,6 +633,10 @@ class LlamaForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
...
@@ -587,6 +633,10 @@ class LlamaForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
config
=
vllm_config
.
model_config
.
hf_config
config
=
vllm_config
.
model_config
.
hf_config
quant_config
=
vllm_config
.
quant_config
quant_config
=
vllm_config
.
quant_config
lora_config
=
vllm_config
.
lora_config
lora_config
=
vllm_config
.
lora_config
# Wrap config to handle both flat and nested multimodal configs
config
=
MultiModalConfigProxy
(
config
)
self
.
config
=
config
self
.
config
=
config
self
.
lora_config
=
lora_config
self
.
lora_config
=
lora_config
self
.
model
=
self
.
_init_model
(
vllm_config
=
vllm_config
,
self
.
model
=
self
.
_init_model
(
vllm_config
=
vllm_config
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment