Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
99d49945
Commit
99d49945
authored
Feb 10, 2025
by
zhuwenwen
Browse files
update model layout
parent
92d43fd5
Changes
11
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
27 additions
and
15 deletions
+27
-15
vllm/model_executor/model_loader/utils.py
vllm/model_executor/model_loader/utils.py
+1
-2
vllm/model_executor/models/baichuan.py
vllm/model_executor/models/baichuan.py
+4
-2
vllm/model_executor/models/bloom.py
vllm/model_executor/models/bloom.py
+2
-1
vllm/model_executor/models/chatglm.py
vllm/model_executor/models/chatglm.py
+2
-1
vllm/model_executor/models/deepseek_v3.py
vllm/model_executor/models/deepseek_v3.py
+2
-1
vllm/model_executor/models/falcon.py
vllm/model_executor/models/falcon.py
+2
-1
vllm/model_executor/models/mixtral.py
vllm/model_executor/models/mixtral.py
+2
-1
vllm/model_executor/models/qwen.py
vllm/model_executor/models/qwen.py
+6
-3
vllm/model_executor/models/qwen2.py
vllm/model_executor/models/qwen2.py
+2
-1
vllm/model_executor/models/qwen2_moe.py
vllm/model_executor/models/qwen2_moe.py
+2
-1
vllm/model_executor/models/qwen2_vl.py
vllm/model_executor/models/qwen2_vl.py
+2
-1
No files found.
vllm/model_executor/model_loader/utils.py
View file @
99d49945
...
@@ -79,8 +79,7 @@ def get_model_architecture(
...
@@ -79,8 +79,7 @@ def get_model_architecture(
model_config
:
ModelConfig
)
->
Tuple
[
Type
[
nn
.
Module
],
str
]:
model_config
:
ModelConfig
)
->
Tuple
[
Type
[
nn
.
Module
],
str
]:
architectures
=
getattr
(
model_config
.
hf_config
,
"architectures"
,
[])
architectures
=
getattr
(
model_config
.
hf_config
,
"architectures"
,
[])
visions
=
getattr
(
model_config
.
hf_config
,
"visual"
,
[])
or
getattr
(
model_config
.
hf_config
,
"vision_config"
,
[])
visions
=
getattr
(
model_config
.
hf_config
,
"visual"
,
[])
or
getattr
(
model_config
.
hf_config
,
"vision_config"
,
[])
# 'Qwen2VLForConditionalGeneration'
support_nn_architectures
=
[
'LlamaForCausalLM'
,
'QWenLMHeadModel'
,
'Qwen2ForCausalLM'
,
'Qwen2VLForConditionalGeneration'
,
'Qwen2MoeForCausalLM'
,
'ChatGLMModel'
,
'ChatGLMForConditionalGeneration'
,
support_nn_architectures
=
[
'LlamaForCausalLM'
,
'QWenLMHeadModel'
,
'Qwen2ForCausalLM'
,
'Qwen2MoeForCausalLM'
,
'ChatGLMModel'
,
'ChatGLMForConditionalGeneration'
,
'BaichuanForCausalLM'
,
'BloomForCausalLM'
,
'MedusaModel'
,
'MixtralForCausalLM'
,
'MLPSpeculatorPreTrainedModel'
,
'FalconForCausalLM'
,
'DeepseekV3ForCausalLM'
]
'BaichuanForCausalLM'
,
'BloomForCausalLM'
,
'MedusaModel'
,
'MixtralForCausalLM'
,
'MLPSpeculatorPreTrainedModel'
,
'FalconForCausalLM'
,
'DeepseekV3ForCausalLM'
]
if
any
(
arch
in
architectures
for
arch
in
support_nn_architectures
):
if
any
(
arch
in
architectures
for
arch
in
support_nn_architectures
):
if
os
.
getenv
(
'LLAMA_NN'
)
!=
'0'
:
if
os
.
getenv
(
'LLAMA_NN'
)
!=
'0'
:
...
...
vllm/model_executor/models/baichuan.py
View file @
99d49945
...
@@ -499,7 +499,8 @@ class BaiChuanBaseForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
...
@@ -499,7 +499,8 @@ class BaiChuanBaseForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
# lay_qkv_words = ["self_attn.W_pack.weight"]
# lay_qkv_words = ["self_attn.W_pack.weight"]
# qkv_words = "|".join(lay_qkv_words)
# qkv_words = "|".join(lay_qkv_words)
for
layername
,
weight
in
params_dict
.
items
():
for
layername
in
loaded_params
:
weight
=
params_dict
[
layername
]
matches
=
re
.
findall
(
combined_words
,
layername
)
matches
=
re
.
findall
(
combined_words
,
layername
)
if
matches
:
if
matches
:
# if self.use_gemm_pad and gemm_bank_conf(weight.data.shape[0]):
# if self.use_gemm_pad and gemm_bank_conf(weight.data.shape[0]):
...
@@ -526,7 +527,8 @@ class BaiChuanBaseForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
...
@@ -526,7 +527,8 @@ class BaiChuanBaseForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
]
]
combined_words
=
"|"
.
join
(
lay_key_words
)
combined_words
=
"|"
.
join
(
lay_key_words
)
for
layername
,
weight
in
params_dict
.
items
():
for
layername
in
loaded_params
:
weight
=
params_dict
[
layername
]
matches
=
re
.
findall
(
combined_words
,
layername
)
matches
=
re
.
findall
(
combined_words
,
layername
)
if
matches
:
if
matches
:
...
...
vllm/model_executor/models/bloom.py
View file @
99d49945
...
@@ -419,7 +419,8 @@ class BloomForCausalLM(nn.Module, SupportsPP):
...
@@ -419,7 +419,8 @@ class BloomForCausalLM(nn.Module, SupportsPP):
# lay_qkv_bias_words = ["self_attention.query_key_value.bias"]
# lay_qkv_bias_words = ["self_attention.query_key_value.bias"]
# qkv_bias_words = "|".join(lay_qkv_bias_words)
# qkv_bias_words = "|".join(lay_qkv_bias_words)
for
layername
,
weight
in
params_dict
.
items
():
for
layername
in
loaded_params
:
weight
=
params_dict
[
layername
]
# if self.use_fa_pad and (re.findall(qkv_bias_words, layername)):
# if self.use_fa_pad and (re.findall(qkv_bias_words, layername)):
# weight.data = pad_weight(weight.data, 32)
# weight.data = pad_weight(weight.data, 32)
...
...
vllm/model_executor/models/chatglm.py
View file @
99d49945
...
@@ -691,7 +691,8 @@ class ChatGLMModel(nn.Module):
...
@@ -691,7 +691,8 @@ class ChatGLMModel(nn.Module):
# lay_qkv_bias_words = ["self_attention.query_key_value.bias"]
# lay_qkv_bias_words = ["self_attention.query_key_value.bias"]
# qkv_bias_words = "|".join(lay_qkv_bias_words)
# qkv_bias_words = "|".join(lay_qkv_bias_words)
for
layername
,
weight
in
params_dict
.
items
():
for
layername
in
loaded_params
:
weight
=
params_dict
[
layername
]
if
"lm_head.weight"
in
layername
and
weight
.
shape
[
1
]
==
4096
:
if
"lm_head.weight"
in
layername
and
weight
.
shape
[
1
]
==
4096
:
lay_key_words
.
append
(
"lm_head.weight"
)
lay_key_words
.
append
(
"lm_head.weight"
)
combined_words
=
"|"
.
join
(
lay_key_words
)
combined_words
=
"|"
.
join
(
lay_key_words
)
...
...
vllm/model_executor/models/deepseek_v3.py
View file @
99d49945
...
@@ -832,7 +832,8 @@ class DeepseekV3ForCausalLM(nn.Module, SupportsPP):
...
@@ -832,7 +832,8 @@ class DeepseekV3ForCausalLM(nn.Module, SupportsPP):
])
])
combined_words
=
"|"
.
join
(
lay_key_words
)
combined_words
=
"|"
.
join
(
lay_key_words
)
for
layername
,
weight
in
params_dict
.
items
():
for
layername
in
loaded_params
:
weight
=
params_dict
[
layername
]
if
"lm_head.weight"
in
layername
:
if
"lm_head.weight"
in
layername
:
lay_key_words
.
append
(
"lm_head.weight"
)
lay_key_words
.
append
(
"lm_head.weight"
)
combined_words
=
"|"
.
join
(
lay_key_words
)
combined_words
=
"|"
.
join
(
lay_key_words
)
...
...
vllm/model_executor/models/falcon.py
View file @
99d49945
...
@@ -562,7 +562,8 @@ class FalconForCausalLM(nn.Module, SupportsPP):
...
@@ -562,7 +562,8 @@ class FalconForCausalLM(nn.Module, SupportsPP):
# lay_qkv_words = ["self_attention.query_key_value.weight"]
# lay_qkv_words = ["self_attention.query_key_value.weight"]
# qkv_words = "|".join(lay_qkv_words)
# qkv_words = "|".join(lay_qkv_words)
for
layername
,
weight
in
params_dict
.
items
():
for
layername
in
loaded_params
:
weight
=
params_dict
[
layername
]
matches
=
re
.
findall
(
combined_words
,
layername
)
matches
=
re
.
findall
(
combined_words
,
layername
)
if
matches
:
if
matches
:
# if self.use_gemm_pad and gemm_bank_conf(weight.data.shape[0]):
# if self.use_gemm_pad and gemm_bank_conf(weight.data.shape[0]):
...
...
vllm/model_executor/models/mixtral.py
View file @
99d49945
...
@@ -522,7 +522,8 @@ class MixtralForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
...
@@ -522,7 +522,8 @@ class MixtralForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
]
]
combined_words
=
"|"
.
join
(
lay_key_words
)
combined_words
=
"|"
.
join
(
lay_key_words
)
for
layername
,
weight
in
params_dict
.
items
():
for
layername
in
loaded_params
:
weight
=
params_dict
[
layername
]
if
"lm_head.weight"
in
layername
:
if
"lm_head.weight"
in
layername
:
lay_key_words
.
append
(
"lm_head.weight"
)
lay_key_words
.
append
(
"lm_head.weight"
)
combined_words
=
"|"
.
join
(
lay_key_words
)
combined_words
=
"|"
.
join
(
lay_key_words
)
...
...
vllm/model_executor/models/qwen.py
View file @
99d49945
...
@@ -1109,7 +1109,8 @@ class QWenBaseModel(nn.Module, SupportsPP, SupportsLoRA):
...
@@ -1109,7 +1109,8 @@ class QWenBaseModel(nn.Module, SupportsPP, SupportsLoRA):
# lay_qkv_bias_words = ["attn.c_attn.bias"]
# lay_qkv_bias_words = ["attn.c_attn.bias"]
# qkv_bias_words = "|".join(lay_qkv_bias_words)
# qkv_bias_words = "|".join(lay_qkv_bias_words)
for
layername
,
weight
in
params_dict
.
items
():
for
layername
in
loaded_params
:
weight
=
params_dict
[
layername
]
# if self.use_fa_pad and (re.findall(qkv_bias_words, layername)):
# if self.use_fa_pad and (re.findall(qkv_bias_words, layername)):
# weight.data = pad_weight(weight.data, 32)
# weight.data = pad_weight(weight.data, 32)
...
@@ -1139,7 +1140,8 @@ class QWenBaseModel(nn.Module, SupportsPP, SupportsLoRA):
...
@@ -1139,7 +1140,8 @@ class QWenBaseModel(nn.Module, SupportsPP, SupportsLoRA):
]
]
combined_words
=
"|"
.
join
(
lay_key_words
)
combined_words
=
"|"
.
join
(
lay_key_words
)
for
layername
,
weight
in
params_dict
.
items
():
for
layername
in
loaded_params
:
weight
=
params_dict
[
layername
]
matches
=
re
.
findall
(
combined_words
,
layername
)
matches
=
re
.
findall
(
combined_words
,
layername
)
if
matches
:
if
matches
:
...
@@ -1182,7 +1184,8 @@ class QWenBaseModel(nn.Module, SupportsPP, SupportsLoRA):
...
@@ -1182,7 +1184,8 @@ class QWenBaseModel(nn.Module, SupportsPP, SupportsLoRA):
weight_shapes
=
[]
weight_shapes
=
[]
all_json
=
{}
all_json
=
{}
for
layername
,
weight
in
params_dict
.
items
():
for
layername
in
loaded_params
:
weight
=
params_dict
[
layername
]
matches
=
re
.
findall
(
combined_words
,
layername
)
matches
=
re
.
findall
(
combined_words
,
layername
)
if
matches
and
"scale"
not
in
layername
:
if
matches
and
"scale"
not
in
layername
:
weight_data
=
params_dict
[
layername
]
weight_data
=
params_dict
[
layername
]
...
...
vllm/model_executor/models/qwen2.py
View file @
99d49945
...
@@ -537,7 +537,8 @@ class Qwen2Model(nn.Module):
...
@@ -537,7 +537,8 @@ class Qwen2Model(nn.Module):
weight_shapes
=
[]
weight_shapes
=
[]
all_json
=
{}
all_json
=
{}
for
layername
,
weight
in
params_dict
.
items
():
for
layername
in
loaded_params
:
weight
=
params_dict
[
layername
]
matches
=
re
.
findall
(
combined_words
,
layername
)
matches
=
re
.
findall
(
combined_words
,
layername
)
if
matches
and
"scale"
not
in
layername
:
if
matches
and
"scale"
not
in
layername
:
weight_data
=
params_dict
[
layername
]
weight_data
=
params_dict
[
layername
]
...
...
vllm/model_executor/models/qwen2_moe.py
View file @
99d49945
...
@@ -575,7 +575,8 @@ class Qwen2MoeForCausalLM(nn.Module, SupportsPP):
...
@@ -575,7 +575,8 @@ class Qwen2MoeForCausalLM(nn.Module, SupportsPP):
# lay_qkv_bias_words = ["self_attn.qkv_proj.bias"]
# lay_qkv_bias_words = ["self_attn.qkv_proj.bias"]
# qkv_bias_words = "|".join(lay_qkv_bias_words)
# qkv_bias_words = "|".join(lay_qkv_bias_words)
for
layername
,
weight
in
params_dict
.
items
():
for
layername
in
loaded_params
:
weight
=
params_dict
[
layername
]
# if self.use_fa_pad and (re.findall(qkv_bias_words, layername)):
# if self.use_fa_pad and (re.findall(qkv_bias_words, layername)):
# weight.data = pad_weight(weight.data, 32)
# weight.data = pad_weight(weight.data, 32)
...
...
vllm/model_executor/models/qwen2_vl.py
View file @
99d49945
...
@@ -685,7 +685,8 @@ class Qwen2VisionTransformer(nn.Module):
...
@@ -685,7 +685,8 @@ class Qwen2VisionTransformer(nn.Module):
# lay_qkv_bias_words = ["attn.qkv.bias"]
# lay_qkv_bias_words = ["attn.qkv.bias"]
# qkv_bias_words = "|".join(lay_qkv_bias_words)
# qkv_bias_words = "|".join(lay_qkv_bias_words)
for
layername
,
weight
in
params_dict
.
items
():
for
layername
in
loaded_params
:
weight
=
params_dict
[
layername
]
# if self.use_fa_pad and (re.findall(qkv_bias_words, layername)):
# if self.use_fa_pad and (re.findall(qkv_bias_words, layername)):
# weight.data = pad_weight(weight.data, 32)
# weight.data = pad_weight(weight.data, 32)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment