Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
61898ea3
Commit 61898ea3 authored Apr 24, 2025 by zhuwenwen
Browse files
fix llama and qwen layout
parent
4a7d8ab8
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing 4 changed files with 18 additions and 5 deletions
+18
-5
vllm/model_executor/model_loader/utils.py
vllm/model_executor/model_loader/utils.py
+0
-1
vllm/model_executor/model_loader/weight_utils.py
vllm/model_executor/model_loader/weight_utils.py
+8
-0
vllm/model_executor/models/llama.py
vllm/model_executor/models/llama.py
+5
-2
vllm/model_executor/models/qwen2.py
vllm/model_executor/models/qwen2.py
+5
-2
No files found.
vllm/model_executor/model_loader/utils.py
View file @
61898ea3
...
...
@@ -86,7 +86,6 @@ def get_model_architecture(
model_config
:
ModelConfig
)
->
Tuple
[
Type
[
nn
.
Module
],
str
]:
architectures
=
getattr
(
model_config
.
hf_config
,
"architectures"
,
[])
visions
=
getattr
(
model_config
.
hf_config
,
"visual"
,
[])
or
getattr
(
model_config
.
hf_config
,
"vision_config"
,
[])
# TODO: support deepseek distillation series models ( 'LlamaForCausalLM', 'Qwen2ForCausalLM' )
support_nn_architectures
=
[
'LlamaForCausalLM'
,
'Qwen2ForCausalLM'
,
'QWenLMHeadModel'
,
'Qwen2VLForConditionalGeneration'
,
'Qwen2_5_VLForConditionalGeneration'
,
'Qwen2MoeForCausalLM'
,
'ChatGLMModel'
,
'ChatGLMForConditionalGeneration'
,
'BaichuanForCausalLM'
,
'BloomForCausalLM'
,
'MedusaModel'
,
'MixtralForCausalLM'
,
'MLPSpeculatorPreTrainedModel'
,
'FalconForCausalLM'
,
'DeepseekV2ForCausalLM'
,
'DeepseekV3ForCausalLM'
,
'DeepSeekMTPModel'
]
if
any
(
arch
in
architectures
for
arch
in
support_nn_architectures
):
...
...
vllm/model_executor/model_loader/weight_utils.py
View file @
61898ea3
...
...
@@ -432,6 +432,11 @@ def safetensors_weights_iterator(
use_tqdm_on_load
:
bool
,
)
->
Generator
[
Tuple
[
str
,
torch
.
Tensor
],
None
,
None
]:
"""Iterate over the weights in the model safetensor files."""
total_count
=
0
for
st_file
in
hf_weights_files
:
with
safe_open
(
st_file
,
framework
=
"pt"
)
as
f
:
total_count
+=
len
(
f
.
keys
())
current_count
=
0
for
st_file
in
tqdm
(
hf_weights_files
,
desc
=
"Loading safetensors checkpoint shards"
,
...
...
@@ -440,7 +445,10 @@ def safetensors_weights_iterator(
):
with
safe_open
(
st_file
,
framework
=
"pt"
)
as
f
:
for
name
in
f
.
keys
():
# noqa: SIM118
current_count
+=
1
param
=
f
.
get_tensor
(
name
)
param
.
current_count
=
current_count
param
.
total_count
=
total_count
yield
name
,
param
...
...
vllm/model_executor/models/llama.py
View file @
61898ea3
...
...
@@ -409,6 +409,8 @@ class LlamaModel(nn.Module):
params_dict
=
dict
(
self
.
named_parameters
())
loaded_params
:
Set
[
str
]
=
set
()
for
name
,
loaded_weight
in
weights
:
current_count
=
loaded_weight
.
current_count
total_count
=
loaded_weight
.
total_count
if
"rotary_emb.inv_freq"
in
name
:
continue
if
(
"rotary_emb.cos_cached"
in
name
...
...
@@ -461,7 +463,7 @@ class LlamaModel(nn.Module):
weight_loader
(
param
,
loaded_weight
)
loaded_params
.
add
(
name
)
if
self
.
use_llama_nn
and
self
.
quant_method
is
None
:
if
self
.
use_llama_nn
and
self
.
quant_method
is
None
and
current_count
==
total_count
:
lay_key_words
=
[
"self_attn.qkv_proj.weight"
,
"self_attn.o_proj.weight"
,
...
...
@@ -474,7 +476,8 @@ class LlamaModel(nn.Module):
# qkv_words = "|".join(lay_qkv_words)
# for layername, weight in params_dict.items():
for
layername
in
loaded_params
:
# for layername in loaded_params:
for
layername
in
params_dict
.
keys
():
weight
=
params_dict
[
layername
]
if
"lm_head.weight"
in
layername
and
weight
.
shape
[
1
]
>=
4096
:
lay_key_words
.
append
(
"lm_head.weight"
)
...
...
vllm/model_executor/models/qwen2.py
View file @
61898ea3
...
...
@@ -394,6 +394,8 @@ class Qwen2Model(nn.Module):
params_dict
=
dict
(
self
.
named_parameters
(
remove_duplicate
=
False
))
loaded_params
:
Set
[
str
]
=
set
()
for
name
,
loaded_weight
in
weights
:
current_count
=
loaded_weight
.
current_count
total_count
=
loaded_weight
.
total_count
if
"rotary_emb.inv_freq"
in
name
:
continue
if
(
self
.
quant_config
is
not
None
and
...
...
@@ -436,7 +438,7 @@ class Qwen2Model(nn.Module):
weight_loader
(
param
,
loaded_weight
)
loaded_params
.
add
(
name
)
if
self
.
use_llama_nn
and
self
.
quant_method
is
None
:
if
self
.
use_llama_nn
and
self
.
quant_method
is
None
and
current_count
==
total_count
:
lay_key_words
=
[
"self_attn.qkv_proj.weight"
,
"self_attn.o_proj.weight"
,
...
...
@@ -452,7 +454,8 @@ class Qwen2Model(nn.Module):
# qkv_bias_words = "|".join(lay_qkv_bias_words)
# for layername, weight in params_dict.items():
for
layername
in
loaded_params
:
# for layername in loaded_params:
for
layername
in
params_dict
.
keys
():
weight
=
params_dict
[
layername
]
if
"lm_head.weight"
in
layername
and
weight
.
shape
[
1
]
>=
3584
:
lay_key_words
.
append
(
"lm_head.weight"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment