Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
2db90e2c
Commit
2db90e2c
authored
Apr 16, 2025
by
zhuwenwen
Browse files
update moe models of LM_NN
parent
31330101
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
11 additions
and
9 deletions
+11
-9
vllm/model_executor/models/deepseek_v2.py
vllm/model_executor/models/deepseek_v2.py
+9
-9
vllm/model_executor/models/mixtral.py
vllm/model_executor/models/mixtral.py
+1
-0
vllm/model_executor/models/qwen2_moe.py
vllm/model_executor/models/qwen2_moe.py
+1
-0
No files found.
vllm/model_executor/models/deepseek_v2.py
View file @
2db90e2c
...
...
@@ -580,15 +580,15 @@ class DeepseekV2DecoderLayer(nn.Module):
hidden_states
=
hidden_states
,
)
if
hidden_states
.
dtype
==
torch
.
float16
:
# Fix FP16 overflow
# We scale both hidden_states and residual before
# rmsnorm, and rmsnorm result would not be affected by scale.
hidden_states
*=
1.
/
self
.
routed_scaling_factor
if
self
.
layer_idx
==
0
:
# The residual is shared by all layers, we only scale it on
# first layer.
residual
*=
1.
/
self
.
routed_scaling_factor
#
if hidden_states.dtype == torch.float16:
#
# Fix FP16 overflow
#
# We scale both hidden_states and residual before
#
# rmsnorm, and rmsnorm result would not be affected by scale.
#
hidden_states *= 1. / self.routed_scaling_factor
#
if self.layer_idx == 0:
#
# The residual is shared by all layers, we only scale it on
#
# first layer.
#
residual *= 1. / self.routed_scaling_factor
# Fully Connected
hidden_states
,
residual
=
self
.
post_attention_layernorm
(
...
...
vllm/model_executor/models/mixtral.py
View file @
2db90e2c
...
...
@@ -427,6 +427,7 @@ class MixtralModel(nn.Module):
for
layername
in
loaded_params
:
weight
=
params_dict
[
layername
]
os
.
environ
[
'LM_NN'
]
=
'0'
matches
=
re
.
findall
(
combined_words
,
layername
)
...
...
vllm/model_executor/models/qwen2_moe.py
View file @
2db90e2c
...
...
@@ -511,6 +511,7 @@ class Qwen2MoeModel(nn.Module):
for
layername
in
loaded_params
:
weight
=
params_dict
[
layername
]
os
.
environ
[
'LM_NN'
]
=
'0'
# if self.use_fa_pad and (re.findall(qkv_bias_words, layername)):
# weight.data = pad_weight(weight.data, 32)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment