Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
01ad27fa
Unverified
Commit
01ad27fa
authored
Oct 14, 2025
by
CSWYF3634076
Committed by
GitHub
Oct 14, 2025
Browse files
[Model][Bugfix]fix ernie45 load failed due to ernie45 eplb code (#26684)
Signed-off-by:
wangyafeng
<
wangyafeng@baidu.com
>
parent
481545b3
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
22 additions
and
12 deletions
+22
-12
vllm/model_executor/models/ernie45_moe.py
vllm/model_executor/models/ernie45_moe.py
+22
-12
No files found.
vllm/model_executor/models/ernie45_moe.py
View file @
01ad27fa
...
...
@@ -23,7 +23,8 @@
# limitations under the License.
"""Inference-only ErnieMoE model compatible with HuggingFace weights."""
from
collections.abc
import
Iterable
import
typing
from
collections.abc
import
Callable
,
Iterable
from
itertools
import
islice
from
typing
import
Any
...
...
@@ -139,10 +140,10 @@ class Ernie4_5_MoeMoE(nn.Module):
# Load balancing settings.
vllm_config
=
get_current_vllm_config
()
parallel
_config
=
vllm_config
.
parallel_config
eplb
_config
=
vllm_config
.
parallel_config
.
eplb_config
self
.
enable_eplb
=
enable_eplb
self
.
n_redundant_experts
=
parallel
_config
.
num_redundant_experts
self
.
n_redundant_experts
=
eplb
_config
.
num_redundant_experts
self
.
n_logical_experts
=
self
.
n_routed_experts
self
.
n_physical_experts
=
self
.
n_logical_experts
+
self
.
n_redundant_experts
self
.
n_local_physical_experts
=
self
.
n_physical_experts
//
self
.
ep_size
...
...
@@ -426,8 +427,10 @@ class Ernie4_5_MoeModel(nn.Module):
self
.
vocab_size
=
config
.
vocab_size
self
.
config
=
config
parallel_config
=
vllm_config
.
parallel_config
eplb_config
=
parallel_config
.
eplb_config
enable_eplb
=
parallel_config
.
enable_eplb
self
.
num_redundant_experts
=
parallel_config
.
num_redundant_experts
self
.
num_redundant_experts
=
eplb_config
.
num_redundant_experts
if
get_pp_group
().
is_first_rank
:
self
.
embed_tokens
=
VocabParallelEmbedding
(
...
...
@@ -570,19 +573,26 @@ class Ernie4_5_MoeModel(nn.Module):
# Skip loading extra bias for GPTQ models.
if
(
name
.
endswith
(
".bias"
)
or
name
.
endswith
(
"_bias"
)
)
and
name
not
in
params_dict
:
name
_mapped
.
endswith
(
".bias"
)
or
name
_mapped
.
endswith
(
"_bias"
)
)
and
name
_mapped
not
in
params_dict
:
continue
param
=
params_dict
[
name
]
weight_loader
=
param
.
weight_loader
weight_loader
(
param
=
params_dict
[
name_mapped
]
# We should ask the weight loader to return success or not
# here since otherwise we may skip experts with other
# available replicas.
weight_loader
=
typing
.
cast
(
Callable
[...,
bool
],
param
.
weight_loader
)
success
=
weight_loader
(
param
,
loaded_weight
,
name
,
name
_mapped
,
shard_id
=
shard_id
,
expert_id
=
expert_id
,
return_success
=
True
,
)
if
success
:
name
=
name_mapped
break
else
:
if
is_expert_weight
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment