Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d111bc53
Unverified
Commit
d111bc53
authored
Jan 07, 2026
by
Andy Liu
Committed by
GitHub
Jan 07, 2026
Browse files
[Bugfix][MTP] Fix GLM4 MoE fp8 loading with MTP on (#31757)
Signed-off-by:
Andy Liu
<
andyliu@roblox.com
>
parent
0790f076
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
1 deletion
+6
-1
vllm/model_executor/models/glm4_moe_mtp.py
vllm/model_executor/models/glm4_moe_mtp.py
+6
-1
No files found.
vllm/model_executor/models/glm4_moe_mtp.py
View file @
d111bc53
...
@@ -106,7 +106,7 @@ class Glm4MoeMultiTokenPredictorLayer(nn.Module):
...
@@ -106,7 +106,7 @@ class Glm4MoeMultiTokenPredictorLayer(nn.Module):
)
->
torch
.
Tensor
:
)
->
torch
.
Tensor
:
assert
inputs_embeds
is
not
None
assert
inputs_embeds
is
not
None
# masking inputs at position 0, as not needed by MTP
# masking inputs at position 0, as not needed by MTP
inputs_embeds
[
positions
==
0
]
=
0
inputs_embeds
=
torch
.
where
(
positions
.
unsqueeze
(
-
1
)
==
0
,
0
,
inputs_embeds
)
inputs_embeds
=
self
.
enorm
(
inputs_embeds
)
inputs_embeds
=
self
.
enorm
(
inputs_embeds
)
previous_hidden_states
=
self
.
hnorm
(
previous_hidden_states
)
previous_hidden_states
=
self
.
hnorm
(
previous_hidden_states
)
...
@@ -268,6 +268,11 @@ class Glm4MoeMTP(nn.Module, SupportsPP, Glm4MixtureOfExperts):
...
@@ -268,6 +268,11 @@ class Glm4MoeMTP(nn.Module, SupportsPP, Glm4MixtureOfExperts):
if
spec_layer
is
None
:
if
spec_layer
is
None
:
continue
continue
name
=
self
.
_rewrite_spec_layer_name
(
spec_layer
,
name
)
name
=
self
.
_rewrite_spec_layer_name
(
spec_layer
,
name
)
# Some checkpoints include weight scale tensors for the LM head even
# when the quantized head isn't built. Skip them if the model does
# not expose a matching parameter to avoid KeyError during load.
if
name
.
endswith
(
".weight_scale"
)
and
name
not
in
params_dict
:
continue
for
param_name
,
weight_name
,
shard_id
in
stacked_params_mapping
:
for
param_name
,
weight_name
,
shard_id
in
stacked_params_mapping
:
# Skip non-stacked layers and experts (experts handled below).
# Skip non-stacked layers and experts (experts handled below).
if
weight_name
not
in
name
:
if
weight_name
not
in
name
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment