Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
fbd88728
Unverified
Commit
fbd88728
authored
Aug 15, 2025
by
Benjamin Chislett
Committed by
GitHub
Aug 16, 2025
Browse files
[Bugfix] Fix DeepSeek MTP (#22934)
Signed-off-by: Benjamin Chislett <benjamin.chislett@centml.ai>
parent
070da660
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
13 additions
and
14 deletions
+13
-14
vllm/model_executor/models/deepseek_mtp.py
vllm/model_executor/models/deepseek_mtp.py
+7
-6
vllm/model_executor/models/glm4_moe_mtp.py
vllm/model_executor/models/glm4_moe_mtp.py
+3
-4
vllm/model_executor/models/mimo_mtp.py
vllm/model_executor/models/mimo_mtp.py
+3
-4
No files found.
vllm/model_executor/models/deepseek_mtp.py
View file @
fbd88728
...
...
@@ -158,14 +158,13 @@ class DeepSeekMTP(nn.Module, SupportsPP):
self
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
previous_hidden_states
:
torch
.
Tensor
,
hidden_states
:
torch
.
Tensor
,
intermediate_tensors
:
Optional
[
IntermediateTensors
]
=
None
,
inputs_embeds
:
Optional
[
torch
.
Tensor
]
=
None
,
spec_step_idx
:
int
=
0
,
)
->
torch
.
Tensor
:
hidden_states
=
self
.
model
(
input_ids
,
positions
,
previous_hidden_states
,
inputs_embeds
,
spec_step_idx
)
hidden_states
=
self
.
model
(
input_ids
,
positions
,
hidden_states
,
inputs_embeds
,
spec_step_idx
)
return
hidden_states
def
compute_logits
(
...
...
@@ -213,13 +212,15 @@ class DeepSeekMTP(nn.Module, SupportsPP):
# for mlp.experts[0].gate_gate_up_proj, which breaks load.
if
((
"mlp.experts."
in
name
)
and
name
not
in
params_dict
):
continue
name
=
name
.
replace
(
weight_name
,
param_name
)
name_mapped
=
name
.
replace
(
weight_name
,
param_name
)
# QKV fusion is optional, fall back to normal
# weight loading if it's not enabled
if
((
param_name
==
"fused_qkv_a_proj"
)
and
name
not
in
params_dict
):
and
name_mapped
not
in
params_dict
):
continue
else
:
name
=
name_mapped
# Skip loading extra bias for GPTQ models.
if
name
.
endswith
(
".bias"
)
and
name
not
in
params_dict
:
...
...
vllm/model_executor/models/glm4_moe_mtp.py
View file @
fbd88728
...
...
@@ -180,14 +180,13 @@ class Glm4MoeMTP(nn.Module, SupportsPP):
self
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
previous_hidden_states
:
torch
.
Tensor
,
hidden_states
:
torch
.
Tensor
,
intermediate_tensors
:
Optional
[
IntermediateTensors
]
=
None
,
inputs_embeds
:
Optional
[
torch
.
Tensor
]
=
None
,
spec_step_idx
:
int
=
0
,
)
->
torch
.
Tensor
:
hidden_states
=
self
.
model
(
input_ids
,
positions
,
previous_hidden_states
,
inputs_embeds
,
spec_step_idx
)
hidden_states
=
self
.
model
(
input_ids
,
positions
,
hidden_states
,
inputs_embeds
,
spec_step_idx
)
return
hidden_states
def
compute_logits
(
...
...
vllm/model_executor/models/mimo_mtp.py
View file @
fbd88728
...
...
@@ -164,15 +164,14 @@ class MiMoMTP(nn.Module):
self
,
input_ids
:
torch
.
Tensor
,
positions
:
torch
.
Tensor
,
previous_hidden_states
:
torch
.
Tensor
,
hidden_states
:
torch
.
Tensor
,
intermediate_tensors
:
Optional
[
IntermediateTensors
]
=
None
,
inputs_embeds
:
Optional
[
torch
.
Tensor
]
=
None
,
spec_step_idx
:
int
=
0
,
)
->
torch
.
Tensor
:
assert
spec_step_idx
==
0
,
"mimo_mtp only support predict one token now"
hidden_states
=
self
.
model
(
input_ids
,
positions
,
previous_hidden_states
,
inputs_embeds
,
spec_step_idx
)
hidden_states
=
self
.
model
(
input_ids
,
positions
,
hidden_states
,
inputs_embeds
,
spec_step_idx
)
return
hidden_states
def
compute_logits
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment