Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d811b442
Unverified
Commit
d811b442
authored
Nov 01, 2025
by
Haco
Committed by
GitHub
Nov 01, 2025
Browse files
[Bugfix] DeepSeek V3.2 MTP metadata & CUDA graph issues (#26779)
Signed-off-by:
xiaohajiayou
<
923390377@qq.com
>
parent
30a14b03
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
8 additions
and
5 deletions
+8
-5
vllm/v1/spec_decode/eagle.py
vllm/v1/spec_decode/eagle.py
+8
-5
No files found.
vllm/v1/spec_decode/eagle.py
View file @
d811b442
...
@@ -109,6 +109,7 @@ class EagleProposer:
...
@@ -109,6 +109,7 @@ class EagleProposer:
else
[]
else
[]
)
)
self
.
use_cuda_graph
=
self
.
use_cuda_graph
and
bool
(
self
.
cudagraph_batch_sizes
)
# persistent buffers for cuda graph
# persistent buffers for cuda graph
self
.
input_ids
=
torch
.
zeros
(
self
.
input_ids
=
torch
.
zeros
(
self
.
max_num_tokens
,
dtype
=
torch
.
int32
,
device
=
device
self
.
max_num_tokens
,
dtype
=
torch
.
int32
,
device
=
device
...
@@ -939,7 +940,7 @@ class EagleProposer:
...
@@ -939,7 +940,7 @@ class EagleProposer:
self
.
vllm_config
,
DeepseekV32IndexerCache
self
.
vllm_config
,
DeepseekV32IndexerCache
)
)
draft_indexer_layer_names
=
indexer_layers
.
keys
()
-
target_indexer_layer_names
draft_indexer_layer_names
=
indexer_layers
.
keys
()
-
target_indexer_layer_names
self
.
attn_layer_names
=
list
(
draft_attn_layer_names
)
self
.
attn_layer_names
=
list
(
draft_attn_layer_names
-
draft_indexer_layer_names
)
self
.
indexer_layer_names
=
list
(
draft_indexer_layer_names
)
self
.
indexer_layer_names
=
list
(
draft_indexer_layer_names
)
if
self
.
indexer_layer_names
:
if
self
.
indexer_layer_names
:
...
@@ -1050,16 +1051,18 @@ class EagleProposer:
...
@@ -1050,16 +1051,18 @@ class EagleProposer:
num_tokens
:
int
,
num_tokens
:
int
,
use_cudagraphs
=
True
,
use_cudagraphs
=
True
,
)
->
None
:
)
->
None
:
if
use_cudagraphs
and
num_tokens
<=
self
.
cudagraph_batch_sizes
[
-
1
]:
# Determine if CUDA graphs should be used for this run.
cudagraphs_enabled
=
use_cudagraphs
and
self
.
use_cuda_graph
if
cudagraphs_enabled
and
num_tokens
<=
self
.
cudagraph_batch_sizes
[
-
1
]:
num_tokens
=
self
.
vllm_config
.
pad_for_cudagraph
(
num_tokens
)
num_tokens
=
self
.
vllm_config
.
pad_for_cudagraph
(
num_tokens
)
with
set_forward_context
(
with
set_forward_context
(
None
,
None
,
self
.
vllm_config
,
self
.
vllm_config
,
num_tokens
=
num_tokens
,
num_tokens
=
num_tokens
,
cudagraph_runtime_mode
=
CUDAGraphMode
.
PIECEWISE
cudagraph_runtime_mode
=
(
if
use_cudagraphs
CUDAGraphMode
.
PIECEWISE
if
cudagraphs_enabled
else
CUDAGraphMode
.
NONE
else
CUDAGraphMode
.
NONE
,
)
,
):
):
if
self
.
supports_mm_inputs
:
if
self
.
supports_mm_inputs
:
input_ids
=
None
input_ids
=
None
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment