Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
55ee9d72
"vllm/vscode:/vscode.git/clone" did not exist on "6682c231fa97f33d3b3f4d788da4e14959989a67"
Commit
55ee9d72
authored
Dec 02, 2025
by
王敏
Browse files
[fix]解决dp模式mtp卡住问题
parent
73cbc9fe
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
20 additions
and
2 deletions
+20
-2
vllm/forward_context.py
vllm/forward_context.py
+3
-1
vllm/model_executor/layers/fused_moe/layer.py
vllm/model_executor/layers/fused_moe/layer.py
+3
-1
vllm/v1/spec_decode/eagle.py
vllm/v1/spec_decode/eagle.py
+14
-0
No files found.
vllm/forward_context.py
View file @
55ee9d72
...
@@ -135,7 +135,9 @@ def set_forward_context(
...
@@ -135,7 +135,9 @@ def set_forward_context(
if
need_to_track_batchsize
:
if
need_to_track_batchsize
:
forward_start_time
=
time
.
perf_counter
()
forward_start_time
=
time
.
perf_counter
()
dp_metadata
:
Optional
[
DPMetadata
]
=
None
dp_metadata
:
Optional
[
DPMetadata
]
=
None
if
vllm_config
.
parallel_config
.
data_parallel_size
>
1
and
(
dp_size
=
vllm_config
.
parallel_config
.
data_parallel_size
use_navie_ep
=
envs
.
VLLM_ALL2ALL_BACKEND
==
'naive'
and
dp_size
>
1
and
vllm_config
.
parallel_config
.
enable_expert_parallel
if
use_navie_ep
and
dp_size
>
1
and
(
attn_metadata
is
not
None
or
num_tokens
is
not
None
):
attn_metadata
is
not
None
or
num_tokens
is
not
None
):
dp_metadata
=
DPMetadata
.
make
(
vllm_config
.
parallel_config
,
dp_metadata
=
DPMetadata
.
make
(
vllm_config
.
parallel_config
,
attn_metadata
,
num_tokens
or
0
,
attn_metadata
,
num_tokens
or
0
,
...
...
vllm/model_executor/layers/fused_moe/layer.py
View file @
55ee9d72
...
@@ -1537,7 +1537,9 @@ class FusedMoE(torch.nn.Module):
...
@@ -1537,7 +1537,9 @@ class FusedMoE(torch.nn.Module):
do_naive_dispatch_combine
:
bool
=
(
do_naive_dispatch_combine
:
bool
=
(
self
.
dp_size
>
1
self
.
dp_size
>
1
and
not
self
.
moe_parallel_config
.
use_deepep_ht_kernels
)
and
self
.
ep_size
>
1
and
envs
.
VLLM_ALL2ALL_BACKEND
==
'naive'
)
#and not self.moe_parallel_config.use_deepep_ht_kernels)
if
do_naive_dispatch_combine
:
if
do_naive_dispatch_combine
:
hidden_states
,
router_logits
=
get_ep_group
().
dispatch
(
hidden_states
,
router_logits
=
get_ep_group
().
dispatch
(
hidden_states
,
router_logits
)
hidden_states
,
router_logits
)
...
...
vllm/v1/spec_decode/eagle.py
View file @
55ee9d72
...
@@ -89,6 +89,9 @@ class EagleProposer:
...
@@ -89,6 +89,9 @@ class EagleProposer:
device
=
device
,
device
=
device
,
dtype
=
torch
.
int32
)
dtype
=
torch
.
int32
)
self
.
dp_size
=
vllm_config
.
parallel_config
.
data_parallel_size
self
.
enable_expert_parallel
=
vllm_config
.
parallel_config
.
enable_expert_parallel
def
propose
(
def
propose
(
self
,
self
,
# [num_tokens]
# [num_tokens]
...
@@ -529,6 +532,17 @@ class EagleProposer:
...
@@ -529,6 +532,17 @@ class EagleProposer:
self
.
hidden_states
[:
num_tokens
],
self
.
hidden_states
[:
num_tokens
],
)
)
if
self
.
dp_size
>
1
and
self
.
enable_expert_parallel
and
self
.
num_speculative_tokens
>
1
:
for
_
in
range
(
self
.
num_speculative_tokens
-
1
):
with
set_forward_context
(
attn_metadata
,
self
.
vllm_config
,
num_tokens
=
num_tokens
):
self
.
model
(
self
.
input_ids
[:
num_tokens
],
self
.
positions
[:
num_tokens
],
self
.
hidden_states
[:
num_tokens
],
)
def
validate_same_kv_cache_group
(
self
,
def
validate_same_kv_cache_group
(
self
,
kv_cache_config
:
KVCacheConfig
)
->
None
:
kv_cache_config
:
KVCacheConfig
)
->
None
:
"""
"""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment