Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
23194d83
Unverified
Commit
23194d83
authored
Sep 30, 2025
by
Lucas Wilkinson
Committed by
GitHub
Sep 30, 2025
Browse files
[BugFix] Fix DP/EP hang (#25906)
Signed-off-by:
Lucas Wilkinson
<
lwilkins@redhat.com
>
parent
61aedb5f
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
15 additions
and
3 deletions
+15
-3
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_model_runner.py
+15
-3
No files found.
vllm/v1/worker/gpu_model_runner.py
View file @
23194d83
...
@@ -3075,13 +3075,19 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
...
@@ -3075,13 +3075,19 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
# We currently only microbatch if the number of tokens is
# We currently only microbatch if the number of tokens is
# over a certain threshold.
# over a certain threshold.
if
self
.
parallel_config
.
enable_dbo
and
allow_microbatching
:
if
self
.
parallel_config
.
enable_dbo
and
allow_microbatching
:
ubatch_slices
,
num_tokens_after_padding
=
ubatch_split
(
ubatch_slices
,
ubatch_
num_tokens_after_padding
=
ubatch_split
(
num_scheduled_tokens
,
num_scheduled_tokens
,
total_num_scheduled_tokens
,
total_num_scheduled_tokens
,
total_num_scheduled_tokens
,
total_num_scheduled_tokens
,
uniform_decode
=
uniform_decode
,
uniform_decode
=
uniform_decode
,
vllm_config
=
self
.
vllm_config
,
vllm_config
=
self
.
vllm_config
,
)
)
# Currently when DBO is enabled `ubatch_split` returns
# the num_tokens_after_padding for a single ubatch, but we have 2
# TODO(sage,lucas): this is cruft that should be addressed in the
# padding refactor.
if
ubatch_num_tokens_after_padding
is
not
None
:
num_tokens_after_padding
=
ubatch_num_tokens_after_padding
*
2
# If we failed to microbatch, currently need to resynchronize
# If we failed to microbatch, currently need to resynchronize
# TODO(lucas,sage): we should be able to avoid this second sync by
# TODO(lucas,sage): we should be able to avoid this second sync by
...
@@ -3198,7 +3204,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
...
@@ -3198,7 +3204,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
# filter out the valid batch descriptor
# filter out the valid batch descriptor
_cg_mode
,
batch_descriptor
=
self
.
cudagraph_dispatcher
.
dispatch
(
_cg_mode
,
batch_descriptor
=
self
.
cudagraph_dispatcher
.
dispatch
(
BatchDescriptor
(
num_tokens
=
num_tokens
,
BatchDescriptor
(
num_tokens
=
num_tokens
_after_padding
,
uniform_decode
=
uniform_decode
))
\
uniform_decode
=
uniform_decode
))
\
if
not
is_profile
else
(
CUDAGraphMode
.
NONE
,
None
)
if
not
is_profile
else
(
CUDAGraphMode
.
NONE
,
None
)
if
cudagraph_runtime_mode
is
not
None
:
if
cudagraph_runtime_mode
is
not
None
:
...
@@ -3212,7 +3218,13 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
...
@@ -3212,7 +3218,13 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
cudagraph_runtime_mode
=
_cg_mode
cudagraph_runtime_mode
=
_cg_mode
if
ubatch_slices
is
not
None
:
if
ubatch_slices
is
not
None
:
num_tokens
=
num_tokens
//
2
# Adjust values to reflect a single ubatch.
# TODO(sage,lucas): this is cruft that should be addressed in
# the padding refactor.
num_tokens_after_padding
=
ubatch_slices
[
0
].
num_tokens
if
num_tokens_across_dp
is
not
None
:
num_tokens_across_dp
[:]
=
num_tokens_after_padding
with
self
.
maybe_randomize_inputs
(
input_ids
),
set_forward_context
(
with
self
.
maybe_randomize_inputs
(
input_ids
),
set_forward_context
(
attn_metadata
,
attn_metadata
,
self
.
vllm_config
,
self
.
vllm_config
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment