Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
62095e82
Unverified
Commit
62095e82
authored
Apr 06, 2026
by
Nick Hill
Committed by
GitHub
Apr 07, 2026
Browse files
[BugFix][MRV2] Fix cuda event reuse race (#39115)
Signed-off-by: Nick Hill <nickhill123@gmail.com>
parent
b2b2c523
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing 2 changed files with 2 additions and 7 deletions
+2
-7
vllm/v1/worker/gpu/async_utils.py
vllm/v1/worker/gpu/async_utils.py
+2
-4
vllm/v1/worker/gpu/model_runner.py
vllm/v1/worker/gpu/model_runner.py
+0
-3
No files found.
vllm/v1/worker/gpu/async_utils.py
View file @
62095e82
...
...
@@ -17,7 +17,6 @@ class AsyncOutput(AsyncModelRunnerOutput):
num_sampled_tokens
:
torch
.
Tensor
,
main_stream
:
torch
.
cuda
.
Stream
,
copy_stream
:
torch
.
cuda
.
Stream
,
copy_event
:
torch
.
cuda
.
Event
,
):
# NOTE(woosuk): We must retain references to the GPU tensors,
# as the copy operations are performed on a different CUDA stream than
...
...
@@ -25,7 +24,7 @@ class AsyncOutput(AsyncModelRunnerOutput):
self
.
model_runner_output
=
model_runner_output
self
.
sampler_output
=
sampler_output
self
.
num_sampled_tokens
=
num_sampled_tokens
self
.
copy_event
=
copy_event
self
.
copy_event
=
torch
.
cuda
.
Event
()
with
stream
(
copy_stream
,
main_stream
):
copy_stream
.
wait_stream
(
main_stream
)
...
...
@@ -78,12 +77,11 @@ class AsyncPoolingOutput(AsyncModelRunnerOutput):
is_valid
:
torch
.
Tensor
|
None
,
main_stream
:
torch
.
cuda
.
Stream
,
copy_stream
:
torch
.
cuda
.
Stream
,
copy_event
:
torch
.
cuda
.
Event
,
):
self
.
model_runner_output
=
model_runner_output
self
.
pooler_output
=
pooler_output
self
.
is_valid
=
is_valid
self
.
copy_event
=
copy_event
self
.
copy_event
=
torch
.
cuda
.
Event
()
with
stream
(
copy_stream
,
main_stream
):
copy_stream
.
wait_stream
(
main_stream
)
...
...
vllm/v1/worker/gpu/model_runner.py
View file @
62095e82
...
...
@@ -130,7 +130,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
self
.
use_async_scheduling
=
self
.
scheduler_config
.
async_scheduling
self
.
output_copy_stream
=
torch
.
cuda
.
Stream
(
self
.
device
)
self
.
output_copy_event
=
torch
.
cuda
.
Event
()
# Pipeline parallelism.
self
.
use_pp
=
self
.
parallel_config
.
pipeline_parallel_size
>
1
...
...
@@ -1180,7 +1179,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
num_sampled_tokens
=
num_sampled
,
main_stream
=
self
.
main_stream
,
copy_stream
=
self
.
output_copy_stream
,
copy_event
=
self
.
output_copy_event
,
)
mm_inputs
:
tuple
[
list
[
torch
.
Tensor
],
torch
.
Tensor
]
|
None
=
None
...
...
@@ -1270,7 +1268,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
is_valid
=
is_valid
,
main_stream
=
self
.
main_stream
,
copy_stream
=
self
.
output_copy_stream
,
copy_event
=
self
.
output_copy_event
,
)
self
.
postprocess_pool
(
input_batch
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment