Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
dc2f159f
Unverified
Commit dc2f159f authored Jul 23, 2025 by Woosuk Kwon
Committed by GitHub Jul 23, 2025
Browse files
Dump input metadata on crash for async scheduling (#21258)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
parent
d5b981f8
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing 1 changed file with 14 additions and 4 deletions
+14
-4
vllm/v1/engine/core.py
vllm/v1/engine/core.py
+14
-4
No files found.
vllm/v1/engine/core.py
View file @
dc2f159f
...
...
@@ -234,9 +234,14 @@ class EngineCore:
self
.
scheduler
.
finish_requests
(
request_ids
,
RequestStatus
.
FINISHED_ABORTED
)
def
execute_model
(
self
,
scheduler_output
:
SchedulerOutput
):
def
execute_model_with_error_logging
(
self
,
model_fn
:
Callable
[[
SchedulerOutput
],
ModelRunnerOutput
],
scheduler_output
:
SchedulerOutput
,
)
->
ModelRunnerOutput
:
"""Execute the model and log detailed info on failure."""
try
:
return
self
.
model_
executor
.
execute_model
(
scheduler_output
)
return
model_
fn
(
scheduler_output
)
except
Exception
as
err
:
# We do not want to catch BaseException here since we're only
# interested in dumping info when the exception is due to an
...
...
@@ -259,7 +264,9 @@ class EngineCore:
if
not
self
.
scheduler
.
has_requests
():
return
{},
False
scheduler_output
=
self
.
scheduler
.
schedule
()
model_output
=
self
.
execute_model
(
scheduler_output
)
model_output
=
self
.
execute_model_with_error_logging
(
self
.
model_executor
.
execute_model
,
# type: ignore
scheduler_output
)
engine_core_outputs
=
self
.
scheduler
.
update_from_output
(
scheduler_output
,
model_output
)
# type: ignore
...
...
@@ -306,8 +313,11 @@ class EngineCore:
# so we need more work.
if
not
scheduled_batch
and
not
self
.
batch_queue
.
empty
():
future
,
scheduler_output
=
self
.
batch_queue
.
get_nowait
()
# Blocking until the first result is available.
model_output
=
future
.
result
()
model_output
=
self
.
execute_model_with_error_logging
(
lambda
_
:
future
.
result
(),
scheduler_output
)
self
.
batch_queue
.
task_done
()
engine_core_outputs
=
(
self
.
scheduler
.
update_from_output
(
scheduler_output
,
model_output
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment