Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4290b704
Unverified
Commit
4290b704
authored Mar 10, 2025 by Cody Yu
Committed by GitHub, Mar 10, 2025
Browse files
[V1][PP] Do not block engine core when no requests to schedule (#14585)
Signed-off-by: Cody Yu <hao.yu.cody@gmail.com>
parent
c91b64f7
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing 1 changed file with 11 additions and 16 deletions
+11
-16
vllm/v1/engine/core.py
vllm/v1/engine/core.py
+11
-16
No files found.
vllm/v1/engine/core.py
View file @
4290b704
...
@@ -205,23 +205,18 @@ class EngineCore:
...
@@ -205,23 +205,18 @@ class EngineCore:
self
.
batch_queue
.
put_nowait
(
self
.
batch_queue
.
put_nowait
(
(
future
,
scheduler_output
))
# type: ignore
(
future
,
scheduler_output
))
# type: ignore
# If all requests are scheduled or the job queue is full,
scheduled_batch
=
(
scheduler_output
is
not
None
and
scheduler_output
.
total_num_scheduled_tokens
>
0
)
# If no more requests can be scheduled and the job queue is not empty,
# block until the first batch in the job queue is finished.
# block until the first batch in the job queue is finished.
if
(
scheduler_output
is
None
if
not
scheduled_batch
and
not
self
.
batch_queue
.
empty
():
or
scheduler_output
.
total_num_scheduled_tokens
==
0
):
future
,
scheduler_output
=
self
.
batch_queue
.
get_nowait
()
try
:
# Blocking until the first result is available.
future
,
scheduler_output
=
self
.
batch_queue
.
get
(
model_output
=
future
.
result
()
timeout
=
POLLING_TIMEOUT_S
)
self
.
batch_queue
.
task_done
()
# Blocking until the first result is available.
engine_core_outputs
=
self
.
scheduler
.
update_from_output
(
model_output
=
future
.
result
()
scheduler_output
,
model_output
)
self
.
batch_queue
.
task_done
()
engine_core_outputs
=
self
.
scheduler
.
update_from_output
(
scheduler_output
,
model_output
)
except
queue
.
Empty
:
# If the queue is empty (timeout at .get), return
# an empty EngineCoreOutputs for logging.
engine_core_outputs
=
EngineCoreOutputs
(
outputs
=
[],
scheduler_stats
=
self
.
scheduler
.
make_stats
())
return
engine_core_outputs
return
engine_core_outputs
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment