Unverified commit 4290b704, authored by Cody Yu and committed by GitHub
Browse files

[V1][PP] Do not block engine core when no requests to schedule (#14585)


Signed-off-by: Cody Yu <hao.yu.cody@gmail.com>
parent c91b64f7
...@@ -205,23 +205,18 @@ class EngineCore: ...@@ -205,23 +205,18 @@ class EngineCore:
self.batch_queue.put_nowait( self.batch_queue.put_nowait(
(future, scheduler_output)) # type: ignore (future, scheduler_output)) # type: ignore
# If all requests are scheduled or the job queue is full, scheduled_batch = (scheduler_output is not None
and scheduler_output.total_num_scheduled_tokens > 0)
# If no more requests can be scheduled and the job queue is not empty,
# block until the first batch in the job queue is finished. # block until the first batch in the job queue is finished.
if (scheduler_output is None if not scheduled_batch and not self.batch_queue.empty():
or scheduler_output.total_num_scheduled_tokens == 0): future, scheduler_output = self.batch_queue.get_nowait()
try:
future, scheduler_output = self.batch_queue.get(
timeout=POLLING_TIMEOUT_S)
# Blocking until the first result is available. # Blocking until the first result is available.
model_output = future.result() model_output = future.result()
self.batch_queue.task_done() self.batch_queue.task_done()
engine_core_outputs = self.scheduler.update_from_output( engine_core_outputs = self.scheduler.update_from_output(
scheduler_output, model_output) scheduler_output, model_output)
except queue.Empty:
# If the queue is empty (timeout at .get), return
# an empty EngineCoreOutputs for logging.
engine_core_outputs = EngineCoreOutputs(
outputs=[], scheduler_stats=self.scheduler.make_stats())
return engine_core_outputs return engine_core_outputs
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment