Unverified commit 95592fa0, authored by Roy and committed by GitHub
Browse files

align llm_engine and async_engine. (#1081)

parent fbe66e1d
......@@ -183,10 +183,9 @@ class _AsyncLLMEngine(LLMEngine):
and updates the scheduler with the model outputs. Finally, it decodes
the sequences and returns the newly generated results.
"""
(seq_group_metadata_list, scheduler_outputs,
early_return) = self._schedule()
if early_return is not None:
return early_return
seq_group_metadata_list, scheduler_outputs, ignored = self._schedule()
if scheduler_outputs.is_empty():
return ignored
# Execute the model.
output = await self._run_workers_async(
......@@ -197,7 +196,7 @@ class _AsyncLLMEngine(LLMEngine):
blocks_to_copy=scheduler_outputs.blocks_to_copy,
)
return self._process_model_outputs(output, scheduler_outputs)
return self._process_model_outputs(output, scheduler_outputs) + ignored
async def _run_workers_async(
self,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment