Unverified commit fa0d353a, authored by fangyuchu and committed by GitHub
Browse files

[Bugfix] Surface exceptions from non-blocking execute_model in UniProcExecutor...


[Bugfix] Surface exceptions from non-blocking execute_model in UniProcExecutor to avoid DP deadlocks (#35194)
Signed-off-by: fangyuchu <fangyuchu@qq.com>
parent b386bb3d
...@@ -443,9 +443,10 @@ class EngineCore: ...@@ -443,9 +443,10 @@ class EngineCore:
deferred_scheduler_output = None deferred_scheduler_output = None
if self.scheduler.has_requests(): if self.scheduler.has_requests():
scheduler_output = self.scheduler.schedule() scheduler_output = self.scheduler.schedule()
exec_future = self.model_executor.execute_model( with self.log_error_detail(scheduler_output):
scheduler_output, non_block=True exec_future = self.model_executor.execute_model(
) scheduler_output, non_block=True
)
if self.is_ec_consumer: if self.is_ec_consumer:
model_executed = scheduler_output.total_num_scheduled_tokens > 0 model_executed = scheduler_output.total_num_scheduled_tokens > 0
......
...@@ -100,12 +100,17 @@ class UniProcExecutor(Executor): ...@@ -100,12 +100,17 @@ class UniProcExecutor(Executor):
def execute_model( # type: ignore[override] def execute_model( # type: ignore[override]
self, scheduler_output: SchedulerOutput, non_block: bool = False self, scheduler_output: SchedulerOutput, non_block: bool = False
) -> ModelRunnerOutput | None | Future[ModelRunnerOutput | None]: ) -> ModelRunnerOutput | None | Future[ModelRunnerOutput | None]:
return self.collective_rpc( output = self.collective_rpc(
"execute_model", "execute_model",
args=(scheduler_output,), args=(scheduler_output,),
non_block=non_block, non_block=non_block,
single_value=True, single_value=True,
) )
# In non-blocking mode, surface any exception as early as possible.
if non_block and output.done():
# Raise the exception in-line if the task failed.
output.result()
return output
def sample_tokens( # type: ignore[override] def sample_tokens( # type: ignore[override]
self, grammar_output: GrammarOutput | None, non_block: bool = False self, grammar_output: GrammarOutput | None, non_block: bool = False
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment