[Core] Change `execute_model_with_error_logging()` to be a ctx manager (#27060)

Signed-off-by: Nick Hill <nhill@redhat.com>

[Core] Change `execute_model_with_error_logging()` to be a ctx manager (#27060)
Signed-off-by: Nick Hill <nhill@redhat.com>
fe3b9372 · Nick Hill · GitHub · bde9e227 · fe3b9372
Unverified Commit fe3b9372 authored Oct 16, 2025 by Nick Hill Committed by GitHub Oct 17, 2025
Show whitespace changes
Inline Side-by-side

Showing with 11 additions and 15 deletions

vllm/v1/engine/core.py vllm/v1/engine/core.py +11 -15

No files found.
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -290,14 +290,11 @@ class EngineCore:
        # (i.e. client-aborted vs stop criteria met).
        self.scheduler.finish_requests(request_ids, RequestStatus.FINISHED_ABORTED)
-    def execute_model_with_error_logging(
+    @contextmanager
-        self,
+    def log_error_detail(self, scheduler_output: SchedulerOutput):
-        model_fn: Callable[[SchedulerOutput], ModelRunnerOutput],
-        scheduler_output: SchedulerOutput,
-    ) -> ModelRunnerOutput:
        """Execute the model and log detailed info on failure."""
        try:
-            return model_fn(scheduler_output)
+            yield
        except Exception as err:
            # We do not want to catch BaseException here since we're only
            # interested in dumping info when the exception is due to an
@@ -321,15 +318,16 @@ class EngineCore:
        if not self.scheduler.has_requests():
            return {}, False
        scheduler_output = self.scheduler.schedule()
-        model_output = self.execute_model_with_error_logging(
-            self.model_executor.execute_model,  # type: ignore
+        with self.log_error_detail(scheduler_output):
-            scheduler_output,
+            model_output = self.model_executor.execute_model(scheduler_output)
-        )
+        assert isinstance(model_output, ModelRunnerOutput)
        engine_core_outputs = self.scheduler.update_from_output(
            scheduler_output, model_output
        )
-        return (engine_core_outputs, scheduler_output.total_num_scheduled_tokens > 0)
+        return engine_core_outputs, scheduler_output.total_num_scheduled_tokens > 0
    def post_step(self, model_executed: bool) -> None:
        if self.use_spec_decode and model_executed:
@@ -386,14 +384,12 @@ class EngineCore:
        # Block until the next result is available.
        future, scheduler_output = batch_queue.pop()
-        model_output = self.execute_model_with_error_logging(
+        with self.log_error_detail(scheduler_output):
-            lambda _: future.result(), scheduler_output
+            model_output = future.result()
-        )
        engine_core_outputs = self.scheduler.update_from_output(
            scheduler_output, model_output
        )
        return engine_core_outputs, model_executed
    def shutdown(self):