Revert "[BugFix] Fix engine hanging after KV cache initialization failure" (#36262)

26bd43b5 · Nick Hill · GitHub · 6b625a88 · 26bd43b5 · 26bd43b5
Unverified commit 26bd43b5, authored Mar 06, 2026 by Nick Hill; committed by GitHub on Mar 06, 2026
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 25 additions and 59 deletions

vllm/v1/engine/core.py vllm/v1/engine/core.py +25 -54

vllm/v1/engine/utils.py vllm/v1/engine/utils.py +0 -5

No files found.
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-import contextlib
 import os
 import queue
 import signal
@@ -120,17 +119,9 @@ class EngineCore:
            self._eep_scale_up_before_kv_init()
        # Setup KV Caches and update CacheConfig after profiling.
-        try:
+        num_gpu_blocks, num_cpu_blocks, kv_cache_config = self._initialize_kv_caches(
-            num_gpu_blocks, num_cpu_blocks, kv_cache_config = (
+            vllm_config
-                self._initialize_kv_caches(vllm_config)
-            )
-        except Exception:
-            logger.exception(
-                "EngineCore failed during KV cache initialization; "
-                "shutting down executor."
        )
-            self.model_executor.shutdown()
-            raise
        vllm_config.cache_config.num_gpu_blocks = num_gpu_blocks
        vllm_config.cache_config.num_cpu_blocks = num_cpu_blocks
@@ -976,33 +967,13 @@ class EngineCoreProc(EngineCore):
            addresses = self.startup_handshake(
                handshake_socket, local_client, headless, parallel_config_to_update
            )
-            exc_during_init = False
-            try:
            yield addresses
-            except Exception:
-                exc_during_init = True
-                raise
-            finally:
-                if exc_during_init:
-                    # Send FAILED status so the front-end detects init
-                    # failure immediately via ZMQ instead of waiting for
-                    # process sentinel (which may be delayed by cleanup).
-                    with contextlib.suppress(Exception):
-                        handshake_socket.send(
-                            msgspec.msgpack.encode(
-                                {
-                                    "status": "FAILED",
-                                    "local": local_client,
-                                    "headless": headless,
-                                }
-                            )
-                        )
-                else:
            # Send ready message.
            num_gpu_blocks = vllm_config.cache_config.num_gpu_blocks
-                    # We pass back the coordinator stats update address
+            # We pass back the coordinator stats update address here for the
-                    # here for the external LB case for our colocated
+            # external LB case for our colocated front-end to use (coordinator
-                    # front-end to use (coordinator only runs with rank 0).
+            # only runs with rank 0).
            dp_stats_address = self.frontend_stats_publish_address
            # Include config hash for DP configuration validation

--- a/vllm/v1/engine/utils.py
+++ b/vllm/v1/engine/utils.py
@@ -1130,11 +1130,6 @@ def wait_for_engine_startup(
            start_pending[0 if local else 1] -= 1
            engine.state = CoreEngineState.READY
-        elif status == "FAILED":
-            raise RuntimeError(
-                f"Engine core {eng_index} reported initialization failure. "
-                "See root cause above."
-            )
        else:
            raise RuntimeError(
                f"Unexpected {status} message for "