Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
26bd43b5
Unverified
Commit
26bd43b5
authored
Mar 06, 2026
by
Nick Hill
Committed by
GitHub
Mar 06, 2026
Browse files
Revert "[BugFix] Fix engine hanging after KV cache initialization fai… (#36262)
parent
6b625a88
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
25 additions
and
59 deletions
+25
-59
vllm/v1/engine/core.py
vllm/v1/engine/core.py
+25
-54
vllm/v1/engine/utils.py
vllm/v1/engine/utils.py
+0
-5
No files found.
vllm/v1/engine/core.py
View file @
26bd43b5
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
contextlib
import
os
import
queue
import
signal
...
...
@@ -120,17 +119,9 @@ class EngineCore:
self
.
_eep_scale_up_before_kv_init
()
# Setup KV Caches and update CacheConfig after profiling.
try
:
num_gpu_blocks
,
num_cpu_blocks
,
kv_cache_config
=
(
self
.
_initialize_kv_caches
(
vllm_config
)
)
except
Exception
:
logger
.
exception
(
"EngineCore failed during KV cache initialization; "
"shutting down executor."
num_gpu_blocks
,
num_cpu_blocks
,
kv_cache_config
=
self
.
_initialize_kv_caches
(
vllm_config
)
self
.
model_executor
.
shutdown
()
raise
vllm_config
.
cache_config
.
num_gpu_blocks
=
num_gpu_blocks
vllm_config
.
cache_config
.
num_cpu_blocks
=
num_cpu_blocks
...
...
@@ -976,33 +967,13 @@ class EngineCoreProc(EngineCore):
addresses
=
self
.
startup_handshake
(
handshake_socket
,
local_client
,
headless
,
parallel_config_to_update
)
exc_during_init
=
False
try
:
yield
addresses
except
Exception
:
exc_during_init
=
True
raise
finally
:
if
exc_during_init
:
# Send FAILED status so the front-end detects init
# failure immediately via ZMQ instead of waiting for
# process sentinel (which may be delayed by cleanup).
with
contextlib
.
suppress
(
Exception
):
handshake_socket
.
send
(
msgspec
.
msgpack
.
encode
(
{
"status"
:
"FAILED"
,
"local"
:
local_client
,
"headless"
:
headless
,
}
)
)
else
:
# Send ready message.
num_gpu_blocks
=
vllm_config
.
cache_config
.
num_gpu_blocks
# We pass back the coordinator stats update address
# here for the external LB case for our colocated
# front-end to use (coordinator only runs with rank 0).
# We pass back the coordinator stats update address here for the
# external LB case for our colocated front-end to use (coordinator
# only runs with rank 0).
dp_stats_address
=
self
.
frontend_stats_publish_address
# Include config hash for DP configuration validation
...
...
vllm/v1/engine/utils.py
View file @
26bd43b5
...
...
@@ -1130,11 +1130,6 @@ def wait_for_engine_startup(
start_pending
[
0
if
local
else
1
]
-=
1
engine
.
state
=
CoreEngineState
.
READY
elif
status
==
"FAILED"
:
raise
RuntimeError
(
f
"Engine core
{
eng_index
}
reported initialization failure. "
"See root cause above."
)
else
:
raise
RuntimeError
(
f
"Unexpected
{
status
}
message for "
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment