Unverified commit ef076c1b, authored by Nick Hill, committed by GitHub
Browse files

[Core] Change max_model_len in EngineCoreReadyResponse to be non-None (#39442)


Signed-off-by: Nick Hill <nickhill123@gmail.com>
parent ec68d53b
......@@ -72,9 +72,9 @@ class EngineCoreReadyResponse:
values (e.g. max_model_len after KV cache auto-fitting).
"""
max_model_len: int
num_gpu_blocks: int
dp_stats_address: str | None
max_model_len: int | None = None
class EngineCoreRequest(
......
......@@ -671,10 +671,8 @@ class MPClient(EngineCoreClient):
return
vllm_config = self.vllm_config
response = msgspec.msgpack.decode(payload, type=EngineCoreReadyResponse)
if response.max_model_len is not None:
vllm_config.model_config.max_model_len = min(
vllm_config.model_config.max_model_len,
response.max_model_len,
vllm_config.model_config.max_model_len, response.max_model_len
)
# Setup KV cache config with initialization state from
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment