[Core] Change max_model_len in EngineCoreReadyResponse to be non-None (#39442)

Signed-off-by: Nick Hill <nickhill123@gmail.com>

[Core] Change max_model_len in EngineCoreReadyResponse to be non-None (#39442)
Signed-off-by: Nick Hill <nickhill123@gmail.com>
ef076c1b · Nick Hill · GitHub · ec68d53b · ef076c1b · ef076c1b
Unverified commit ef076c1b, authored Apr 09, 2026 by Nick Hill; committed by GitHub on Apr 10, 2026
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 4 additions and 6 deletions

vllm/v1/engine/__init__.py vllm/v1/engine/__init__.py +1 -1

vllm/v1/engine/core_client.py vllm/v1/engine/core_client.py +3 -5

No files found.
--- a/vllm/v1/engine/__init__.py
+++ b/vllm/v1/engine/__init__.py
@@ -72,9 +72,9 @@ class EngineCoreReadyResponse:
    values (e.g. max_model_len after KV cache auto-fitting).
    """

+    max_model_len: int
    num_gpu_blocks: int
    dp_stats_address: str | None
-    max_model_len: int | None = None


 class EngineCoreRequest(

--- a/vllm/v1/engine/core_client.py
+++ b/vllm/v1/engine/core_client.py
@@ -671,10 +671,8 @@ class MPClient(EngineCoreClient):
            return
        vllm_config = self.vllm_config
        response = msgspec.msgpack.decode(payload, type=EngineCoreReadyResponse)
-        if response.max_model_len is not None:
        vllm_config.model_config.max_model_len = min(
-                vllm_config.model_config.max_model_len,
-                response.max_model_len,
+            vllm_config.model_config.max_model_len, response.max_model_len
        )

        # Setup KV cache config with initialization state from