resolve pydantic error in startup benchmark (#31348)

Signed-off-by: Andy Xie <andy.xning@gmail.com>

resolve pydantic error in startup benchmark (#31348)
Signed-off-by: Andy Xie <andy.xning@gmail.com>
abd92242 · Ning Xie · GitHub · 4dc0d606 · abd92242 · abd92242
Unverified Commit abd92242 authored Jan 10, 2026 by Ning Xie Committed by GitHub Jan 10, 2026
Show whitespace changes
Inline Side-by-side

Showing with 21 additions and 7 deletions

tests/benchmarks/test_bench_startup.py tests/benchmarks/test_bench_startup.py +19 -0

vllm/benchmarks/startup.py vllm/benchmarks/startup.py +2 -7

No files found.
--- a/tests/benchmarks/test_bench_startup.py
+++ b/tests/benchmarks/test_bench_startup.py
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import subprocess
+
+import pytest
+
+
+@pytest.mark.benchmark
+def test_bench_startup():
+    command = [
+        "vllm",
+        "bench",
+        "startup",
+    ]
+    result = subprocess.run(command, capture_output=True, text=True)
+    print(result.stdout)
+    print(result.stderr)
+
+    assert result.returncode == 0, f"Benchmark failed: {result.stderr}"
--- a/vllm/benchmarks/startup.py
+++ b/vllm/benchmarks/startup.py
@@ -55,7 +55,7 @@ def cold_startup():
            os.environ.pop("VLLM_CACHE_ROOT", None)


-def run_startup_in_subprocess(engine_args_dict, result_queue):
+def run_startup_in_subprocess(engine_args, result_queue):
    """
    Run LLM startup in a subprocess and return timing metrics via a queue.
    This ensures complete isolation between iterations.
@@ -63,9 +63,6 @@ def run_startup_in_subprocess(engine_args_dict, result_queue):
    try:
        # Import inside the subprocess to avoid issues with forking
        from vllm import LLM
-        from vllm.engine.arg_utils import EngineArgs
-
-        engine_args = EngineArgs(**engine_args_dict)

        # Measure total startup time
        start_time = time.perf_counter()
@@ -200,15 +197,13 @@ def main(args: argparse.Namespace):
        Create LLM instance in a subprocess and measure startup time.
        Returns timing metrics, using subprocess for complete isolation.
        """
-        # Convert engine_args to dictionary for pickling
-        engine_args_dict = dataclasses.asdict(engine_args)

        # Create a queue for inter-process communication
        result_queue = multiprocessing.Queue()
        process = multiprocessing.Process(
            target=run_startup_in_subprocess,
            args=(
-                engine_args_dict,
+                engine_args,
                result_queue,
            ),
        )