Unverified commit abd92242, authored by Ning Xie and committed by GitHub
Browse files

resolve pydantic error in startup benchmark (#31348)


Signed-off-by: Andy Xie <andy.xning@gmail.com>
parent 4dc0d606
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import subprocess
import pytest
@pytest.mark.benchmark
def test_bench_startup():
    """Run ``vllm bench startup`` and require a zero exit status.

    The command is launched with no extra arguments. Its stdout and stderr
    are captured as text and echoed before the exit code is checked.
    """
    cli = ["vllm", "bench", "startup"]
    completed = subprocess.run(cli, capture_output=True, text=True)

    # Echo both captured streams before asserting on the exit code.
    for stream in (completed.stdout, completed.stderr):
        print(stream)

    assert completed.returncode == 0, f"Benchmark failed: {completed.stderr}"
...@@ -55,7 +55,7 @@ def cold_startup(): ...@@ -55,7 +55,7 @@ def cold_startup():
os.environ.pop("VLLM_CACHE_ROOT", None) os.environ.pop("VLLM_CACHE_ROOT", None)
def run_startup_in_subprocess(engine_args_dict, result_queue): def run_startup_in_subprocess(engine_args, result_queue):
""" """
Run LLM startup in a subprocess and return timing metrics via a queue. Run LLM startup in a subprocess and return timing metrics via a queue.
This ensures complete isolation between iterations. This ensures complete isolation between iterations.
...@@ -63,9 +63,6 @@ def run_startup_in_subprocess(engine_args_dict, result_queue): ...@@ -63,9 +63,6 @@ def run_startup_in_subprocess(engine_args_dict, result_queue):
try: try:
# Import inside the subprocess to avoid issues with forking # Import inside the subprocess to avoid issues with forking
from vllm import LLM from vllm import LLM
from vllm.engine.arg_utils import EngineArgs
engine_args = EngineArgs(**engine_args_dict)
# Measure total startup time # Measure total startup time
start_time = time.perf_counter() start_time = time.perf_counter()
...@@ -200,15 +197,13 @@ def main(args: argparse.Namespace): ...@@ -200,15 +197,13 @@ def main(args: argparse.Namespace):
Create LLM instance in a subprocess and measure startup time. Create LLM instance in a subprocess and measure startup time.
Returns timing metrics, using subprocess for complete isolation. Returns timing metrics, using subprocess for complete isolation.
""" """
# Convert engine_args to dictionary for pickling
engine_args_dict = dataclasses.asdict(engine_args)
# Create a queue for inter-process communication # Create a queue for inter-process communication
result_queue = multiprocessing.Queue() result_queue = multiprocessing.Queue()
process = multiprocessing.Process( process = multiprocessing.Process(
target=run_startup_in_subprocess, target=run_startup_in_subprocess,
args=( args=(
engine_args_dict, engine_args,
result_queue, result_queue,
), ),
) )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment