# SPDX-License-Identifier: Apache-2.0
"""Smoke test that runs the ``vllm bench serve`` CLI against a live server."""
import os
import subprocess

import pytest

from ..utils import RemoteOpenAIServer, models_path_prefix

# Model path handed both to the server and to the benchmark CLI.
MODEL_NAME = os.path.join(models_path_prefix,
                          "meta-llama/Llama-3.2-1B-Instruct")


@pytest.fixture(scope="module")
def server():
    """Start one ``RemoteOpenAIServer`` for the module and yield it.

    The server is launched for ``MODEL_NAME`` with a 1024-token max model
    length, ``--enforce-eager`` and ``--load-format dummy``.
    """
    # NOTE(review): "dummy" load format presumably skips loading real
    # weights so startup stays cheap -- confirm against the CLI docs.
    server_args = [
        "--max-model-len",
        "1024",
        "--enforce-eager",
        "--load-format",
        "dummy",
    ]

    with RemoteOpenAIServer(MODEL_NAME, server_args) as remote_server:
        yield remote_server


@pytest.mark.benchmark
def test_bench_serve(server):
    """Run ``vllm bench serve`` against the fixture server; expect exit 0.

    The benchmark is invoked with 5 prompts, a random input length of 32
    and a random output length of 4. Captured stdout/stderr are printed,
    and stderr is included in the assertion message on failure.
    """
    # Where to send requests: the model plus the fixture server's address.
    target_opts = [
        "--model",
        MODEL_NAME,
        "--host",
        server.host,
        "--port",
        str(server.port),
    ]
    # Size of the benchmark workload.
    workload_opts = [
        "--random-input-len",
        "32",
        "--random-output-len",
        "4",
        "--num-prompts",
        "5",
    ]
    bench_cmd = ["vllm", "bench", "serve", *target_opts, *workload_opts]

    completed = subprocess.run(bench_cmd, capture_output=True, text=True)

    # Echo both streams so they are available in the pytest output.
    print(completed.stdout)
    print(completed.stderr)

    assert completed.returncode == 0, f"Benchmark failed: {completed.stderr}"