[Fix] Benchmark `"EngineClient" has no attribute "model_config"` (#17976)

Signed-off-by: Brayden Zhong <b8zhong@uwaterloo.ca>

[Fix] Benchmark `"EngineClient" has no attribute "model_config"` (#17976)
Signed-off-by: Brayden Zhong <b8zhong@uwaterloo.ca>
891b9d33 · Brayden Zhong · GitHub · 43078301 · 891b9d33 · 891b9d33
Unverified commit 891b9d33, authored May 12, 2025 by Brayden Zhong; committed by GitHub May 11, 2025
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 7 additions and 5 deletions

benchmarks/benchmark_throughput.py benchmarks/benchmark_throughput.py +4 -3

vllm/benchmarks/throughput.py vllm/benchmarks/throughput.py +3 -2

No files found.
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -146,9 +146,10 @@ async def run_vllm_async(

    async with build_async_engine_client_from_engine_args(
            engine_args, disable_frontend_multiprocessing) as llm:
+        model_config = await llm.get_model_config()
        assert all(
-            llm.model_config.max_model_len >= (request.prompt_len +
-                                               request.expected_output_len)
+            model_config.max_model_len >= (request.prompt_len +
+                                           request.expected_output_len)
            for request in requests), (
                "Please ensure that max_model_len is greater than the sum of"
                " prompt_len and expected_output_len for all requests.")
@@ -599,7 +600,7 @@ if __name__ == "__main__":
        "--lora-path",
        type=str,
        default=None,
-        help="Path to the lora adapters to use. This can be an absolute path, "
+        help="Path to the LoRA adapters to use. This can be an absolute path, "
        "a relative path, or a Hugging Face model identifier.")
    parser.add_argument(
        "--prefix-len",

--- a/vllm/benchmarks/throughput.py
+++ b/vllm/benchmarks/throughput.py
@@ -148,9 +148,10 @@ async def run_vllm_async(

    async with build_async_engine_client_from_engine_args(
            engine_args, disable_frontend_multiprocessing) as llm:
+        model_config = await llm.get_model_config()
        assert all(
-            llm.model_config.max_model_len >= (request.prompt_len +
-                                               request.expected_output_len)
+            model_config.max_model_len >= (request.prompt_len +
+                                           request.expected_output_len)
            for request in requests), (
                "Please ensure that max_model_len is greater than the sum of"
                " prompt_len and expected_output_len for all requests.")