Unverified commit 891b9d33, authored by Brayden Zhong and committed by GitHub
Browse files

[Fix] Benchmark `"EngineClient" has no attribute "model_config"` (#17976)


Signed-off-by: Brayden Zhong <b8zhong@uwaterloo.ca>
parent 43078301
...@@ -146,9 +146,10 @@ async def run_vllm_async( ...@@ -146,9 +146,10 @@ async def run_vllm_async(
async with build_async_engine_client_from_engine_args( async with build_async_engine_client_from_engine_args(
engine_args, disable_frontend_multiprocessing) as llm: engine_args, disable_frontend_multiprocessing) as llm:
model_config = await llm.get_model_config()
assert all( assert all(
llm.model_config.max_model_len >= (request.prompt_len + model_config.max_model_len >= (request.prompt_len +
request.expected_output_len) request.expected_output_len)
for request in requests), ( for request in requests), (
"Please ensure that max_model_len is greater than the sum of" "Please ensure that max_model_len is greater than the sum of"
" prompt_len and expected_output_len for all requests.") " prompt_len and expected_output_len for all requests.")
...@@ -599,7 +600,7 @@ if __name__ == "__main__": ...@@ -599,7 +600,7 @@ if __name__ == "__main__":
"--lora-path", "--lora-path",
type=str, type=str,
default=None, default=None,
help="Path to the lora adapters to use. This can be an absolute path, " help="Path to the LoRA adapters to use. This can be an absolute path, "
"a relative path, or a Hugging Face model identifier.") "a relative path, or a Hugging Face model identifier.")
parser.add_argument( parser.add_argument(
"--prefix-len", "--prefix-len",
......
...@@ -148,9 +148,10 @@ async def run_vllm_async( ...@@ -148,9 +148,10 @@ async def run_vllm_async(
async with build_async_engine_client_from_engine_args( async with build_async_engine_client_from_engine_args(
engine_args, disable_frontend_multiprocessing) as llm: engine_args, disable_frontend_multiprocessing) as llm:
model_config = await llm.get_model_config()
assert all( assert all(
llm.model_config.max_model_len >= (request.prompt_len + model_config.max_model_len >= (request.prompt_len +
request.expected_output_len) request.expected_output_len)
for request in requests), ( for request in requests), (
"Please ensure that max_model_len is greater than the sum of" "Please ensure that max_model_len is greater than the sum of"
" prompt_len and expected_output_len for all requests.") " prompt_len and expected_output_len for all requests.")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment