Unverified commit 891b9d33, authored by Brayden Zhong, committed by GitHub
Browse files

[Fix] Benchmark `"EngineClient" has no attribute "model_config"` (#17976)


Signed-off-by: Brayden Zhong <b8zhong@uwaterloo.ca>
parent 43078301
......@@ -146,9 +146,10 @@ async def run_vllm_async(
async with build_async_engine_client_from_engine_args(
engine_args, disable_frontend_multiprocessing) as llm:
+ model_config = await llm.get_model_config()
assert all(
- llm.model_config.max_model_len >= (request.prompt_len +
- request.expected_output_len)
+ model_config.max_model_len >= (request.prompt_len +
+ request.expected_output_len)
for request in requests), (
"Please ensure that max_model_len is greater than the sum of"
" prompt_len and expected_output_len for all requests.")
......@@ -599,7 +600,7 @@ if __name__ == "__main__":
"--lora-path",
type=str,
default=None,
- help="Path to the lora adapters to use. This can be an absolute path, "
+ help="Path to the LoRA adapters to use. This can be an absolute path, "
"a relative path, or a Hugging Face model identifier.")
parser.add_argument(
"--prefix-len",
......
......@@ -148,9 +148,10 @@ async def run_vllm_async(
async with build_async_engine_client_from_engine_args(
engine_args, disable_frontend_multiprocessing) as llm:
+ model_config = await llm.get_model_config()
assert all(
- llm.model_config.max_model_len >= (request.prompt_len +
- request.expected_output_len)
+ model_config.max_model_len >= (request.prompt_len +
+ request.expected_output_len)
for request in requests), (
"Please ensure that max_model_len is greater than the sum of"
" prompt_len and expected_output_len for all requests.")
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment