Unverified Commit d00d6529 authored by Cyrus Leung's avatar Cyrus Leung Committed by GitHub
Browse files

[CI/Build] Replace `vllm.entrypoints.openai.api_server` entrypoint with `vllm...


[CI/Build] Replace `vllm.entrypoints.openai.api_server` entrypoint with `vllm serve` command (#25967)
Signed-off-by: default avatarDarkLight1337 <tlleungac@connect.ust.hk>
parent 3b279a84
......@@ -786,13 +786,43 @@ def test_model_specification(parser_with_config, cli_config_file,
parser_with_config.parse_args(['serve', '--config', cli_config_file])
# Test using --model option raises error
with pytest.raises(
ValueError,
match=
("With `vllm serve`, you should provide the model as a positional "
"argument or in a config file instead of via the `--model` option."),
):
parser_with_config.parse_args(['serve', '--model', 'my-model'])
# with pytest.raises(
# ValueError,
# match=
# ("With `vllm serve`, you should provide the model as a positional "
# "argument or in a config file instead of via the `--model` option."),
# ):
# parser_with_config.parse_args(['serve', '--model', 'my-model'])
# Test using --model option back-compatibility
# (when back-compatibility ends, the above test should be uncommented
# and the below test should be removed)
args = parser_with_config.parse_args([
'serve',
'--tensor-parallel-size',
'2',
'--model',
'my-model',
'--trust-remote-code',
'--port',
'8001',
])
assert args.model is None
assert args.tensor_parallel_size == 2
assert args.trust_remote_code is True
assert args.port == 8001
args = parser_with_config.parse_args([
'serve',
'--tensor-parallel-size=2',
'--model=my-model',
'--trust-remote-code',
'--port=8001',
])
assert args.model is None
assert args.tensor_parallel_size == 2
assert args.trust_remote_code is True
assert args.port == 8001
# Test other config values are preserved
args = parser_with_config.parse_args([
......
......@@ -1855,13 +1855,37 @@ class FlexibleArgumentParser(ArgumentParser):
# Check for --model in command line arguments first
if args and args[0] == "serve":
model_in_cli_args = any(arg == '--model' for arg in args)
if model_in_cli_args:
raise ValueError(
try:
model_idx = next(
i for i, arg in enumerate(args)
if arg == "--model" or arg.startswith("--model="))
logger.warning(
"With `vllm serve`, you should provide the model as a "
"positional argument or in a config file instead of via "
"the `--model` option.")
"the `--model` option. "
"The `--model` option will be removed in v0.13.")
if args[model_idx] == "--model":
model_tag = args[model_idx + 1]
rest_start_idx = model_idx + 2
else:
model_tag = args[model_idx].removeprefix("--model=")
rest_start_idx = model_idx + 1
# Move <model> to the front, e,g:
# [Before]
# vllm serve -tp 2 --model <model> --enforce-eager --port 8001
# [After]
# vllm serve <model> -tp 2 --enforce-eager --port 8001
args = [
"serve",
model_tag,
*args[1:model_idx],
*args[rest_start_idx:],
]
print("args", args)
except StopIteration:
pass
if '--config' in args:
args = self._pull_args_from_config(args)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment