Unverified commit d00d6529, authored by Cyrus Leung and committed by GitHub
Browse files

[CI/Build] Replace `vllm.entrypoints.openai.api_server` entrypoint with `vllm...


[CI/Build] Replace `vllm.entrypoints.openai.api_server` entrypoint with `vllm serve` command (#25967)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 3b279a84
...@@ -786,13 +786,43 @@ def test_model_specification(parser_with_config, cli_config_file, ...@@ -786,13 +786,43 @@ def test_model_specification(parser_with_config, cli_config_file,
parser_with_config.parse_args(['serve', '--config', cli_config_file]) parser_with_config.parse_args(['serve', '--config', cli_config_file])
# Test using --model option raises error # Test using --model option raises error
with pytest.raises( # with pytest.raises(
ValueError, # ValueError,
match= # match=
("With `vllm serve`, you should provide the model as a positional " # ("With `vllm serve`, you should provide the model as a positional "
"argument or in a config file instead of via the `--model` option."), # "argument or in a config file instead of via the `--model` option."),
): # ):
parser_with_config.parse_args(['serve', '--model', 'my-model']) # parser_with_config.parse_args(['serve', '--model', 'my-model'])
# Test using --model option back-compatibility
# (when back-compatibility ends, the above test should be uncommented
# and the below test should be removed)
args = parser_with_config.parse_args([
'serve',
'--tensor-parallel-size',
'2',
'--model',
'my-model',
'--trust-remote-code',
'--port',
'8001',
])
assert args.model is None
assert args.tensor_parallel_size == 2
assert args.trust_remote_code is True
assert args.port == 8001
args = parser_with_config.parse_args([
'serve',
'--tensor-parallel-size=2',
'--model=my-model',
'--trust-remote-code',
'--port=8001',
])
assert args.model is None
assert args.tensor_parallel_size == 2
assert args.trust_remote_code is True
assert args.port == 8001
# Test other config values are preserved # Test other config values are preserved
args = parser_with_config.parse_args([ args = parser_with_config.parse_args([
......
...@@ -1855,13 +1855,37 @@ class FlexibleArgumentParser(ArgumentParser): ...@@ -1855,13 +1855,37 @@ class FlexibleArgumentParser(ArgumentParser):
# Check for --model in command line arguments first # Check for --model in command line arguments first
if args and args[0] == "serve": if args and args[0] == "serve":
model_in_cli_args = any(arg == '--model' for arg in args) try:
model_idx = next(
if model_in_cli_args: i for i, arg in enumerate(args)
raise ValueError( if arg == "--model" or arg.startswith("--model="))
logger.warning(
"With `vllm serve`, you should provide the model as a " "With `vllm serve`, you should provide the model as a "
"positional argument or in a config file instead of via " "positional argument or in a config file instead of via "
"the `--model` option.") "the `--model` option. "
"The `--model` option will be removed in v0.13.")
if args[model_idx] == "--model":
model_tag = args[model_idx + 1]
rest_start_idx = model_idx + 2
else:
model_tag = args[model_idx].removeprefix("--model=")
rest_start_idx = model_idx + 1
# Move <model> to the front, e.g.:
# [Before]
# vllm serve -tp 2 --model <model> --enforce-eager --port 8001
# [After]
# vllm serve <model> -tp 2 --enforce-eager --port 8001
args = [
"serve",
model_tag,
*args[1:model_idx],
*args[rest_start_idx:],
]
print("args", args)
except StopIteration:
pass
if '--config' in args: if '--config' in args:
args = self._pull_args_from_config(args) args = self._pull_args_from_config(args)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment