[CI/Build] Replace `vllm.entrypoints.openai.api_server` entrypoint with `vllm...

[CI/Build] Replace `vllm.entrypoints.openai.api_server` entrypoint with `vllm serve` command (#25967) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>

[CI/Build] Replace `vllm.entrypoints.openai.api_server` entrypoint with `vllm...
[CI/Build] Replace `vllm.entrypoints.openai.api_server` entrypoint with `vllm serve` command (#25967) Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
d00d6529 · Cyrus Leung · GitHub · 3b279a84 · d00d6529 · d00d6529
Unverified Commit d00d6529 authored Oct 03, 2025 by Cyrus Leung Committed by GitHub Oct 02, 2025
Show whitespace changes
Inline Side-by-side

Showing with 66 additions and 12 deletions

tests/utils_/test_utils.py tests/utils_/test_utils.py +37 -7

vllm/utils/__init__.py vllm/utils/__init__.py +29 -5

No files found.
--- a/tests/utils_/test_utils.py
+++ b/tests/utils_/test_utils.py
@@ -786,13 +786,43 @@ def test_model_specification(parser_with_config, cli_config_file,
        parser_with_config.parse_args(['serve', '--config', cli_config_file])
    # Test using --model option raises error
-    with pytest.raises(
+    # with pytest.raises(
-            ValueError,
+    #         ValueError,
-            match=
+    #         match=
-        ("With `vllm serve`, you should provide the model as a positional "
+    #     ("With `vllm serve`, you should provide the model as a positional "
-         "argument or in a config file instead of via the `--model` option."),
+    #      "argument or in a config file instead of via the `--model` option."),
-    ):
+    # ):
-        parser_with_config.parse_args(['serve', '--model', 'my-model'])
+    #     parser_with_config.parse_args(['serve', '--model', 'my-model'])
+    # Test using --model option back-compatibility
+    # (when back-compatibility ends, the above test should be uncommented
+    # and the below test should be removed)
+    args = parser_with_config.parse_args([
+        'serve',
+        '--tensor-parallel-size',
+        '2',
+        '--model',
+        'my-model',
+        '--trust-remote-code',
+        '--port',
+        '8001',
+    ])
+    assert args.model is None
+    assert args.tensor_parallel_size == 2
+    assert args.trust_remote_code is True
+    assert args.port == 8001
+    args = parser_with_config.parse_args([
+        'serve',
+        '--tensor-parallel-size=2',
+        '--model=my-model',
+        '--trust-remote-code',
+        '--port=8001',
+    ])
+    assert args.model is None
+    assert args.tensor_parallel_size == 2
+    assert args.trust_remote_code is True
+    assert args.port == 8001
    # Test other config values are preserved
    args = parser_with_config.parse_args([

--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -1855,13 +1855,37 @@ class FlexibleArgumentParser(ArgumentParser):
        # Check for --model in command line arguments first
        if args and args[0] == "serve":
-            model_in_cli_args = any(arg == '--model' for arg in args)
+            try:
+                model_idx = next(
-            if model_in_cli_args:
+                    i for i, arg in enumerate(args)
-                raise ValueError(
+                    if arg == "--model" or arg.startswith("--model="))
+                logger.warning(
                    "With `vllm serve`, you should provide the model as a "
                    "positional argument or in a config file instead of via "
-                    "the `--model` option.")
+                    "the `--model` option. "
+                    "The `--model` option will be removed in v0.13.")
+                if args[model_idx] == "--model":
+                    model_tag = args[model_idx + 1]
+                    rest_start_idx = model_idx + 2
+                else:
+                    model_tag = args[model_idx].removeprefix("--model=")
+                    rest_start_idx = model_idx + 1
+                # Move <model> to the front, e.g.:
+                # [Before]
+                # vllm serve -tp 2 --model <model> --enforce-eager --port 8001
+                # [After]
+                # vllm serve <model> -tp 2 --enforce-eager --port 8001
+                args = [
+                    "serve",
+                    model_tag,
+                    *args[1:model_idx],
+                    *args[rest_start_idx:],
+                ]
+                print("args", args)
+            except StopIteration:
+                pass
        if '--config' in args:
            args = self._pull_args_from_config(args)