Revert "[Feature] specify model in config.yaml (#14855)" (#15293)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>

Revert "[Feature] specify model in config.yaml (#14855)" (#15293)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
baec0d4d · Cyrus Leung · GitHub · c21b99b9 · baec0d4d · c21b99b9
Unverified Commit baec0d4d authored Mar 21, 2025 by Cyrus Leung Committed by GitHub Mar 21, 2025
7 changed files
--- a/docs/source/serving/openai_compatible_server.md
+++ b/docs/source/serving/openai_compatible_server.md
@@ -184,7 +184,6 @@ For example:
 ```yaml
 # config.yaml

-model: meta-llama/Llama-3.1-8B-Instruct
 host: "127.0.0.1"
 port: 6379
 uvicorn-log-level: "info"
@@ -193,13 +192,12 @@ uvicorn-log-level: "info"
 To use the above config file:

 ```bash
-vllm serve --config config.yaml
+vllm serve SOME_MODEL --config config.yaml
 ```

 :::{note}
 In case an argument is supplied simultaneously using command line and the config file, the value from the command line will take precedence.
 The order of priorities is `command line > config file values > defaults`.
-e.g. `vllm serve SOME_MODEL --config config.yaml`, SOME_MODEL takes precedence over `model` in config file.
 :::

 ## API Reference

--- a/tests/config/test_config_with_model.yaml
+++ b/tests/config/test_config_with_model.yaml
-# Same as test_config.yaml but with model specified
-model: config-model
-port: 12312
-served_model_name: mymodel
-tensor_parallel_size: 2
-trust_remote_code: true
-multi_step_stream_outputs: false
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1121,15 +1121,3 @@ def pytest_collection_modifyitems(config, items):
    for item in items:
        if "optional" in item.keywords:
            item.add_marker(skip_optional)
-
-
-@pytest.fixture(scope="session")
-def cli_config_file():
-    """Return the path to the CLI config file."""
-    return os.path.join(_TEST_DIR, "config", "test_config.yaml")
-
-
-@pytest.fixture(scope="session")
-def cli_config_file_with_model():
-    """Return the path to the CLI config file with model."""
-    return os.path.join(_TEST_DIR, "config", "test_config_with_model.yaml")
--- a/tests/config/test_config.yaml
+++ b/tests/config/test_config.yaml
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -8,7 +8,7 @@ from unittest.mock import patch

 import pytest
 import torch
-from vllm_test_utils.monitor import monitor
+from vllm_test_utils import monitor

 from vllm.config import ParallelConfig, VllmConfig, set_current_vllm_config
 from vllm.utils import (FlexibleArgumentParser, MemorySnapshot,
@@ -140,8 +140,7 @@ def parser():
 def parser_with_config():
    parser = FlexibleArgumentParser()
    parser.add_argument('serve')
-    parser.add_argument('model_tag', nargs='?')
-    parser.add_argument('--model', type=str)
+    parser.add_argument('model_tag')
    parser.add_argument('--served-model-name', type=str)
    parser.add_argument('--config', type=str)
    parser.add_argument('--port', type=int)
@@ -197,29 +196,29 @@ def test_missing_required_argument(parser):
        parser.parse_args([])


-def test_cli_override_to_config(parser_with_config, cli_config_file):
+def test_cli_override_to_config(parser_with_config):
    args = parser_with_config.parse_args([
-        'serve', 'mymodel', '--config', cli_config_file,
+        'serve', 'mymodel', '--config', './data/test_config.yaml',
        '--tensor-parallel-size', '3'
    ])
    assert args.tensor_parallel_size == 3
    args = parser_with_config.parse_args([
        'serve', 'mymodel', '--tensor-parallel-size', '3', '--config',
-        cli_config_file
+        './data/test_config.yaml'
    ])
    assert args.tensor_parallel_size == 3
    assert args.port == 12312
    args = parser_with_config.parse_args([
        'serve', 'mymodel', '--tensor-parallel-size', '3', '--config',
-        cli_config_file, '--port', '666'
+        './data/test_config.yaml', '--port', '666'
    ])
    assert args.tensor_parallel_size == 3
    assert args.port == 666


-def test_config_args(parser_with_config, cli_config_file):
+def test_config_args(parser_with_config):
    args = parser_with_config.parse_args(
-        ['serve', 'mymodel', '--config', cli_config_file])
+        ['serve', 'mymodel', '--config', './data/test_config.yaml'])
    assert args.tensor_parallel_size == 2
    assert args.trust_remote_code
    assert not args.multi_step_stream_outputs
@@ -241,9 +240,10 @@ def test_config_file(parser_with_config):
        ])


-def test_no_model_tag(parser_with_config, cli_config_file):
+def test_no_model_tag(parser_with_config):
    with pytest.raises(ValueError):
-        parser_with_config.parse_args(['serve', '--config', cli_config_file])
+        parser_with_config.parse_args(
+            ['serve', '--config', './data/test_config.yaml'])


 # yapf: enable
@@ -476,34 +476,3 @@ def test_swap_dict_values(obj, key1, key2):
        assert obj[key1] == original_obj[key2]
    else:
        assert key1 not in obj
-
-
-def test_model_specification(parser_with_config,
-                             cli_config_file,
-                             cli_config_file_with_model):
-    # Test model in CLI takes precedence over config
-    args = parser_with_config.parse_args([
-        'serve', 'cli-model', '--config', cli_config_file_with_model
-    ])
-    assert args.model_tag == 'cli-model'
-    assert args.served_model_name == 'mymodel'
-
-    # Test model from config file works
-    args = parser_with_config.parse_args([
-        'serve', '--config', cli_config_file_with_model
-    ])
-    assert args.model == 'config-model'
-    assert args.served_model_name == 'mymodel'
-
-    # Test no model specified anywhere raises error
-    with pytest.raises(ValueError, match="No model specified!"):
-        parser_with_config.parse_args(['serve', '--config', cli_config_file])
-
-    # Test other config values are preserved
-    args = parser_with_config.parse_args([
-        'serve', 'cli-model', '--config', cli_config_file_with_model
-    ])
-    assert args.tensor_parallel_size == 2
-    assert args.trust_remote_code is True
-    assert args.multi_step_stream_outputs is False
-    assert args.port == 12312
--- a/vllm/entrypoints/cli/serve.py
+++ b/vllm/entrypoints/cli/serve.py
@@ -21,16 +21,14 @@ class ServeSubcommand(CLISubcommand):

    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
-        # If model is specified in CLI (as positional arg), it takes precedence
-        if hasattr(args, 'model_tag') and args.model_tag is not None:
-            args.model = args.model_tag
-        # Otherwise use model from config (already in args.model)
-
-        # Check if we have a model specified somewhere
-        if args.model == EngineArgs.model:  # Still has default value
+        # The default value of `--model`
+        if args.model != EngineArgs.model:
            raise ValueError(
-                "With `vllm serve`, you should provide the model either as a "
-                "positional argument or in config file.")
+                "With `vllm serve`, you should provide the model as a "
+                "positional argument instead of via the `--model` option.")
+
+        # EngineArgs expects the model name to be passed as --model.
+        args.model = args.model_tag

        uvloop.run(run_server(args))

@@ -43,12 +41,10 @@ class ServeSubcommand(CLISubcommand):
        serve_parser = subparsers.add_parser(
            "serve",
            help="Start the vLLM OpenAI Compatible API server",
-            usage="vllm serve [model_tag] [options]")
+            usage="vllm serve <model_tag> [options]")
        serve_parser.add_argument("model_tag",
                                  type=str,
-                                  nargs='?',
-                                  help="The model tag to serve "
-                                  "(optional if specified in config)")
+                                  help="The model tag to serve")
        serve_parser.add_argument(
            "--config",
            type=str,

--- a/vllm/utils.py
+++ b/vllm/utils.py
@@ -1265,29 +1265,19 @@ class FlexibleArgumentParser(argparse.ArgumentParser):
        config_args = self._load_config_file(file_path)

        # 0th index is for {serve,chat,complete}
-        # optionally followed by model_tag (only for serve)
+        # followed by model_tag (only for serve)
        # followed by config args
        # followed by rest of cli args.
        # maintaining this order will enforce the precedence
        # of cli > config > defaults
        if args[0] == "serve":
-            model_in_cli = len(args) > 1 and not args[1].startswith('-')
-            model_in_config = any(arg == '--model' for arg in config_args)
-
-            if not model_in_cli and not model_in_config:
+            if index == 1:
                raise ValueError(
-                    "No model specified! Please specify model either in "
-                    "command-line arguments or in config file.")
-
-            if model_in_cli:
-                # Model specified as positional arg, keep CLI version
+                    "No model_tag specified! Please check your command-line"
+                    " arguments.")
            args = [args[0]] + [
                args[1]
            ] + config_args + args[2:index] + args[index + 2:]
-            else:
-                # No model in CLI, use config if available
-                args = [args[0]
-                        ] + config_args + args[1:index] + args[index + 2:]
        else:
            args = [args[0]] + config_args + args[1:index] + args[index + 2:]

@@ -1305,7 +1295,9 @@ class FlexibleArgumentParser(argparse.ArgumentParser):
                '--port': '12323',
                '--tensor-parallel-size': '4'
            ]
+
        """
+
        extension: str = file_path.split('.')[-1]
        if extension not in ('yaml', 'yml'):
            raise ValueError(
@@ -1330,15 +1322,7 @@ class FlexibleArgumentParser(argparse.ArgumentParser):
            if isinstance(action, StoreBoolean)
        ]

-        # Skip model from config if it's provided as positional argument
-        skip_model = (hasattr(self, '_parsed_args') and self._parsed_args
-                      and len(self._parsed_args) > 1
-                      and self._parsed_args[0] == 'serve'
-                      and not self._parsed_args[1].startswith('-'))
-
        for key, value in config.items():
-            if skip_model and key == 'model':
-                continue
            if isinstance(value, bool) and key not in store_boolean_arguments:
                if value:
                    processed_args.append('--' + key)