[frontend] Refactor CLI Args for a better modular integration (#20206)

Signed-off-by: Kourosh Hakhamaneshi <kourosh@anyscale.com>

[frontend] Refactor CLI Args for a better modular integration (#20206)
Signed-off-by: Kourosh Hakhamaneshi <kourosh@anyscale.com>
f148c44c · kourosh hakhamaneshi · GitHub · 235bfd5d · f148c44c · f148c44c
Unverified Commit f148c44c authored Jul 15, 2025 by kourosh hakhamaneshi Committed by GitHub Jul 15, 2025
Show whitespace changes
Inline Side-by-side

Showing with 167 additions and 212 deletions

.pre-commit-config.yaml .pre-commit-config.yaml +1 -1

vllm/entrypoints/openai/cli_args.py vllm/entrypoints/openai/cli_args.py +166 -211

No files found.
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -166,7 +166,7 @@ repos:
    language: python
    types: [python]
    pass_filenames: true
-    files: vllm/config.py|tests/test_config.py
+    files: vllm/config.py|tests/test_config.py|vllm/entrypoints/openai/cli_args.py
  # Keep `suggestion` last
  - id: suggestion
    name: Suggestion

--- a/vllm/entrypoints/openai/cli_args.py
+++ b/vllm/entrypoints/openai/cli_args.py
@@ -10,9 +10,13 @@ import argparse
 import json
 import ssl
 from collections.abc import Sequence
-from typing import Optional, Union, get_args
+from dataclasses import field
+from typing import Literal, Optional, Union
+from pydantic.dataclasses import dataclass
 import vllm.envs as envs
+from vllm.config import config
 from vllm.engine.arg_utils import AsyncEngineArgs, optional_type
 from vllm.entrypoints.chat_utils import (ChatTemplateContentFormatOption,
                                         validate_chat_template)
@@ -82,220 +86,171 @@ class PromptAdapterParserAction(argparse.Action):
        setattr(namespace, self.dest, adapter_list)
-def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
+@config
-    parser.add_argument("--host",
+@dataclass
-                        type=optional_type(str),
+class FrontendArgs:
-                        default=None,
+    """Arguments for the OpenAI-compatible frontend server."""
-                        help="Host name.")
+    host: Optional[str] = None
-    parser.add_argument("--port", type=int, default=8000, help="Port number.")
+    """Host name."""
-    parser.add_argument(
+    port: int = 8000
-        "--uvicorn-log-level",
+    """Port number."""
-        type=str,
+    uvicorn_log_level: Literal["debug", "info", "warning", "error", "critical",
-        default="info",
+                               "trace"] = "info"
-        choices=['debug', 'info', 'warning', 'error', 'critical', 'trace'],
+    """Log level for uvicorn."""
-        help="Log level for uvicorn.")
+    disable_uvicorn_access_log: bool = False
-    parser.add_argument("--disable-uvicorn-access-log",
+    """Disable uvicorn access log."""
-                        action="store_true",
+    allow_credentials: bool = False
-                        help="Disable uvicorn access log.")
+    """Allow credentials."""
-    parser.add_argument("--allow-credentials",
+    allowed_origins: list[str] = field(default_factory=lambda: ["*"])
-                        action="store_true",
+    """Allowed origins."""
-                        help="Allow credentials.")
+    allowed_methods: list[str] = field(default_factory=lambda: ["*"])
-    parser.add_argument("--allowed-origins",
+    """Allowed methods."""
-                        type=json.loads,
+    allowed_headers: list[str] = field(default_factory=lambda: ["*"])
-                        default=["*"],
+    """Allowed headers."""
-                        help="Allowed origins.")
+    api_key: Optional[str] = None
-    parser.add_argument("--allowed-methods",
+    """If provided, the server will require this key to be presented in the
-                        type=json.loads,
+    header."""
-                        default=["*"],
+    lora_modules: Optional[list[LoRAModulePath]] = None
-                        help="Allowed methods.")
+    """LoRA module configurations in either 'name=path' format or JSON format
-    parser.add_argument("--allowed-headers",
+    or JSON list format. Example (old format): `'name=path'` Example (new 
-                        type=json.loads,
+    format): `{\"name\": \"name\", \"path\": \"lora_path\", 
-                        default=["*"],
+    \"base_model_name\": \"id\"}`"""
-                        help="Allowed headers.")
+    prompt_adapters: Optional[list[PromptAdapterPath]] = None
-    parser.add_argument("--api-key",
+    """Prompt adapter configurations in the format name=path. Multiple adapters 
-                        type=optional_type(str),
+    can be specified."""
-                        default=None,
+    chat_template: Optional[str] = None
-                        help="If provided, the server will require this key "
+    """The file path to the chat template, or the template in single-line form 
-                        "to be presented in the header.")
+    for the specified model."""
-    parser.add_argument(
+    chat_template_content_format: ChatTemplateContentFormatOption = "auto"
-        "--lora-modules",
+    """The format to render message content within a chat template.
-        type=optional_type(str),
-        default=None,
-        nargs='+',
-        action=LoRAParserAction,
-        help="LoRA module configurations in either 'name=path' format"
-        "or JSON format. "
-        "Example (old format): ``'name=path'`` "
-        "Example (new format): "
-        "``{\"name\": \"name\", \"path\": \"lora_path\", "
-        "\"base_model_name\": \"id\"}``")
-    parser.add_argument(
-        "--prompt-adapters",
-        type=optional_type(str),
-        default=None,
-        nargs='+',
-        action=PromptAdapterParserAction,
-        help="Prompt adapter configurations in the format name=path. "
-        "Multiple adapters can be specified.")
-    parser.add_argument("--chat-template",
-                        type=optional_type(str),
-                        default=None,
-                        help="The file path to the chat template, "
-                        "or the template in single-line form "
-                        "for the specified model.")
-    parser.add_argument(
-        '--chat-template-content-format',
-        type=str,
-        default="auto",
-        choices=get_args(ChatTemplateContentFormatOption),
-        help='The format to render message content within a chat template.'
-        '\n\n'
-        '* "string" will render the content as a string. '
-        'Example: ``"Hello World"``\n'
-        '* "openai" will render the content as a list of dictionaries, '
-        'similar to OpenAI schema. '
-        'Example: ``[{"type": "text", "text": "Hello world!"}]``')
-    parser.add_argument("--response-role",
-                        type=optional_type(str),
-                        default="assistant",
-                        help="The role name to return if "
-                        "``request.add_generation_prompt=true``.")
-    parser.add_argument("--ssl-keyfile",
-                        type=optional_type(str),
-                        default=None,
-                        help="The file path to the SSL key file.")
-    parser.add_argument("--ssl-certfile",
-                        type=optional_type(str),
-                        default=None,
-                        help="The file path to the SSL cert file.")
-    parser.add_argument("--ssl-ca-certs",
-                        type=optional_type(str),
-                        default=None,
-                        help="The CA certificates file.")
-    parser.add_argument(
-        "--enable-ssl-refresh",
-        action="store_true",
-        default=False,
-        help="Refresh SSL Context when SSL certificate files change")
-    parser.add_argument(
-        "--ssl-cert-reqs",
-        type=int,
-        default=int(ssl.CERT_NONE),
-        help="Whether client certificate is required (see stdlib ssl module's)."
-    )
-    parser.add_argument(
-        "--root-path",
-        type=optional_type(str),
-        default=None,
-        help="FastAPI root_path when app is behind a path based routing proxy."
-    )
-    parser.add_argument(
-        "--middleware",
-        type=optional_type(str),
-        action="append",
-        default=[],
-        help="Additional ASGI middleware to apply to the app. "
-        "We accept multiple --middleware arguments. "
-        "The value should be an import path. "
-        "If a function is provided, vLLM will add it to the server "
-        "using ``@app.middleware('http')``. "
-        "If a class is provided, vLLM will add it to the server "
-        "using ``app.add_middleware()``. ")
-    parser.add_argument(
-        "--return-tokens-as-token-ids",
-        action="store_true",
-        help="When ``--max-logprobs`` is specified, represents single tokens "
-        " as strings of the form 'token_id:{token_id}' so that tokens "
-        "that are not JSON-encodable can be identified.")
-    parser.add_argument(
-        "--disable-frontend-multiprocessing",
-        action="store_true",
-        help="If specified, will run the OpenAI frontend server in the same "
-        "process as the model serving engine.")
-    parser.add_argument(
-        "--enable-request-id-headers",
-        action="store_true",
-        help="If specified, API server will add X-Request-Id header to "
-        "responses.")
-    parser.add_argument(
-        "--enable-auto-tool-choice",
-        action="store_true",
-        default=False,
-        help="Enable auto tool choice for supported models. Use "
-        "``--tool-call-parser`` to specify which parser to use.")
-    parser.add_argument(
-        "--expand-tools-even-if-tool-choice-none",
-        action="store_true",
-        default=False,
-        deprecated=True,
-        help="Include tool definitions in prompts "
-        "even when tool_choice='none'. "
-        "This is a transitional option that will be removed in v0.10.0. "
-        "In v0.10.0, tool definitions will always be included regardless of "
-        "tool_choice setting. Use this flag now to test the new behavior "
-        "before the breaking change.")
-    valid_tool_parsers = ToolParserManager.tool_parsers.keys()
-    parser.add_argument(
-        "--tool-call-parser",
-        type=str,
-        metavar="{" + ",".join(valid_tool_parsers) + "} or name registered in "
-        "--tool-parser-plugin",
-        default=None,
-        help=
-        "Select the tool call parser depending on the model that you're using."
-        " This is used to parse the model-generated tool call into OpenAI API "
-        "format. Required for ``--enable-auto-tool-choice``.")
-    parser.add_argument(
-        "--tool-parser-plugin",
-        type=str,
-        default="",
-        help=
-        "Special the tool parser plugin write to parse the model-generated tool"
-        " into OpenAI API format, the name register in this plugin can be used "
-        "in ``--tool-call-parser``.")
-    parser.add_argument(
-        "--log-config-file",
-        type=str,
-        default=envs.VLLM_LOGGING_CONFIG_PATH,
-        help="Path to logging config JSON file for both vllm and uvicorn",
-    )
-    parser = AsyncEngineArgs.add_cli_args(parser)
+* "string" will render the content as a string. Example: `"Hello World"`
+* "openai" will render the content as a list of dictionaries, similar to OpenAI 
+schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
+    response_role: str = "assistant"
+    """The role name to return if `request.add_generation_prompt=true`."""
+    ssl_keyfile: Optional[str] = None
+    """The file path to the SSL key file."""
+    ssl_certfile: Optional[str] = None
+    """The file path to the SSL cert file."""
+    ssl_ca_certs: Optional[str] = None
+    """The CA certificates file."""
+    enable_ssl_refresh: bool = False
+    """Refresh SSL Context when SSL certificate files change"""
+    ssl_cert_reqs: int = int(ssl.CERT_NONE)
+    """Whether client certificate is required (see `ssl.CERT_*` constants)."""
+    root_path: Optional[str] = None
+    """FastAPI root_path when app is behind a path based routing proxy."""
+    middleware: list[str] = field(default_factory=lambda: [])
+    """Additional ASGI middleware to apply to the app. We accept multiple 
+    --middleware arguments. The value should be an import path. If a function 
+    is provided, vLLM will add it to the server using 
+    `@app.middleware('http')`. If a class is provided, vLLM will 
+    add it to the server using `app.add_middleware()`."""
+    return_tokens_as_token_ids: bool = False
+    """When `--max-logprobs` is specified, represents single tokens as 
+    strings of the form 'token_id:{token_id}' so that tokens that are not 
+    JSON-encodable can be identified."""
+    disable_frontend_multiprocessing: bool = False
+    """If specified, will run the OpenAI frontend server in the same process as 
+    the model serving engine."""
+    enable_request_id_headers: bool = False
+    """If specified, API server will add X-Request-Id header to responses. 
+    Caution: this hurts performance at high QPS."""
+    enable_auto_tool_choice: bool = False
+    """Enable auto tool choice for supported models. Use `--tool-call-parser` 
+    to specify which parser to use."""
+    tool_call_parser: Optional[str] = None
+    """Select the tool call parser depending on the model that you're using. 
+    This is used to parse the model-generated tool call into OpenAI API format. 
+    Required for `--enable-auto-tool-choice`. You can choose any option from 
+    the built-in parsers or register a plugin via `--tool-parser-plugin`."""
+    tool_parser_plugin: str = ""
+    """Specify the tool parser plugin written to parse the model-generated tool
+    calls into OpenAI API format; names registered in this plugin can be used in
+    `--tool-call-parser`."""
+    log_config_file: Optional[str] = envs.VLLM_LOGGING_CONFIG_PATH
+    """Path to logging config JSON file for both vllm and uvicorn"""
+    max_log_len: Optional[int] = None
+    """Max number of prompt characters or prompt ID numbers being printed in 
+    log. The default of None means unlimited."""
+    disable_fastapi_docs: bool = False
+    """Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint."""
+    enable_prompt_tokens_details: bool = False
+    """If set to True, enable prompt_tokens_details in usage."""
+    enable_server_load_tracking: bool = False
+    """If set to True, enable tracking server_load_metrics in the app state."""
+    enable_force_include_usage: bool = False
+    """If set to True, include usage on every request."""
+    expand_tools_even_if_tool_choice_none: bool = False
+    """Include tool definitions in prompts even when `tool_choice='none'`.
-    parser.add_argument('--max-log-len',
+    This is a transitional option that will be removed in v0.10.0. In
-                        type=int,
+    v0.10.0, tool definitions will always be included regardless of
-                        default=None,
+    `tool_choice` setting. Use this flag to test the upcoming behavior
-                        help='Max number of prompt characters or prompt '
+    before the breaking change."""
-                        'ID numbers being printed in log.'
-                        ' The default of None means unlimited.')
+    @staticmethod
+    def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
-    parser.add_argument(
+        from vllm.engine.arg_utils import get_kwargs
-        "--disable-fastapi-docs",
-        action='store_true',
+        frontend_kwargs = get_kwargs(FrontendArgs)
-        default=False,
-        help="Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint."
+        # Special case: allowed_origins, allowed_methods, allowed_headers all
-    )
+        # need json.loads type
-    parser.add_argument(
+        # Should also remove nargs
-        "--enable-prompt-tokens-details",
+        print(frontend_kwargs["allowed_origins"])
-        action='store_true',
+        frontend_kwargs["allowed_origins"]["type"] = json.loads
-        default=False,
+        frontend_kwargs["allowed_methods"]["type"] = json.loads
-        help="If set to True, enable prompt_tokens_details in usage.")
+        frontend_kwargs["allowed_headers"]["type"] = json.loads
-    parser.add_argument(
+        del frontend_kwargs["allowed_origins"]["nargs"]
-        "--enable-force-include-usage",
+        del frontend_kwargs["allowed_methods"]["nargs"]
-        action='store_true',
+        del frontend_kwargs["allowed_headers"]["nargs"]
-        default=False,
-        help="If set to True, including usage on every request.")
+        # Special case: LoRA modules need custom parser action and
-    parser.add_argument(
+        # optional_type(str)
-        "--enable-server-load-tracking",
+        frontend_kwargs["lora_modules"]["type"] = optional_type(str)
-        action='store_true',
+        frontend_kwargs["lora_modules"]["action"] = LoRAParserAction
-        default=False,
-        help=
+        # Special case: Prompt adapters need custom parser action and
-        "If set to True, enable tracking server_load_metrics in the app state."
+        # optional_type(str)
+        frontend_kwargs["prompt_adapters"]["type"] = optional_type(str)
+        frontend_kwargs["prompt_adapters"][
+            "action"] = PromptAdapterParserAction
+        # Special case: Middleware needs append action
+        frontend_kwargs["middleware"]["action"] = "append"
+        # Special case: Tool call parser shows built-in options.
+        valid_tool_parsers = list(ToolParserManager.tool_parsers.keys())
+        frontend_kwargs["tool_call_parser"]["choices"] = valid_tool_parsers
+        # Special case for expand-tools-even-if-tool-choice-none because of
+        # the deprecation field
+        frontend_kwargs["expand_tools_even_if_tool_choice_none"]\
+            ["deprecated"] = True
+        frontend_group = parser.add_argument_group(
+            title="Frontend",
+            description=FrontendArgs.__doc__,
        )
+        for key, value in frontend_kwargs.items():
+            frontend_group.add_argument(f"--{key.replace('_', '-')}", **value)
+        return parser
+def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
+    """Create the CLI argument parser used by the OpenAI API server.
+    We rely on the helper methods of `FrontendArgs` and `AsyncEngineArgs` to
+    register all arguments instead of manually enumerating them here. This
+    avoids code duplication and keeps the argument definitions in one place.
+    """
+    parser = FrontendArgs.add_cli_args(parser)
+    parser = AsyncEngineArgs.add_cli_args(parser)
    return parser