cli_args.py 10.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
"""
This file contains the command line arguments for the vLLM's
OpenAI-compatible server. It is kept in a separate file for documentation
purposes.
"""

import argparse
import json
import ssl
10
from typing import List, Optional, Sequence, Union, get_args
11

12
from vllm.engine.arg_utils import AsyncEngineArgs, nullable_str
13
14
from vllm.entrypoints.chat_utils import (ChatTemplateContentFormatOption,
                                         validate_chat_template)
15
16
from vllm.entrypoints.openai.serving_engine import (LoRAModulePath,
                                                    PromptAdapterPath)
17
from vllm.entrypoints.openai.tool_parsers import ToolParserManager
18
from vllm.utils import FlexibleArgumentParser
19
20
21
22


class LoRAParserAction(argparse.Action):

23
24
25
26
27
28
29
30
31
32
33
34
35
    def __call__(
        self,
        parser: argparse.ArgumentParser,
        namespace: argparse.Namespace,
        values: Optional[Union[str, Sequence[str]]],
        option_string: Optional[str] = None,
    ):
        if values is None:
            values = []
        if isinstance(values, str):
            raise TypeError("Expected values to be a list")

        lora_list: List[LoRAModulePath] = []
36
        for item in values:
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
            if item in [None, '']:  # Skip if item is None or empty string
                continue
            if '=' in item and ',' not in item:  # Old format: name=path
                name, path = item.split('=')
                lora_list.append(LoRAModulePath(name, path))
            else:  # Assume JSON format
                try:
                    lora_dict = json.loads(item)
                    lora = LoRAModulePath(**lora_dict)
                    lora_list.append(lora)
                except json.JSONDecodeError:
                    parser.error(
                        f"Invalid JSON format for --lora-modules: {item}")
                except TypeError as e:
                    parser.error(
                        f"Invalid fields for --lora-modules: {item} - {str(e)}"
                    )
54
55
56
        setattr(namespace, self.dest, lora_list)


57
58
class PromptAdapterParserAction(argparse.Action):

59
60
61
62
63
64
65
66
67
68
69
70
71
    def __call__(
        self,
        parser: argparse.ArgumentParser,
        namespace: argparse.Namespace,
        values: Optional[Union[str, Sequence[str]]],
        option_string: Optional[str] = None,
    ):
        if values is None:
            values = []
        if isinstance(values, str):
            raise TypeError("Expected values to be a list")

        adapter_list: List[PromptAdapterPath] = []
72
73
74
75
76
77
        for item in values:
            name, path = item.split('=')
            adapter_list.append(PromptAdapterPath(name, path))
        setattr(namespace, self.dest, adapter_list)


Ethan Xu's avatar
Ethan Xu committed
78
def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
79
80
    parser.add_argument("--host",
                        type=nullable_str,
81
                        default=None,
82
                        help="host name")
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
    parser.add_argument("--port", type=int, default=8000, help="port number")
    parser.add_argument(
        "--uvicorn-log-level",
        type=str,
        default="info",
        choices=['debug', 'info', 'warning', 'error', 'critical', 'trace'],
        help="log level for uvicorn")
    parser.add_argument("--allow-credentials",
                        action="store_true",
                        help="allow credentials")
    parser.add_argument("--allowed-origins",
                        type=json.loads,
                        default=["*"],
                        help="allowed origins")
    parser.add_argument("--allowed-methods",
                        type=json.loads,
                        default=["*"],
                        help="allowed methods")
    parser.add_argument("--allowed-headers",
                        type=json.loads,
                        default=["*"],
                        help="allowed headers")
    parser.add_argument("--api-key",
106
                        type=nullable_str,
107
108
109
110
111
                        default=None,
                        help="If provided, the server will require this key "
                        "to be presented in the header.")
    parser.add_argument(
        "--lora-modules",
112
        type=nullable_str,
113
114
115
        default=None,
        nargs='+',
        action=LoRAParserAction,
116
117
118
119
120
121
        help="LoRA module configurations in either 'name=path' format"
        "or JSON format. "
        "Example (old format): 'name=path' "
        "Example (new format): "
        "'{\"name\": \"name\", \"local_path\": \"path\", "
        "\"base_model_name\": \"id\"}'")
122
123
124
125
126
127
128
129
    parser.add_argument(
        "--prompt-adapters",
        type=nullable_str,
        default=None,
        nargs='+',
        action=PromptAdapterParserAction,
        help="Prompt adapter configurations in the format name=path. "
        "Multiple adapters can be specified.")
130
    parser.add_argument("--chat-template",
131
                        type=nullable_str,
132
133
134
135
                        default=None,
                        help="The file path to the chat template, "
                        "or the template in single-line form "
                        "for the specified model")
136
137
138
139
140
141
142
143
144
145
146
147
    parser.add_argument(
        '--chat-template-content-format',
        type=str,
        default="auto",
        choices=get_args(ChatTemplateContentFormatOption),
        help='The format to render message content within a chat template.'
        '\n\n'
        '* "string" will render the content as a string. '
        'Example: "Hello World"\n'
        '* "openai" will render the content as a list of dictionaries, '
        'similar to OpenAI schema. '
        'Example: [{"type": "text", "text": "Hello world!"}]')
148
    parser.add_argument("--response-role",
149
                        type=nullable_str,
150
151
152
153
                        default="assistant",
                        help="The role name to return if "
                        "`request.add_generation_prompt=true`.")
    parser.add_argument("--ssl-keyfile",
154
                        type=nullable_str,
155
156
157
                        default=None,
                        help="The file path to the SSL key file")
    parser.add_argument("--ssl-certfile",
158
                        type=nullable_str,
159
160
161
                        default=None,
                        help="The file path to the SSL cert file")
    parser.add_argument("--ssl-ca-certs",
162
                        type=nullable_str,
163
164
165
166
167
168
169
170
171
172
                        default=None,
                        help="The CA certificates file")
    parser.add_argument(
        "--ssl-cert-reqs",
        type=int,
        default=int(ssl.CERT_NONE),
        help="Whether client certificate is required (see stdlib ssl module's)"
    )
    parser.add_argument(
        "--root-path",
173
        type=nullable_str,
174
175
176
177
        default=None,
        help="FastAPI root_path when app is behind a path based routing proxy")
    parser.add_argument(
        "--middleware",
178
        type=nullable_str,
179
180
181
182
183
184
185
186
187
        action="append",
        default=[],
        help="Additional ASGI middleware to apply to the app. "
        "We accept multiple --middleware arguments. "
        "The value should be an import path. "
        "If a function is provided, vLLM will add it to the server "
        "using @app.middleware('http'). "
        "If a class is provided, vLLM will add it to the server "
        "using app.add_middleware(). ")
188
189
190
    parser.add_argument(
        "--return-tokens-as-token-ids",
        action="store_true",
191
192
        help="When --max-logprobs is specified, represents single tokens as "
        "strings of the form 'token_id:{token_id}' so that tokens that "
193
        "are not JSON-encodable can be identified.")
194
195
196
197
198
    parser.add_argument(
        "--disable-frontend-multiprocessing",
        action="store_true",
        help="If specified, will run the OpenAI frontend server in the same "
        "process as the model serving engine.")
199
200
201
202
203
    parser.add_argument(
        "--enable-request-id-headers",
        action="store_true",
        help="If specified, API server will add X-Request-Id header to "
        "responses. Caution: this hurts performance at high QPS.")
204
205
206
207
208
209
    parser.add_argument(
        "--enable-auto-tool-choice",
        action="store_true",
        default=False,
        help=
        "Enable auto tool choice for supported models. Use --tool-call-parser"
210
        " to specify which parser to use")
211

212
    valid_tool_parsers = ToolParserManager.tool_parsers.keys()
213
214
215
    parser.add_argument(
        "--tool-call-parser",
        type=str,
216
217
        metavar="{" + ",".join(valid_tool_parsers) + "} or name registered in "
        "--tool-parser-plugin",
218
219
220
221
222
223
        default=None,
        help=
        "Select the tool call parser depending on the model that you're using."
        " This is used to parse the model-generated tool call into OpenAI API "
        "format. Required for --enable-auto-tool-choice.")

224
225
226
227
228
229
230
231
232
    parser.add_argument(
        "--tool-parser-plugin",
        type=str,
        default="",
        help=
        "Special the tool parser plugin write to parse the model-generated tool"
        " into OpenAI API format, the name register in this plugin can be used "
        "in --tool-call-parser.")

233
    parser = AsyncEngineArgs.add_cli_args(parser)
234
235
236
237
238
239
240
241

    parser.add_argument('--max-log-len',
                        type=int,
                        default=None,
                        help='Max number of prompt characters or prompt '
                        'ID numbers being printed in log.'
                        '\n\nDefault: Unlimited')

242
243
244
245
246
247
    parser.add_argument(
        "--disable-fastapi-docs",
        action='store_true',
        default=False,
        help="Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint"
    )
248
249
250
251
252
    parser.add_argument(
        "--enable-prompt-tokens-details",
        action='store_true',
        default=False,
        help="If set to True, enable prompt_tokens_details in usage.")
253

254
    return parser
Ethan Xu's avatar
Ethan Xu committed
255
256


257
258
259
260
261
262
263
264
265
266
267
268
269
270
def validate_parsed_serve_args(args: argparse.Namespace):
    """Quick checks for model serve args that raise prior to loading."""
    if hasattr(args, "subparser") and args.subparser != "serve":
        return

    # Ensure that the chat template is valid; raises if it likely isn't
    validate_chat_template(args.chat_template)

    # Enable auto tool needs a tool call parser to be valid
    if args.enable_auto_tool_choice and not args.tool_call_parser:
        raise TypeError("Error: --enable-auto-tool-choice requires "
                        "--tool-call-parser")


Ethan Xu's avatar
Ethan Xu committed
271
272
273
274
def create_parser_for_docs() -> FlexibleArgumentParser:
    parser_for_docs = FlexibleArgumentParser(
        prog="-m vllm.entrypoints.openai.api_server")
    return make_arg_parser(parser_for_docs)