cli_args.py 10.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
"""
This file contains the command line arguments for vLLM's
OpenAI-compatible server. It is kept in a separate file for documentation
purposes.
"""

import argparse
import json
import ssl
10
from typing import List, Optional, Sequence, Union, get_args
11

12
from vllm.engine.arg_utils import AsyncEngineArgs, nullable_str
13
14
from vllm.entrypoints.chat_utils import (ChatTemplateContentFormatOption,
                                         validate_chat_template)
15
from vllm.entrypoints.openai.serving_models import (LoRAModulePath,
16
                                                    PromptAdapterPath)
17
from vllm.entrypoints.openai.tool_parsers import ToolParserManager
18
from vllm.utils import FlexibleArgumentParser
19
20
21
22


class LoRAParserAction(argparse.Action):
    """Argparse action that parses ``--lora-modules`` values.

    Each value is either the legacy ``name=path`` form or a JSON object
    whose keys are passed as keyword arguments to ``LoRAModulePath``. The
    resulting list of ``LoRAModulePath`` objects is stored on the namespace
    under ``self.dest``.
    """

    def __call__(
        self,
        parser: argparse.ArgumentParser,
        namespace: argparse.Namespace,
        values: Optional[Union[str, Sequence[str]]],
        option_string: Optional[str] = None,
    ):
        """Convert ``values`` to ``LoRAModulePath`` objects and store them.

        Args:
            parser: Parser used to report malformed items via
                ``parser.error``.
            namespace: Namespace that receives the parsed list.
            values: Sequence of raw items; ``None`` is treated as empty.
            option_string: Option string that triggered the action (unused).

        Raises:
            TypeError: If ``values`` is a single string instead of a
                sequence of items.
        """
        if values is None:
            values = []
        if isinstance(values, str):
            raise TypeError("Expected values to be a list")

        lora_list: List[LoRAModulePath] = []
        for item in values:
            if item in [None, '']:  # Skip if item is None or empty string
                continue
            if '=' in item and ',' not in item:  # Old format: name=path
                # Split on the first '=' only, so that a path which itself
                # contains '=' does not break the two-value unpacking.
                name, path = item.split('=', 1)
                lora_list.append(LoRAModulePath(name, path))
            else:  # Assume JSON format
                try:
                    lora_dict = json.loads(item)
                    lora = LoRAModulePath(**lora_dict)
                    lora_list.append(lora)
                except json.JSONDecodeError:
                    parser.error(
                        f"Invalid JSON format for --lora-modules: {item}")
                except TypeError as e:
                    # Raised when the decoded JSON cannot be expanded into
                    # the LoRAModulePath constructor's keyword arguments.
                    parser.error(
                        f"Invalid fields for --lora-modules: {item} - {str(e)}"
                    )
        setattr(namespace, self.dest, lora_list)


57
58
class PromptAdapterParserAction(argparse.Action):
    """Argparse action that parses ``--prompt-adapters`` values.

    Each value must have the form ``name=path``. The resulting list of
    ``PromptAdapterPath`` objects is stored on the namespace under
    ``self.dest``.
    """

    def __call__(
        self,
        parser: argparse.ArgumentParser,
        namespace: argparse.Namespace,
        values: Optional[Union[str, Sequence[str]]],
        option_string: Optional[str] = None,
    ):
        """Convert ``values`` to ``PromptAdapterPath`` objects and store them.

        Args:
            parser: Parser used to report malformed items via
                ``parser.error``.
            namespace: Namespace that receives the parsed list.
            values: Sequence of raw items; ``None`` is treated as empty.
            option_string: Option string that triggered the action (unused).

        Raises:
            TypeError: If ``values`` is a single string instead of a
                sequence of items.
        """
        if values is None:
            values = []
        if isinstance(values, str):
            raise TypeError("Expected values to be a list")

        adapter_list: List[PromptAdapterPath] = []
        for item in values:
            # Skip None / empty items, matching LoRAParserAction, instead of
            # failing with AttributeError on ``None.split``.
            if item in [None, '']:
                continue
            # Split on the first '=' only so that paths containing '=' are
            # accepted; a missing '=' is reported as a usage error rather
            # than an unpacking ValueError.
            name, separator, path = item.partition('=')
            if not separator:
                parser.error(
                    f"Invalid format for --prompt-adapters: {item} "
                    "(expected name=path)")
            adapter_list.append(PromptAdapterPath(name, path))
        setattr(namespace, self.dest, adapter_list)


Ethan Xu's avatar
Ethan Xu committed
78
def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
    """Register the OpenAI-compatible server's command line arguments.

    Adds the frontend options (networking, CORS, API key, LoRA / prompt
    adapters, chat template, SSL, middleware, tool calling) to ``parser``,
    then the engine arguments via ``AsyncEngineArgs.add_cli_args``, and
    finally a few logging / docs / usage options.

    Args:
        parser: The parser to add the arguments to.

    Returns:
        The parser returned by ``AsyncEngineArgs.add_cli_args`` with the
        remaining arguments added.
    """
    parser.add_argument("--host",
                        type=nullable_str,
                        default=None,
                        help="Host name.")
    parser.add_argument("--port", type=int, default=8000, help="Port number.")
    parser.add_argument(
        "--uvicorn-log-level",
        type=str,
        default="info",
        choices=['debug', 'info', 'warning', 'error', 'critical', 'trace'],
        help="Log level for uvicorn.")
    parser.add_argument("--allow-credentials",
                        action="store_true",
                        help="Allow credentials.")
    # The three --allowed-* options take a JSON value on the command line.
    parser.add_argument("--allowed-origins",
                        type=json.loads,
                        default=["*"],
                        help="Allowed origins.")
    parser.add_argument("--allowed-methods",
                        type=json.loads,
                        default=["*"],
                        help="Allowed methods.")
    parser.add_argument("--allowed-headers",
                        type=json.loads,
                        default=["*"],
                        help="Allowed headers.")
    parser.add_argument("--api-key",
                        type=nullable_str,
                        default=None,
                        help="If provided, the server will require this key "
                        "to be presented in the header.")
    parser.add_argument(
        "--lora-modules",
        type=nullable_str,
        default=None,
        nargs='+',
        action=LoRAParserAction,
        help="LoRA module configurations in either 'name=path' format "
        "or JSON format. "
        "Example (old format): ``'name=path'`` "
        "Example (new format): "
        "``{\"name\": \"name\", \"path\": \"lora_path\", "
        "\"base_model_name\": \"id\"}``")
    parser.add_argument(
        "--prompt-adapters",
        type=nullable_str,
        default=None,
        nargs='+',
        action=PromptAdapterParserAction,
        help="Prompt adapter configurations in the format name=path. "
        "Multiple adapters can be specified.")
    parser.add_argument("--chat-template",
                        type=nullable_str,
                        default=None,
                        help="The file path to the chat template, "
                        "or the template in single-line form "
                        "for the specified model.")
    parser.add_argument(
        '--chat-template-content-format',
        type=str,
        default="auto",
        choices=get_args(ChatTemplateContentFormatOption),
        help='The format to render message content within a chat template.'
        '\n\n'
        '* "string" will render the content as a string. '
        'Example: ``"Hello World"``\n'
        '* "openai" will render the content as a list of dictionaries, '
        'similar to OpenAI schema. '
        'Example: ``[{"type": "text", "text": "Hello world!"}]``')
    parser.add_argument("--response-role",
                        type=nullable_str,
                        default="assistant",
                        help="The role name to return if "
                        "``request.add_generation_prompt=true``.")
    parser.add_argument("--ssl-keyfile",
                        type=nullable_str,
                        default=None,
                        help="The file path to the SSL key file.")
    parser.add_argument("--ssl-certfile",
                        type=nullable_str,
                        default=None,
                        help="The file path to the SSL cert file.")
    parser.add_argument("--ssl-ca-certs",
                        type=nullable_str,
                        default=None,
                        help="The CA certificates file.")
    parser.add_argument(
        "--ssl-cert-reqs",
        type=int,
        default=int(ssl.CERT_NONE),
        help="Whether client certificate is required "
        "(see the stdlib ssl module's CERT_* constants).")
    parser.add_argument(
        "--root-path",
        type=nullable_str,
        default=None,
        help="FastAPI root_path when app is behind a path based routing proxy."
    )
    parser.add_argument(
        "--middleware",
        type=nullable_str,
        action="append",
        default=[],
        help="Additional ASGI middleware to apply to the app. "
        "We accept multiple --middleware arguments. "
        "The value should be an import path. "
        "If a function is provided, vLLM will add it to the server "
        "using ``@app.middleware('http')``. "
        "If a class is provided, vLLM will add it to the server "
        "using ``app.add_middleware()``. ")
    parser.add_argument(
        "--return-tokens-as-token-ids",
        action="store_true",
        help="When ``--max-logprobs`` is specified, represents single tokens "
        "as strings of the form 'token_id:{token_id}' so that tokens "
        "that are not JSON-encodable can be identified.")
    parser.add_argument(
        "--disable-frontend-multiprocessing",
        action="store_true",
        help="If specified, will run the OpenAI frontend server in the same "
        "process as the model serving engine.")
    parser.add_argument(
        "--enable-request-id-headers",
        action="store_true",
        help="If specified, API server will add X-Request-Id header to "
        "responses. Caution: this hurts performance at high QPS.")
    parser.add_argument(
        "--enable-auto-tool-choice",
        action="store_true",
        default=False,
        help="Enable auto tool choice for supported models. Use "
        "``--tool-call-parser`` to specify which parser to use.")

    # The registered parser names are only shown in the metavar; they are
    # not enforced through ``choices``.
    valid_tool_parsers = ToolParserManager.tool_parsers.keys()
    parser.add_argument(
        "--tool-call-parser",
        type=str,
        metavar="{" + ",".join(valid_tool_parsers) + "} or name registered in "
        "--tool-parser-plugin",
        default=None,
        help=
        "Select the tool call parser depending on the model that you're using."
        " This is used to parse the model-generated tool call into OpenAI API "
        "format. Required for ``--enable-auto-tool-choice``.")

    parser.add_argument(
        "--tool-parser-plugin",
        type=str,
        default="",
        help=
        "Specify the tool parser plugin used to parse the model-generated "
        "tool calls into OpenAI API format. The names registered in this "
        "plugin can be used in ``--tool-call-parser``.")

    # Engine arguments are registered after the frontend options above.
    parser = AsyncEngineArgs.add_cli_args(parser)

    parser.add_argument('--max-log-len',
                        type=int,
                        default=None,
                        help='Max number of prompt characters or prompt '
                        'ID numbers being printed in log.'
                        '\n\nDefault: Unlimited')

    parser.add_argument(
        "--disable-fastapi-docs",
        action='store_true',
        default=False,
        help="Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint."
    )
    parser.add_argument(
        "--enable-prompt-tokens-details",
        action='store_true',
        default=False,
        help="If set to True, enable prompt_tokens_details in usage.")

    return parser
Ethan Xu's avatar
Ethan Xu committed
255
256


257
258
259
260
261
262
263
264
265
266
267
268
269
270
def validate_parsed_serve_args(args: argparse.Namespace):
    """Run cheap sanity checks on serve arguments before anything is loaded.

    The checks are skipped when ``args`` carries a ``subparser`` attribute
    whose value is not ``"serve"``.

    Raises:
        TypeError: If ``--enable-auto-tool-choice`` is set without a
            ``--tool-call-parser``.
    """
    # A missing ``subparser`` attribute is treated the same as "serve".
    subcommand = getattr(args, "subparser", "serve")
    if subcommand != "serve":
        return

    # Raises if the chat template is likely invalid.
    validate_chat_template(args.chat_template)

    # Auto tool choice is only usable together with a tool call parser.
    if not args.enable_auto_tool_choice:
        return
    if args.tool_call_parser:
        return
    raise TypeError("Error: --enable-auto-tool-choice requires "
                    "--tool-call-parser")


Ethan Xu's avatar
Ethan Xu committed
271
272
273
274
def create_parser_for_docs() -> FlexibleArgumentParser:
    """Build a fully populated argument parser for documentation purposes.

    Creates a fresh ``FlexibleArgumentParser`` whose program name is the
    module invocation of the API server and registers every server argument
    on it through ``make_arg_parser``.
    """
    return make_arg_parser(
        FlexibleArgumentParser(prog="-m vllm.entrypoints.openai.api_server"))