cli_args.py 9.65 KB
Newer Older
1
2
3
4
5
6
7
8
9
"""
This file contains the command line arguments for the vLLM's
OpenAI-compatible server. It is kept in a separate file for documentation
purposes.
"""

import argparse
import json
import ssl
10
from typing import List, Optional, Sequence, Union
11

12
from vllm.engine.arg_utils import AsyncEngineArgs, nullable_str
13
from vllm.entrypoints.chat_utils import validate_chat_template
14
15
from vllm.entrypoints.openai.serving_engine import (LoRAModulePath,
                                                    PromptAdapterPath)
16
from vllm.entrypoints.openai.tool_parsers import ToolParserManager
17
from vllm.utils import FlexibleArgumentParser
18
19
20
21


class LoRAParserAction(argparse.Action):

22
23
24
25
26
27
28
29
30
31
32
33
34
    def __call__(
        self,
        parser: argparse.ArgumentParser,
        namespace: argparse.Namespace,
        values: Optional[Union[str, Sequence[str]]],
        option_string: Optional[str] = None,
    ):
        if values is None:
            values = []
        if isinstance(values, str):
            raise TypeError("Expected values to be a list")

        lora_list: List[LoRAModulePath] = []
35
        for item in values:
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
            if item in [None, '']:  # Skip if item is None or empty string
                continue
            if '=' in item and ',' not in item:  # Old format: name=path
                name, path = item.split('=')
                lora_list.append(LoRAModulePath(name, path))
            else:  # Assume JSON format
                try:
                    lora_dict = json.loads(item)
                    lora = LoRAModulePath(**lora_dict)
                    lora_list.append(lora)
                except json.JSONDecodeError:
                    parser.error(
                        f"Invalid JSON format for --lora-modules: {item}")
                except TypeError as e:
                    parser.error(
                        f"Invalid fields for --lora-modules: {item} - {str(e)}"
                    )
53
54
55
        setattr(namespace, self.dest, lora_list)


56
57
class PromptAdapterParserAction(argparse.Action):

58
59
60
61
62
63
64
65
66
67
68
69
70
    def __call__(
        self,
        parser: argparse.ArgumentParser,
        namespace: argparse.Namespace,
        values: Optional[Union[str, Sequence[str]]],
        option_string: Optional[str] = None,
    ):
        if values is None:
            values = []
        if isinstance(values, str):
            raise TypeError("Expected values to be a list")

        adapter_list: List[PromptAdapterPath] = []
71
72
73
74
75
76
        for item in values:
            name, path = item.split('=')
            adapter_list.append(PromptAdapterPath(name, path))
        setattr(namespace, self.dest, adapter_list)


Ethan Xu's avatar
Ethan Xu committed
77
def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
78
79
    parser.add_argument("--host",
                        type=nullable_str,
80
                        default=None,
81
                        help="host name")
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
    parser.add_argument("--port", type=int, default=8000, help="port number")
    parser.add_argument(
        "--uvicorn-log-level",
        type=str,
        default="info",
        choices=['debug', 'info', 'warning', 'error', 'critical', 'trace'],
        help="log level for uvicorn")
    parser.add_argument("--allow-credentials",
                        action="store_true",
                        help="allow credentials")
    parser.add_argument("--allowed-origins",
                        type=json.loads,
                        default=["*"],
                        help="allowed origins")
    parser.add_argument("--allowed-methods",
                        type=json.loads,
                        default=["*"],
                        help="allowed methods")
    parser.add_argument("--allowed-headers",
                        type=json.loads,
                        default=["*"],
                        help="allowed headers")
    parser.add_argument("--api-key",
105
                        type=nullable_str,
106
107
108
109
110
                        default=None,
                        help="If provided, the server will require this key "
                        "to be presented in the header.")
    parser.add_argument(
        "--lora-modules",
111
        type=nullable_str,
112
113
114
        default=None,
        nargs='+',
        action=LoRAParserAction,
115
116
117
118
119
120
        help="LoRA module configurations in either 'name=path' format"
        "or JSON format. "
        "Example (old format): 'name=path' "
        "Example (new format): "
        "'{\"name\": \"name\", \"local_path\": \"path\", "
        "\"base_model_name\": \"id\"}'")
121
122
123
124
125
126
127
128
    parser.add_argument(
        "--prompt-adapters",
        type=nullable_str,
        default=None,
        nargs='+',
        action=PromptAdapterParserAction,
        help="Prompt adapter configurations in the format name=path. "
        "Multiple adapters can be specified.")
129
    parser.add_argument("--chat-template",
130
                        type=nullable_str,
131
132
133
134
135
                        default=None,
                        help="The file path to the chat template, "
                        "or the template in single-line form "
                        "for the specified model")
    parser.add_argument("--response-role",
136
                        type=nullable_str,
137
138
139
140
                        default="assistant",
                        help="The role name to return if "
                        "`request.add_generation_prompt=true`.")
    parser.add_argument("--ssl-keyfile",
141
                        type=nullable_str,
142
143
144
                        default=None,
                        help="The file path to the SSL key file")
    parser.add_argument("--ssl-certfile",
145
                        type=nullable_str,
146
147
148
                        default=None,
                        help="The file path to the SSL cert file")
    parser.add_argument("--ssl-ca-certs",
149
                        type=nullable_str,
150
151
152
153
154
155
156
157
158
159
                        default=None,
                        help="The CA certificates file")
    parser.add_argument(
        "--ssl-cert-reqs",
        type=int,
        default=int(ssl.CERT_NONE),
        help="Whether client certificate is required (see stdlib ssl module's)"
    )
    parser.add_argument(
        "--root-path",
160
        type=nullable_str,
161
162
163
164
        default=None,
        help="FastAPI root_path when app is behind a path based routing proxy")
    parser.add_argument(
        "--middleware",
165
        type=nullable_str,
166
167
168
169
170
171
172
173
174
        action="append",
        default=[],
        help="Additional ASGI middleware to apply to the app. "
        "We accept multiple --middleware arguments. "
        "The value should be an import path. "
        "If a function is provided, vLLM will add it to the server "
        "using @app.middleware('http'). "
        "If a class is provided, vLLM will add it to the server "
        "using app.add_middleware(). ")
175
176
177
    parser.add_argument(
        "--return-tokens-as-token-ids",
        action="store_true",
178
179
        help="When --max-logprobs is specified, represents single tokens as "
        "strings of the form 'token_id:{token_id}' so that tokens that "
180
        "are not JSON-encodable can be identified.")
181
182
183
184
185
    parser.add_argument(
        "--disable-frontend-multiprocessing",
        action="store_true",
        help="If specified, will run the OpenAI frontend server in the same "
        "process as the model serving engine.")
186

187
188
189
190
191
192
    parser.add_argument(
        "--enable-auto-tool-choice",
        action="store_true",
        default=False,
        help=
        "Enable auto tool choice for supported models. Use --tool-call-parser"
193
        " to specify which parser to use")
194

195
    valid_tool_parsers = ToolParserManager.tool_parsers.keys()
196
197
198
    parser.add_argument(
        "--tool-call-parser",
        type=str,
199
200
        metavar="{" + ",".join(valid_tool_parsers) + "} or name registered in "
        "--tool-parser-plugin",
201
202
203
204
205
206
        default=None,
        help=
        "Select the tool call parser depending on the model that you're using."
        " This is used to parse the model-generated tool call into OpenAI API "
        "format. Required for --enable-auto-tool-choice.")

207
208
209
210
211
212
213
214
215
    parser.add_argument(
        "--tool-parser-plugin",
        type=str,
        default="",
        help=
        "Special the tool parser plugin write to parse the model-generated tool"
        " into OpenAI API format, the name register in this plugin can be used "
        "in --tool-call-parser.")

216
    parser = AsyncEngineArgs.add_cli_args(parser)
217
218
219
220
221
222
223
224

    parser.add_argument('--max-log-len',
                        type=int,
                        default=None,
                        help='Max number of prompt characters or prompt '
                        'ID numbers being printed in log.'
                        '\n\nDefault: Unlimited')

225
226
227
228
229
230
    parser.add_argument(
        "--disable-fastapi-docs",
        action='store_true',
        default=False,
        help="Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint"
    )
231
232
233
234
235
    parser.add_argument(
        "--enable-prompt-tokens-details",
        action='store_true',
        default=False,
        help="If set to True, enable prompt_tokens_details in usage.")
236

237
    return parser
Ethan Xu's avatar
Ethan Xu committed
238
239


240
241
242
243
244
245
246
247
248
249
250
251
252
253
def validate_parsed_serve_args(args: argparse.Namespace):
    """Quick checks for model serve args that raise prior to loading."""
    if hasattr(args, "subparser") and args.subparser != "serve":
        return

    # Ensure that the chat template is valid; raises if it likely isn't
    validate_chat_template(args.chat_template)

    # Enable auto tool needs a tool call parser to be valid
    if args.enable_auto_tool_choice and not args.tool_call_parser:
        raise TypeError("Error: --enable-auto-tool-choice requires "
                        "--tool-call-parser")


Ethan Xu's avatar
Ethan Xu committed
254
255
256
257
def create_parser_for_docs() -> FlexibleArgumentParser:
    parser_for_docs = FlexibleArgumentParser(
        prog="-m vllm.entrypoints.openai.api_server")
    return make_arg_parser(parser_for_docs)