Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
57e729e8
Unverified
Commit
57e729e8
authored
Jan 16, 2025
by
maang-h
Committed by
GitHub
Jan 15, 2025
Browse files
[Doc]: Update `OpenAI-Compatible Server` documents (#12082)
parent
de0526f6
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
38 additions
and
38 deletions
+38
-38
vllm/engine/arg_utils.py
vllm/engine/arg_utils.py
+8
-8
vllm/entrypoints/openai/cli_args.py
vllm/entrypoints/openai/cli_args.py
+30
-30
No files found.
vllm/engine/arg_utils.py
View file @
57e729e8
...
...
@@ -238,7 +238,7 @@ class EngineArgs:
choices
=
get_args
(
TaskOption
),
help
=
'The task to use the model for. Each vLLM instance only '
'supports one task, even if the same model can be used for '
'multiple tasks. When the model only supports one task, "auto" '
'multiple tasks. When the model only supports one task,
``
"auto"
``
'
'can be used to select it; otherwise, you must specify explicitly '
'which task to use.'
)
parser
.
add_argument
(
...
...
@@ -250,7 +250,7 @@ class EngineArgs:
parser
.
add_argument
(
'--skip-tokenizer-init'
,
action
=
'store_true'
,
help
=
'Skip initialization of tokenizer and detokenizer'
)
help
=
'Skip initialization of tokenizer and detokenizer
.
'
)
parser
.
add_argument
(
'--revision'
,
type
=
nullable_str
,
...
...
@@ -401,7 +401,7 @@ class EngineArgs:
parser
.
add_argument
(
'--worker-use-ray'
,
action
=
'store_true'
,
help
=
'Deprecated, use --distributed-executor-backend=ray.'
)
help
=
'Deprecated, use
``
--distributed-executor-backend=ray
``
.'
)
parser
.
add_argument
(
'--pipeline-parallel-size'
,
'-pp'
,
type
=
int
,
...
...
@@ -430,7 +430,7 @@ class EngineArgs:
choices
=
[
8
,
16
,
32
,
64
,
128
],
help
=
'Token block size for contiguous chunks of '
'tokens. This is ignored on neuron devices and '
'set to max-model-len. On CUDA devices, '
'set to
``--
max-model-len
``
. On CUDA devices, '
'only block sizes up to 32 are supported. '
'On HPU devices, block size defaults to 128.'
)
...
...
@@ -439,12 +439,12 @@ class EngineArgs:
action
=
argparse
.
BooleanOptionalAction
,
default
=
EngineArgs
.
enable_prefix_caching
,
help
=
"Enables automatic prefix caching. "
"Use --no-enable-prefix-caching to disable explicitly."
,
"Use
``
--no-enable-prefix-caching
``
to disable explicitly."
,
)
parser
.
add_argument
(
'--disable-sliding-window'
,
action
=
'store_true'
,
help
=
'Disables sliding window, '
'capping to sliding window size'
)
'capping to sliding window size
.
'
)
parser
.
add_argument
(
'--use-v2-block-manager'
,
action
=
'store_true'
,
default
=
True
,
...
...
@@ -861,7 +861,7 @@ class EngineArgs:
"of the provided names. The model name in the model "
"field of a response will be the first name in this "
"list. If not specified, the model name will be the "
"same as the `--model` argument. Noted that this name(s) "
"same as the
`
`--model`
`
argument. Noted that this name(s) "
"will also be used in `model_name` tag content of "
"prometheus metrics, if multiple names provided, metrics "
"tag will take the first one."
)
...
...
@@ -881,7 +881,7 @@ class EngineArgs:
default
=
None
,
help
=
"Valid choices are "
+
","
.
join
(
ALLOWED_DETAILED_TRACE_MODULES
)
+
". It makes sense to set this only if --otlp-traces-endpoint is"
". It makes sense to set this only if
``
--otlp-traces-endpoint
``
is"
" set. If set, it will collect detailed traces for the specified "
"modules. This involves use of possibly costly and or blocking "
"operations and hence might have a performance impact."
)
...
...
vllm/entrypoints/openai/cli_args.py
View file @
57e729e8
...
...
@@ -79,29 +79,29 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
parser
.
add_argument
(
"--host"
,
type
=
nullable_str
,
default
=
None
,
help
=
"
h
ost name"
)
parser
.
add_argument
(
"--port"
,
type
=
int
,
default
=
8000
,
help
=
"
p
ort number"
)
help
=
"
H
ost name
.
"
)
parser
.
add_argument
(
"--port"
,
type
=
int
,
default
=
8000
,
help
=
"
P
ort number
.
"
)
parser
.
add_argument
(
"--uvicorn-log-level"
,
type
=
str
,
default
=
"info"
,
choices
=
[
'debug'
,
'info'
,
'warning'
,
'error'
,
'critical'
,
'trace'
],
help
=
"
l
og level for uvicorn"
)
help
=
"
L
og level for uvicorn
.
"
)
parser
.
add_argument
(
"--allow-credentials"
,
action
=
"store_true"
,
help
=
"
a
llow credentials"
)
help
=
"
A
llow credentials
.
"
)
parser
.
add_argument
(
"--allowed-origins"
,
type
=
json
.
loads
,
default
=
[
"*"
],
help
=
"
a
llowed origins"
)
help
=
"
A
llowed origins
.
"
)
parser
.
add_argument
(
"--allowed-methods"
,
type
=
json
.
loads
,
default
=
[
"*"
],
help
=
"
a
llowed methods"
)
help
=
"
A
llowed methods
.
"
)
parser
.
add_argument
(
"--allowed-headers"
,
type
=
json
.
loads
,
default
=
[
"*"
],
help
=
"
a
llowed headers"
)
help
=
"
A
llowed headers
.
"
)
parser
.
add_argument
(
"--api-key"
,
type
=
nullable_str
,
default
=
None
,
...
...
@@ -115,10 +115,10 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
action
=
LoRAParserAction
,
help
=
"LoRA module configurations in either 'name=path' format"
"or JSON format. "
"Example (old format): 'name=path' "
"Example (old format):
``
'name=path'
``
"
"Example (new format): "
"
'
{
\"
name
\"
:
\"
name
\"
,
\"
local_path
\"
:
\"
path
\"
, "
"
\"
base_model_name
\"
:
\"
id
\"
}
'
"
)
"
``
{
\"
name
\"
:
\"
name
\"
,
\"
local_path
\"
:
\"
path
\"
, "
"
\"
base_model_name
\"
:
\"
id
\"
}
``
"
)
parser
.
add_argument
(
"--prompt-adapters"
,
type
=
nullable_str
,
...
...
@@ -132,7 +132,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
default
=
None
,
help
=
"The file path to the chat template, "
"or the template in single-line form "
"for the specified model"
)
"for the specified model
.
"
)
parser
.
add_argument
(
'--chat-template-content-format'
,
type
=
str
,
...
...
@@ -141,38 +141,39 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
help
=
'The format to render message content within a chat template.'
'
\n\n
'
'* "string" will render the content as a string. '
'Example: "Hello World"
\n
'
'Example:
``
"Hello World"
``
\n
'
'* "openai" will render the content as a list of dictionaries, '
'similar to OpenAI schema. '
'Example: [{"type": "text", "text": "Hello world!"}]'
)
'Example:
``
[{"type": "text", "text": "Hello world!"}]
``
'
)
parser
.
add_argument
(
"--response-role"
,
type
=
nullable_str
,
default
=
"assistant"
,
help
=
"The role name to return if "
"`request.add_generation_prompt=true`."
)
"`
`
request.add_generation_prompt=true`
`
."
)
parser
.
add_argument
(
"--ssl-keyfile"
,
type
=
nullable_str
,
default
=
None
,
help
=
"The file path to the SSL key file"
)
help
=
"The file path to the SSL key file
.
"
)
parser
.
add_argument
(
"--ssl-certfile"
,
type
=
nullable_str
,
default
=
None
,
help
=
"The file path to the SSL cert file"
)
help
=
"The file path to the SSL cert file
.
"
)
parser
.
add_argument
(
"--ssl-ca-certs"
,
type
=
nullable_str
,
default
=
None
,
help
=
"The CA certificates file"
)
help
=
"The CA certificates file
.
"
)
parser
.
add_argument
(
"--ssl-cert-reqs"
,
type
=
int
,
default
=
int
(
ssl
.
CERT_NONE
),
help
=
"Whether client certificate is required (see stdlib ssl module's)"
help
=
"Whether client certificate is required (see stdlib ssl module's)
.
"
)
parser
.
add_argument
(
"--root-path"
,
type
=
nullable_str
,
default
=
None
,
help
=
"FastAPI root_path when app is behind a path based routing proxy"
)
help
=
"FastAPI root_path when app is behind a path based routing proxy."
)
parser
.
add_argument
(
"--middleware"
,
type
=
nullable_str
,
...
...
@@ -182,15 +183,15 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
"We accept multiple --middleware arguments. "
"The value should be an import path. "
"If a function is provided, vLLM will add it to the server "
"using @app.middleware('http'). "
"using
``
@app.middleware('http')
``
. "
"If a class is provided, vLLM will add it to the server "
"using app.add_middleware(). "
)
"using
``
app.add_middleware()
``
. "
)
parser
.
add_argument
(
"--return-tokens-as-token-ids"
,
action
=
"store_true"
,
help
=
"When --max-logprobs is specified, represents single tokens
as
"
"strings of the form 'token_id:{token_id}' so that tokens
that
"
"are not JSON-encodable can be identified."
)
help
=
"When
``
--max-logprobs
``
is specified, represents single tokens "
"
as
strings of the form 'token_id:{token_id}' so that tokens "
"
that
are not JSON-encodable can be identified."
)
parser
.
add_argument
(
"--disable-frontend-multiprocessing"
,
action
=
"store_true"
,
...
...
@@ -205,9 +206,8 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
"--enable-auto-tool-choice"
,
action
=
"store_true"
,
default
=
False
,
help
=
"Enable auto tool choice for supported models. Use --tool-call-parser"
" to specify which parser to use"
)
help
=
"Enable auto tool choice for supported models. Use "
"``--tool-call-parser`` to specify which parser to use."
)
valid_tool_parsers
=
ToolParserManager
.
tool_parsers
.
keys
()
parser
.
add_argument
(
...
...
@@ -219,7 +219,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
help
=
"Select the tool call parser depending on the model that you're using."
" This is used to parse the model-generated tool call into OpenAI API "
"format. Required for --enable-auto-tool-choice."
)
"format. Required for
``
--enable-auto-tool-choice
``
."
)
parser
.
add_argument
(
"--tool-parser-plugin"
,
...
...
@@ -228,7 +228,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
help
=
"Special the tool parser plugin write to parse the model-generated tool"
" into OpenAI API format, the name register in this plugin can be used "
"in --tool-call-parser."
)
"in
``
--tool-call-parser
``
."
)
parser
=
AsyncEngineArgs
.
add_cli_args
(
parser
)
...
...
@@ -243,7 +243,7 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
"--disable-fastapi-docs"
,
action
=
'store_true'
,
default
=
False
,
help
=
"Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint"
help
=
"Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint
.
"
)
parser
.
add_argument
(
"--enable-prompt-tokens-details"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment