Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
myrfy001
vllm_dsv4
Commits
bf668b5b
Unverified
Commit
bf668b5b
authored
Jul 30, 2025
by
Yan Pashkovsky
Committed by
GitHub
Jul 30, 2025
Browse files
[Feature] Support multiple api keys in server (#18548)
Signed-off-by:
Yan Pashkovsky
<
yanp.bugz@gmail.com
>
parent
da3e0bd6
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
30 additions
and
29 deletions
+30
-29
docs/getting_started/quickstart.md
docs/getting_started/quickstart.md
+1
-0
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+6
-6
vllm/entrypoints/openai/cli_args.py
vllm/entrypoints/openai/cli_args.py
+23
-23
No files found.
docs/getting_started/quickstart.md
View file @
bf668b5b
...
...
@@ -126,6 +126,7 @@ curl http://localhost:8000/v1/models
```
You can pass in the argument
`--api-key`
or environment variable
`VLLM_API_KEY`
to enable the server to check for API key in the header.
You can pass multiple keys after
`--api-key`
, and the server will accept any of the keys passed; this can be useful for key rotation.
### OpenAI Completions API with vLLM
...
...
vllm/entrypoints/openai/api_server.py
View file @
bf668b5b
...
...
@@ -1239,9 +1239,9 @@ class AuthenticationMiddleware:
2. The request path doesn't start with /v1 (e.g. /health).
"""
def
__init__
(
self
,
app
:
ASGIApp
,
api_
token
:
str
)
->
None
:
def
__init__
(
self
,
app
:
ASGIApp
,
token
s
:
list
[
str
]
)
->
None
:
self
.
app
=
app
self
.
api_token
=
api_
token
self
.
api_token
s
=
{
f
"Bearer
{
token
}
"
for
token
in
token
s
}
def
__call__
(
self
,
scope
:
Scope
,
receive
:
Receive
,
send
:
Send
)
->
Awaitable
[
None
]:
...
...
@@ -1255,7 +1255,7 @@ class AuthenticationMiddleware:
headers
=
Headers
(
scope
=
scope
)
# Type narrow to satisfy mypy.
if
url_path
.
startswith
(
"/v1"
)
and
headers
.
get
(
"Authorization"
)
!=
f
"Bearer
{
self
.
api_token
}
"
:
"Authorization"
)
not
in
self
.
api_token
s
:
response
=
JSONResponse
(
content
=
{
"error"
:
"Unauthorized"
},
status_code
=
401
)
return
response
(
scope
,
receive
,
send
)
...
...
@@ -1303,7 +1303,7 @@ class ScalingMiddleware:
"""
Middleware that checks if the model is currently scaling and
returns a 503 Service Unavailable response if it is.
This middleware applies to all HTTP requests and prevents
processing when the model is in a scaling state.
"""
...
...
@@ -1512,8 +1512,8 @@ def build_app(args: Namespace) -> FastAPI:
status_code
=
HTTPStatus
.
BAD_REQUEST
)
# Ensure --api-key option from CLI takes precedence over VLLM_API_KEY
if
token
:
=
args
.
api_key
or
envs
.
VLLM_API_KEY
:
app
.
add_middleware
(
AuthenticationMiddleware
,
api_
token
=
token
)
if
token
s
:
=
[
key
for
key
in
(
args
.
api_key
or
[
envs
.
VLLM_API_KEY
])
if
key
]
:
app
.
add_middleware
(
AuthenticationMiddleware
,
token
s
=
token
s
)
if
args
.
enable_request_id_headers
:
app
.
add_middleware
(
XRequestIdMiddleware
)
...
...
vllm/entrypoints/openai/cli_args.py
View file @
bf668b5b
...
...
@@ -85,22 +85,22 @@ class FrontendArgs:
"""Allowed methods."""
allowed_headers
:
list
[
str
]
=
field
(
default_factory
=
lambda
:
[
"*"
])
"""Allowed headers."""
api_key
:
Optional
[
str
]
=
None
"""If provided, the server will require
this
key to be presented in
the
header."""
api_key
:
Optional
[
list
[
str
]
]
=
None
"""If provided, the server will require
one of these
key
s
to be presented in
the
header."""
lora_modules
:
Optional
[
list
[
LoRAModulePath
]]
=
None
"""LoRA modules configurations in either 'name=path' format or JSON format
or JSON list format. Example (old format): `'name=path'` Example (new
format): `{
\"
name
\"
:
\"
name
\"
,
\"
path
\"
:
\"
lora_path
\"
,
or JSON list format. Example (old format): `'name=path'` Example (new
format): `{
\"
name
\"
:
\"
name
\"
,
\"
path
\"
:
\"
lora_path
\"
,
\"
base_model_name
\"
:
\"
id
\"
}`"""
chat_template
:
Optional
[
str
]
=
None
"""The file path to the chat template, or the template in single-line form
"""The file path to the chat template, or the template in single-line form
for the specified model."""
chat_template_content_format
:
ChatTemplateContentFormatOption
=
"auto"
"""The format to render message content within a chat template.
* "string" will render the content as a string. Example: `"Hello World"`
* "openai" will render the content as a list of dictionaries, similar to OpenAI
* "openai" will render the content as a list of dictionaries, similar to OpenAI
schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
response_role
:
str
=
"assistant"
"""The role name to return if `request.add_generation_prompt=true`."""
...
...
@@ -117,40 +117,40 @@ schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
root_path
:
Optional
[
str
]
=
None
"""FastAPI root_path when app is behind a path based routing proxy."""
middleware
:
list
[
str
]
=
field
(
default_factory
=
lambda
:
[])
"""Additional ASGI middleware to apply to the app. We accept multiple
--middleware arguments. The value should be an import path. If a function
is provided, vLLM will add it to the server using
`@app.middleware('http')`. If a class is provided, vLLM will
"""Additional ASGI middleware to apply to the app. We accept multiple
--middleware arguments. The value should be an import path. If a function
is provided, vLLM will add it to the server using
`@app.middleware('http')`. If a class is provided, vLLM will
add it to the server using `app.add_middleware()`."""
return_tokens_as_token_ids
:
bool
=
False
"""When `--max-logprobs` is specified, represents single tokens as
strings of the form 'token_id:{token_id}' so that tokens that are not
"""When `--max-logprobs` is specified, represents single tokens as
strings of the form 'token_id:{token_id}' so that tokens that are not
JSON-encodable can be identified."""
disable_frontend_multiprocessing
:
bool
=
False
"""If specified, will run the OpenAI frontend server in the same process as
"""If specified, will run the OpenAI frontend server in the same process as
the model serving engine."""
enable_request_id_headers
:
bool
=
False
"""If specified, API server will add X-Request-Id header to responses.
"""If specified, API server will add X-Request-Id header to responses.
Caution: this hurts performance at high QPS."""
enable_auto_tool_choice
:
bool
=
False
"""If specified, exclude tool definitions in prompts when
"""If specified, exclude tool definitions in prompts when
tool_choice='none'."""
exclude_tools_when_tool_choice_none
:
bool
=
False
"""Enable auto tool choice for supported models. Use `--tool-call-parser`
"""Enable auto tool choice for supported models. Use `--tool-call-parser`
to specify which parser to use."""
tool_call_parser
:
Optional
[
str
]
=
None
"""Select the tool call parser depending on the model that you're using.
This is used to parse the model-generated tool call into OpenAI API format.
Required for `--enable-auto-tool-choice`. You can choose any option from
"""Select the tool call parser depending on the model that you're using.
This is used to parse the model-generated tool call into OpenAI API format.
Required for `--enable-auto-tool-choice`. You can choose any option from
the built-in parsers or register a plugin via `--tool-parser-plugin`."""
tool_parser_plugin
:
str
=
""
"""Special the tool parser plugin write to parse the model-generated tool
into OpenAI API format, the name register in this plugin can be used in
"""Special the tool parser plugin write to parse the model-generated tool
into OpenAI API format, the name register in this plugin can be used in
`--tool-call-parser`."""
log_config_file
:
Optional
[
str
]
=
envs
.
VLLM_LOGGING_CONFIG_PATH
"""Path to logging config JSON file for both vllm and uvicorn"""
max_log_len
:
Optional
[
int
]
=
None
"""Max number of prompt characters or prompt ID numbers being printed in
"""Max number of prompt characters or prompt ID numbers being printed in
log. The default of None means unlimited."""
disable_fastapi_docs
:
bool
=
False
"""Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint."""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment