Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
bf668b5b
Unverified
Commit
bf668b5b
authored
Jul 30, 2025
by
Yan Pashkovsky
Committed by
GitHub
Jul 30, 2025
Browse files
[Feature] Support multiple api keys in server (#18548)
Signed-off-by:
Yan Pashkovsky
<
yanp.bugz@gmail.com
>
parent
da3e0bd6
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
30 additions
and
29 deletions
+30
-29
docs/getting_started/quickstart.md
docs/getting_started/quickstart.md
+1
-0
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+6
-6
vllm/entrypoints/openai/cli_args.py
vllm/entrypoints/openai/cli_args.py
+23
-23
No files found.
docs/getting_started/quickstart.md
View file @
bf668b5b
...
@@ -126,6 +126,7 @@ curl http://localhost:8000/v1/models
...
@@ -126,6 +126,7 @@ curl http://localhost:8000/v1/models
```
```
You can pass in the argument
`--api-key`
or environment variable
`VLLM_API_KEY`
to enable the server to check for an API key in the header.
You can pass in the argument
`--api-key`
or environment variable
`VLLM_API_KEY`
to enable the server to check for an API key in the header.
You can pass multiple keys after
`--api-key`
, and the server will accept any of the keys passed; this can be useful for key rotation.
### OpenAI Completions API with vLLM
### OpenAI Completions API with vLLM
...
...
vllm/entrypoints/openai/api_server.py
View file @
bf668b5b
...
@@ -1239,9 +1239,9 @@ class AuthenticationMiddleware:
...
@@ -1239,9 +1239,9 @@ class AuthenticationMiddleware:
2. The request path doesn't start with /v1 (e.g. /health).
2. The request path doesn't start with /v1 (e.g. /health).
"""
"""
def
__init__
(
self
,
app
:
ASGIApp
,
api_
token
:
str
)
->
None
:
def
__init__
(
self
,
app
:
ASGIApp
,
token
s
:
list
[
str
]
)
->
None
:
self
.
app
=
app
self
.
app
=
app
self
.
api_token
=
api_
token
self
.
api_token
s
=
{
f
"Bearer
{
token
}
"
for
token
in
token
s
}
def
__call__
(
self
,
scope
:
Scope
,
receive
:
Receive
,
def
__call__
(
self
,
scope
:
Scope
,
receive
:
Receive
,
send
:
Send
)
->
Awaitable
[
None
]:
send
:
Send
)
->
Awaitable
[
None
]:
...
@@ -1255,7 +1255,7 @@ class AuthenticationMiddleware:
...
@@ -1255,7 +1255,7 @@ class AuthenticationMiddleware:
headers
=
Headers
(
scope
=
scope
)
headers
=
Headers
(
scope
=
scope
)
# Type narrow to satisfy mypy.
# Type narrow to satisfy mypy.
if
url_path
.
startswith
(
"/v1"
)
and
headers
.
get
(
if
url_path
.
startswith
(
"/v1"
)
and
headers
.
get
(
"Authorization"
)
!=
f
"Bearer
{
self
.
api_token
}
"
:
"Authorization"
)
not
in
self
.
api_token
s
:
response
=
JSONResponse
(
content
=
{
"error"
:
"Unauthorized"
},
response
=
JSONResponse
(
content
=
{
"error"
:
"Unauthorized"
},
status_code
=
401
)
status_code
=
401
)
return
response
(
scope
,
receive
,
send
)
return
response
(
scope
,
receive
,
send
)
...
@@ -1303,7 +1303,7 @@ class ScalingMiddleware:
...
@@ -1303,7 +1303,7 @@ class ScalingMiddleware:
"""
"""
Middleware that checks if the model is currently scaling and
Middleware that checks if the model is currently scaling and
returns a 503 Service Unavailable response if it is.
returns a 503 Service Unavailable response if it is.
This middleware applies to all HTTP requests and prevents
This middleware applies to all HTTP requests and prevents
processing when the model is in a scaling state.
processing when the model is in a scaling state.
"""
"""
...
@@ -1512,8 +1512,8 @@ def build_app(args: Namespace) -> FastAPI:
...
@@ -1512,8 +1512,8 @@ def build_app(args: Namespace) -> FastAPI:
status_code
=
HTTPStatus
.
BAD_REQUEST
)
status_code
=
HTTPStatus
.
BAD_REQUEST
)
# Ensure --api-key option from CLI takes precedence over VLLM_API_KEY
# Ensure --api-key option from CLI takes precedence over VLLM_API_KEY
if
token
:
=
args
.
api_key
or
envs
.
VLLM_API_KEY
:
if
token
s
:
=
[
key
for
key
in
(
args
.
api_key
or
[
envs
.
VLLM_API_KEY
])
if
key
]
:
app
.
add_middleware
(
AuthenticationMiddleware
,
api_
token
=
token
)
app
.
add_middleware
(
AuthenticationMiddleware
,
token
s
=
token
s
)
if
args
.
enable_request_id_headers
:
if
args
.
enable_request_id_headers
:
app
.
add_middleware
(
XRequestIdMiddleware
)
app
.
add_middleware
(
XRequestIdMiddleware
)
...
...
vllm/entrypoints/openai/cli_args.py
View file @
bf668b5b
...
@@ -85,22 +85,22 @@ class FrontendArgs:
...
@@ -85,22 +85,22 @@ class FrontendArgs:
"""Allowed methods."""
"""Allowed methods."""
allowed_headers
:
list
[
str
]
=
field
(
default_factory
=
lambda
:
[
"*"
])
allowed_headers
:
list
[
str
]
=
field
(
default_factory
=
lambda
:
[
"*"
])
"""Allowed headers."""
"""Allowed headers."""
api_key
:
Optional
[
str
]
=
None
api_key
:
Optional
[
list
[
str
]
]
=
None
"""If provided, the server will require
this
key to be presented in
the
"""If provided, the server will require
one of these
key
s
to be presented in
header."""
the
header."""
lora_modules
:
Optional
[
list
[
LoRAModulePath
]]
=
None
lora_modules
:
Optional
[
list
[
LoRAModulePath
]]
=
None
"""LoRA modules configurations in either 'name=path' format or JSON format
"""LoRA modules configurations in either 'name=path' format or JSON format
or JSON list format. Example (old format): `'name=path'` Example (new
or JSON list format. Example (old format): `'name=path'` Example (new
format): `{
\"
name
\"
:
\"
name
\"
,
\"
path
\"
:
\"
lora_path
\"
,
format): `{
\"
name
\"
:
\"
name
\"
,
\"
path
\"
:
\"
lora_path
\"
,
\"
base_model_name
\"
:
\"
id
\"
}`"""
\"
base_model_name
\"
:
\"
id
\"
}`"""
chat_template
:
Optional
[
str
]
=
None
chat_template
:
Optional
[
str
]
=
None
"""The file path to the chat template, or the template in single-line form
"""The file path to the chat template, or the template in single-line form
for the specified model."""
for the specified model."""
chat_template_content_format
:
ChatTemplateContentFormatOption
=
"auto"
chat_template_content_format
:
ChatTemplateContentFormatOption
=
"auto"
"""The format to render message content within a chat template.
"""The format to render message content within a chat template.
* "string" will render the content as a string. Example: `"Hello World"`
* "string" will render the content as a string. Example: `"Hello World"`
* "openai" will render the content as a list of dictionaries, similar to OpenAI
* "openai" will render the content as a list of dictionaries, similar to OpenAI
schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
response_role
:
str
=
"assistant"
response_role
:
str
=
"assistant"
"""The role name to return if `request.add_generation_prompt=true`."""
"""The role name to return if `request.add_generation_prompt=true`."""
...
@@ -117,40 +117,40 @@ schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
...
@@ -117,40 +117,40 @@ schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
root_path
:
Optional
[
str
]
=
None
root_path
:
Optional
[
str
]
=
None
"""FastAPI root_path when app is behind a path based routing proxy."""
"""FastAPI root_path when app is behind a path based routing proxy."""
middleware
:
list
[
str
]
=
field
(
default_factory
=
lambda
:
[])
middleware
:
list
[
str
]
=
field
(
default_factory
=
lambda
:
[])
"""Additional ASGI middleware to apply to the app. We accept multiple
"""Additional ASGI middleware to apply to the app. We accept multiple
--middleware arguments. The value should be an import path. If a function
--middleware arguments. The value should be an import path. If a function
is provided, vLLM will add it to the server using
is provided, vLLM will add it to the server using
`@app.middleware('http')`. If a class is provided, vLLM will
`@app.middleware('http')`. If a class is provided, vLLM will
add it to the server using `app.add_middleware()`."""
add it to the server using `app.add_middleware()`."""
return_tokens_as_token_ids
:
bool
=
False
return_tokens_as_token_ids
:
bool
=
False
"""When `--max-logprobs` is specified, represents single tokens as
"""When `--max-logprobs` is specified, represents single tokens as
strings of the form 'token_id:{token_id}' so that tokens that are not
strings of the form 'token_id:{token_id}' so that tokens that are not
JSON-encodable can be identified."""
JSON-encodable can be identified."""
disable_frontend_multiprocessing
:
bool
=
False
disable_frontend_multiprocessing
:
bool
=
False
"""If specified, will run the OpenAI frontend server in the same process as
"""If specified, will run the OpenAI frontend server in the same process as
the model serving engine."""
the model serving engine."""
enable_request_id_headers
:
bool
=
False
enable_request_id_headers
:
bool
=
False
"""If specified, API server will add X-Request-Id header to responses.
"""If specified, API server will add X-Request-Id header to responses.
Caution: this hurts performance at high QPS."""
Caution: this hurts performance at high QPS."""
enable_auto_tool_choice
:
bool
=
False
enable_auto_tool_choice
:
bool
=
False
"""Enable auto tool choice for supported models. Use `--tool-call-parser`
"""Enable auto tool choice for supported models. Use `--tool-call-parser`
to specify which parser to use."""
to specify which parser to use."""
exclude_tools_when_tool_choice_none
:
bool
=
False
exclude_tools_when_tool_choice_none
:
bool
=
False
"""If specified, exclude tool definitions in prompts when
"""If specified, exclude tool definitions in prompts when
tool_choice='none'."""
tool_choice='none'."""
tool_call_parser
:
Optional
[
str
]
=
None
tool_call_parser
:
Optional
[
str
]
=
None
"""Select the tool call parser depending on the model that you're using.
"""Select the tool call parser depending on the model that you're using.
This is used to parse the model-generated tool call into OpenAI API format.
This is used to parse the model-generated tool call into OpenAI API format.
Required for `--enable-auto-tool-choice`. You can choose any option from
Required for `--enable-auto-tool-choice`. You can choose any option from
the built-in parsers or register a plugin via `--tool-parser-plugin`."""
the built-in parsers or register a plugin via `--tool-parser-plugin`."""
tool_parser_plugin
:
str
=
""
tool_parser_plugin
:
str
=
""
"""Specify the tool parser plugin used to parse model-generated tool calls
"""Specify the tool parser plugin used to parse model-generated tool calls
into OpenAI API format. The names registered in this plugin can be used in
into OpenAI API format. The names registered in this plugin can be used in
`--tool-call-parser`."""
`--tool-call-parser`."""
log_config_file
:
Optional
[
str
]
=
envs
.
VLLM_LOGGING_CONFIG_PATH
log_config_file
:
Optional
[
str
]
=
envs
.
VLLM_LOGGING_CONFIG_PATH
"""Path to logging config JSON file for both vllm and uvicorn"""
"""Path to logging config JSON file for both vllm and uvicorn"""
max_log_len
:
Optional
[
int
]
=
None
max_log_len
:
Optional
[
int
]
=
None
"""Max number of prompt characters or prompt ID numbers being printed in
"""Max number of prompt characters or prompt ID numbers being printed in
log. The default of None means unlimited."""
log. The default of None means unlimited."""
disable_fastapi_docs
:
bool
=
False
disable_fastapi_docs
:
bool
=
False
"""Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint."""
"""Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint."""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment