Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
720b10fd
Unverified
Commit
720b10fd
authored
Dec 26, 2024
by
Robert Shaw
Committed by
GitHub
Dec 26, 2024
Browse files
[1/N] API Server (Remove Proxy) (#11529)
parent
b85a9778
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
17 additions
and
7 deletions
+17
-7
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+12
-6
vllm/entrypoints/openai/cli_args.py
vllm/entrypoints/openai/cli_args.py
+5
-1
No files found.
vllm/entrypoints/openai/api_server.py
View file @
720b10fd
...
@@ -585,9 +585,15 @@ def build_app(args: Namespace) -> FastAPI:
...
@@ -585,9 +585,15 @@ def build_app(args: Namespace) -> FastAPI:
status_code
=
401
)
status_code
=
401
)
return
await
call_next
(
request
)
return
await
call_next
(
request
)
if
args
.
enable_request_id_headers
:
logger
.
warning
(
"CAUTION: Enabling X-Request-Id headers in the API Server. "
"This can harm performance at high QPS."
)
@
app
.
middleware
(
"http"
)
@
app
.
middleware
(
"http"
)
async
def
add_request_id
(
request
:
Request
,
call_next
):
async
def
add_request_id
(
request
:
Request
,
call_next
):
request_id
=
request
.
headers
.
get
(
"X-Request-Id"
)
or
uuid
.
uuid4
().
hex
request_id
=
request
.
headers
.
get
(
"X-Request-Id"
)
or
uuid
.
uuid4
().
hex
response
=
await
call_next
(
request
)
response
=
await
call_next
(
request
)
response
.
headers
[
"X-Request-Id"
]
=
request_id
response
.
headers
[
"X-Request-Id"
]
=
request_id
return
response
return
response
...
...
vllm/entrypoints/openai/cli_args.py
View file @
720b10fd
...
@@ -196,7 +196,11 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
...
@@ -196,7 +196,11 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
action
=
"store_true"
,
action
=
"store_true"
,
help
=
"If specified, will run the OpenAI frontend server in the same "
help
=
"If specified, will run the OpenAI frontend server in the same "
"process as the model serving engine."
)
"process as the model serving engine."
)
parser
.
add_argument
(
"--enable-request-id-headers"
,
action
=
"store_true"
,
help
=
"If specified, API server will add X-Request-Id header to "
"responses. Caution: this hurts performance at high QPS."
)
parser
.
add_argument
(
parser
.
add_argument
(
"--enable-auto-tool-choice"
,
"--enable-auto-tool-choice"
,
action
=
"store_true"
,
action
=
"store_true"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment