Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
00e6402d
Unverified
Commit
00e6402d
authored
Jan 14, 2026
by
Chauncey
Committed by
GitHub
Jan 14, 2026
Browse files
[Frontend] track responsesAPI server_load (#32323)
Signed-off-by:
chaunceyjiang
<
chaunceyjiang@gmail.com
>
parent
ce094624
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
8 additions
and
0 deletions
+8
-0
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/api_server.py
+4
-0
vllm/entrypoints/openai/responses/api_router.py
vllm/entrypoints/openai/responses/api_router.py
+4
-0
No files found.
vllm/entrypoints/openai/api_server.py
View file @
00e6402d
...
@@ -259,6 +259,10 @@ def engine_client(request: Request) -> EngineClient:
...
@@ -259,6 +259,10 @@ def engine_client(request: Request) -> EngineClient:
async
def
get_server_load_metrics
(
request
:
Request
):
async
def
get_server_load_metrics
(
request
:
Request
):
# This endpoint returns the current server load metrics.
# This endpoint returns the current server load metrics.
# It tracks requests utilizing the GPU from the following routes:
# It tracks requests utilizing the GPU from the following routes:
# - /v1/responses
# - /v1/responses/{response_id}
# - /v1/responses/{response_id}/cancel
# - /v1/messages
# - /v1/chat/completions
# - /v1/chat/completions
# - /v1/completions
# - /v1/completions
# - /v1/audio/transcriptions
# - /v1/audio/transcriptions
...
...
vllm/entrypoints/openai/responses/api_router.py
View file @
00e6402d
...
@@ -17,6 +17,7 @@ from vllm.entrypoints.openai.responses.protocol import (
...
@@ -17,6 +17,7 @@ from vllm.entrypoints.openai.responses.protocol import (
from
vllm.entrypoints.openai.responses.serving
import
OpenAIServingResponses
from
vllm.entrypoints.openai.responses.serving
import
OpenAIServingResponses
from
vllm.entrypoints.openai.utils
import
validate_json_request
from
vllm.entrypoints.openai.utils
import
validate_json_request
from
vllm.entrypoints.utils
import
(
from
vllm.entrypoints.utils
import
(
load_aware_call
,
with_cancellation
,
with_cancellation
,
)
)
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
...
@@ -54,6 +55,7 @@ async def _convert_stream_to_sse_events(
...
@@ -54,6 +55,7 @@ async def _convert_stream_to_sse_events(
},
},
)
)
@
with_cancellation
@
with_cancellation
@
load_aware_call
async
def
create_responses
(
request
:
ResponsesRequest
,
raw_request
:
Request
):
async
def
create_responses
(
request
:
ResponsesRequest
,
raw_request
:
Request
):
handler
=
responses
(
raw_request
)
handler
=
responses
(
raw_request
)
if
handler
is
None
:
if
handler
is
None
:
...
@@ -79,6 +81,7 @@ async def create_responses(request: ResponsesRequest, raw_request: Request):
...
@@ -79,6 +81,7 @@ async def create_responses(request: ResponsesRequest, raw_request: Request):
@
router
.
get
(
"/v1/responses/{response_id}"
)
@
router
.
get
(
"/v1/responses/{response_id}"
)
@
load_aware_call
async
def
retrieve_responses
(
async
def
retrieve_responses
(
response_id
:
str
,
response_id
:
str
,
raw_request
:
Request
,
raw_request
:
Request
,
...
@@ -113,6 +116,7 @@ async def retrieve_responses(
...
@@ -113,6 +116,7 @@ async def retrieve_responses(
@
router
.
post
(
"/v1/responses/{response_id}/cancel"
)
@
router
.
post
(
"/v1/responses/{response_id}/cancel"
)
@
load_aware_call
async
def
cancel_responses
(
response_id
:
str
,
raw_request
:
Request
):
async
def
cancel_responses
(
response_id
:
str
,
raw_request
:
Request
):
handler
=
responses
(
raw_request
)
handler
=
responses
(
raw_request
)
if
handler
is
None
:
if
handler
is
None
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment