Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4734704b
Unverified
Commit
4734704b
authored
Jun 25, 2025
by
lkchen
Committed by
GitHub
Jun 25, 2025
Browse files
[PD] let toy proxy handle /chat/completions (#19730)
Signed-off-by:
Linkun
<
github@lkchen.net
>
parent
8b8c209e
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
15 additions
and
7 deletions
+15
-7
tests/v1/kv_connector/nixl_integration/toy_proxy_server.py
tests/v1/kv_connector/nixl_integration/toy_proxy_server.py
+15
-7
No files found.
tests/v1/kv_connector/nixl_integration/toy_proxy_server.py
View file @
4734704b
...
@@ -196,8 +196,7 @@ async def stream_service_response(client_info: dict, endpoint: str,
...
@@ -196,8 +196,7 @@ async def stream_service_response(client_info: dict, endpoint: str,
yield
chunk
yield
chunk
@
app
.
post
(
"/v1/completions"
)
async
def
_handle_completions
(
api
:
str
,
request
:
Request
):
async
def
handle_completions
(
request
:
Request
):
try
:
try
:
req_data
=
await
request
.
json
()
req_data
=
await
request
.
json
()
request_id
=
str
(
uuid
.
uuid4
())
request_id
=
str
(
uuid
.
uuid4
())
...
@@ -206,9 +205,8 @@ async def handle_completions(request: Request):
...
@@ -206,9 +205,8 @@ async def handle_completions(request: Request):
prefill_client_info
=
get_next_client
(
request
.
app
,
'prefill'
)
prefill_client_info
=
get_next_client
(
request
.
app
,
'prefill'
)
# Send request to prefill service
# Send request to prefill service
response
=
await
send_request_to_service
(
prefill_client_info
,
response
=
await
send_request_to_service
(
prefill_client_info
,
api
,
"/completions"
,
req_data
,
req_data
,
request_id
)
request_id
)
# Extract the needed fields
# Extract the needed fields
response_json
=
response
.
json
()
response_json
=
response
.
json
()
...
@@ -224,7 +222,7 @@ async def handle_completions(request: Request):
...
@@ -224,7 +222,7 @@ async def handle_completions(request: Request):
# Stream response from decode service
# Stream response from decode service
async
def
generate_stream
():
async
def
generate_stream
():
async
for
chunk
in
stream_service_response
(
decode_client_info
,
async
for
chunk
in
stream_service_response
(
decode_client_info
,
"/completions"
,
api
,
req_data
,
req_data
,
request_id
=
request_id
):
request_id
=
request_id
):
yield
chunk
yield
chunk
...
@@ -237,12 +235,22 @@ async def handle_completions(request: Request):
...
@@ -237,12 +235,22 @@ async def handle_completions(request: Request):
import
traceback
import
traceback
exc_info
=
sys
.
exc_info
()
exc_info
=
sys
.
exc_info
()
print
(
"Error occurred in disagg prefill proxy server"
print
(
"Error occurred in disagg prefill proxy server"
" -
completions
endpoint"
)
f
" -
{
api
}
endpoint"
)
print
(
e
)
print
(
e
)
print
(
""
.
join
(
traceback
.
format_exception
(
*
exc_info
)))
print
(
""
.
join
(
traceback
.
format_exception
(
*
exc_info
)))
raise
raise
@
app
.
post
(
"/v1/completions"
)
async
def
handle_completions
(
request
:
Request
):
return
await
_handle_completions
(
"/completions"
,
request
)
@
app
.
post
(
"/v1/chat/completions"
)
async
def
handle_chat_completions
(
request
:
Request
):
return
await
_handle_completions
(
"/chat/completions"
,
request
)
@
app
.
get
(
"/healthcheck"
)
@
app
.
get
(
"/healthcheck"
)
async
def
healthcheck
():
async
def
healthcheck
():
"""Simple endpoint to check if the server is running."""
"""Simple endpoint to check if the server is running."""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment