Unverified commit 63fe3a70, authored by Chauncey and committed by GitHub
Browse files

[PD] let p2p nccl toy proxy handle /chat/completions (#21734)


Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
parent 0ae970ed
...@@ -120,6 +120,7 @@ async def forward_request(url, data, request_id): ...@@ -120,6 +120,7 @@ async def forward_request(url, data, request_id):
@app.route("/v1/completions", methods=["POST"]) @app.route("/v1/completions", methods=["POST"])
@app.route("/v1/chat/completions", methods=["POST"])
async def handle_request(): async def handle_request():
try: try:
original_request_data = await request.get_json() original_request_data = await request.get_json()
...@@ -157,13 +158,13 @@ async def handle_request(): ...@@ -157,13 +158,13 @@ async def handle_request():
# finish prefill # finish prefill
async for _ in forward_request( async for _ in forward_request(
f"http://{prefill_addr}/v1/completions", prefill_request, request_id f"http://{prefill_addr}{request.path}", prefill_request, request_id
): ):
continue continue
# return decode # return decode
generator = forward_request( generator = forward_request(
f"http://{decode_addr}/v1/completions", original_request_data, request_id f"http://{decode_addr}{request.path}", original_request_data, request_id
) )
response = await make_response(generator) response = await make_response(generator)
response.timeout = None response.timeout = None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment