Unverified commit 63fe3a70, authored by Chauncey, committed by GitHub
Browse files

[PD] let p2p nccl toy proxy handle /chat/completions (#21734)


Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
parent 0ae970ed
......@@ -120,6 +120,7 @@ async def forward_request(url, data, request_id):
@app.route("/v1/completions", methods=["POST"])
+ @app.route("/v1/chat/completions", methods=["POST"])
async def handle_request():
try:
original_request_data = await request.get_json()
......@@ -157,13 +158,13 @@ async def handle_request():
# finish prefill
async for _ in forward_request(
- f"http://{prefill_addr}/v1/completions", prefill_request, request_id
+ f"http://{prefill_addr}{request.path}", prefill_request, request_id
):
continue
# return decode
generator = forward_request(
- f"http://{decode_addr}/v1/completions", original_request_data, request_id
+ f"http://{decode_addr}{request.path}", original_request_data, request_id
)
response = await make_response(generator)
response.timeout = None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment