Unverified commit e9fd11c0, authored by shangmingc, committed by GitHub
Browse files

[Bugfix] Fix ChatCompletion endpoint of mini_lb when stream is set (#6703)


Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com>
parent c7588d59
......@@ -117,8 +117,8 @@ class MiniLoadBalancer:
) as session:
# Create the tasks for both prefill and decode requests
tasks = [
- session.post(f"{prefill_server}/generate", json=modified_request),
- session.post(f"{decode_server}/generate", json=modified_request),
+ session.post(f"{prefill_server}/{endpoint}", json=modified_request),
+ session.post(f"{decode_server}/{endpoint}", json=modified_request),
]
# Wait for both responses to complete. Since this is streaming, they return immediately.
prefill_response, decode_response = await asyncio.gather(*tasks)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment