Unverified commit 5c54d975, authored by Abirdcfly and committed by GitHub
Browse files

[Bugfix][PD] set max_completion_tokens=1 if req has this value (#21841)


Signed-off-by: Abirdcfly <fp544037857@gmail.com>
parent 0a6d305e
......@@ -293,6 +293,8 @@ class Proxy:
# add params to request
kv_prepare_request = request.copy()
kv_prepare_request["max_tokens"] = 1
if "max_completion_tokens" in kv_prepare_request:
kv_prepare_request["max_completion_tokens"] = 1
# prefill stage
prefill_instance = self.schedule(self.prefill_cycler)
......
......@@ -128,6 +128,8 @@ async def handle_request():
prefill_request = original_request_data.copy()
# set max_tokens to 1 so that this request only performs the prefill stage
prefill_request["max_tokens"] = 1
if "max_completion_tokens" in prefill_request:
prefill_request["max_completion_tokens"] = 1
global count
global prefill_instances
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment