"csrc/vscode:/vscode.git/clone" did not exist on "fc911880cc505197a8eaa54d0d9c49edfa593b92"
Unverified commit 5c54d975, authored by Abirdcfly, committed by GitHub
Browse files

[Bugfix][PD] set max_completion_tokens=1 if req has this value (#21841)


Signed-off-by: Abirdcfly <fp544037857@gmail.com>
parent 0a6d305e
...@@ -293,6 +293,8 @@ class Proxy: ...@@ -293,6 +293,8 @@ class Proxy:
# add params to request # add params to request
kv_prepare_request = request.copy() kv_prepare_request = request.copy()
kv_prepare_request["max_tokens"] = 1 kv_prepare_request["max_tokens"] = 1
if "max_completion_tokens" in kv_prepare_request:
kv_prepare_request["max_completion_tokens"] = 1
# prefill stage # prefill stage
prefill_instance = self.schedule(self.prefill_cycler) prefill_instance = self.schedule(self.prefill_cycler)
......
...@@ -128,6 +128,8 @@ async def handle_request(): ...@@ -128,6 +128,8 @@ async def handle_request():
prefill_request = original_request_data.copy() prefill_request = original_request_data.copy()
# change max_tokens = 1 to let it only do prefill # change max_tokens = 1 to let it only do prefill
prefill_request["max_tokens"] = 1 prefill_request["max_tokens"] = 1
if "max_completion_tokens" in prefill_request:
prefill_request["max_completion_tokens"] = 1
global count global count
global prefill_instances global prefill_instances
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment