Unverified commit a036d419, authored by Lianmin Zheng, committed by GitHub
Browse files

Fix max new tokens (#772)

parent f95e6617
@@ -306,7 +306,11 @@ class ModelTpServer:
             )
         req.origin_input_ids = req.origin_input_ids[: self.max_req_input_len]
         req.sampling_params.max_new_tokens = min(
-            req.sampling_params.max_new_tokens or 1 << 30,
+            (
+                req.sampling_params.max_new_tokens
+                if req.sampling_params.max_new_tokens is not None
+                else 1 << 30
+            ),
             self.max_req_input_len - 1 - len(req.origin_input_ids),
         )
         self.forward_queue.append(req)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment