Unverified commit 61fbfe52, authored by Chauncey, committed by GitHub
Browse files

[Bugfix] fixed inconsistent finish_reason handling between V0 and V1 engines (#27555)


Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
parent 255e34ca
...@@ -42,13 +42,6 @@ def remove_all(lst: list, items_to_remove: set) -> list: ...@@ -42,13 +42,6 @@ def remove_all(lst: list, items_to_remove: set) -> list:
def check_stop( def check_stop(
request: Request, max_model_len: int, pooler_output: torch.Tensor | None = None request: Request, max_model_len: int, pooler_output: torch.Tensor | None = None
) -> bool: ) -> bool:
if (
request.num_tokens >= max_model_len
or request.num_output_tokens >= request.max_tokens
):
request.status = RequestStatus.FINISHED_LENGTH_CAPPED
return True
if request.pooling_params: if request.pooling_params:
if pooler_output is not None: if pooler_output is not None:
request.status = RequestStatus.FINISHED_STOPPED request.status = RequestStatus.FINISHED_STOPPED
...@@ -70,4 +63,10 @@ def check_stop( ...@@ -70,4 +63,10 @@ def check_stop(
request.status = RequestStatus.FINISHED_STOPPED request.status = RequestStatus.FINISHED_STOPPED
request.stop_reason = last_token_id request.stop_reason = last_token_id
return True return True
if (
request.num_tokens >= max_model_len
or request.num_output_tokens >= request.max_tokens
):
request.status = RequestStatus.FINISHED_LENGTH_CAPPED
return True
return False return False
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment