"vllm/vscode:/vscode.git/clone" did not exist on "611b18757e36da77d2f1cbe273c99b167bf0d5de"
Unverified commit f4d97e4f, authored by bnellnm and committed by GitHub
Browse files

[Bug] [V1] Try fetching stop_reason from EngineOutput before checking the request (#13108)

parent f1042e86
@@ -2,7 +2,7 @@
import asyncio
from dataclasses import dataclass
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Union
from vllm.outputs import RequestOutput
from vllm.sampling_params import RequestOutputKind
@@ -164,6 +164,7 @@ class OutputProcessor:
new_token_ids = engine_core_output.new_token_ids
finish_reason = engine_core_output.finish_reason
+stop_reason = engine_core_output.stop_reason
# TODO(andy): prompt logprobs + chunked prefill can
# result in engine core returning an output for a
@@ -181,9 +182,10 @@ class OutputProcessor:
# 2) Detokenize the token ids into text and check for stop
# strings.
-stop_reason = req_state.detokenizer.update(new_token_ids)
-if stop_reason:
+stop_string = req_state.detokenizer.update(new_token_ids)
+if stop_string and finish_reason != FinishReason.STOP:
finish_reason = FinishReason.STOP
+stop_reason = stop_string
# 3) Compute sample and prompt logprobs for request,
# if required.
@@ -250,7 +252,7 @@ class OutputProcessor:
request_state: RequestState,
new_token_ids: List[int],
finish_reason: Optional[FinishReason],
-stop_reason: Optional[str],
+stop_reason: Union[int, str, None],
) -> Optional[RequestOutput]:
finished = finish_reason is not None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment