Commit bc93c648 authored by Ceng23333's avatar Ceng23333
Browse files

fix stop string


Signed-off-by: default avatarCeng23333 <441651826@qq.com>
parent 7b744f46
...@@ -246,14 +246,26 @@ class LLMEngine: ...@@ -246,14 +246,26 @@ class LLMEngine:
req.mark_finished(req.finish_reason) req.mark_finished(req.finish_reason)
finished_now = True finished_now = True
# Remove stop string from generated_text if STOP_STRING finish reason
if req.finish_reason == FinishReason.STOP_STRING:
stop_strings = req.sampling_params.stop or []
for stop_str in stop_strings:
if decoded_text.endswith(stop_str):
# Remove the stop string from the end
decoded_text = decoded_text[:-len(stop_str)]
req.generated_text = decoded_text
break
holds_back_incomplete_utf8 = ( holds_back_incomplete_utf8 = (
bool(decoded_text) and decoded_text.endswith("\ufffd") bool(decoded_text) and decoded_text.endswith("\ufffd")
) )
# vLLM-style: hold back only if we are not on the final chunk. # vLLM-style: hold back only if we are not on the final chunk.
# Also suppress output when finish reason is LENGTH to avoid replacement issues. # Suppress output when finish reason is LENGTH or STOP_STRING.
# Root cause fix: When STOP_STRING is detected, we suppress output for the token
# that completes the stop string, preventing additional tokens from being output.
if (holds_back_incomplete_utf8 and not finished_now) or ( if (holds_back_incomplete_utf8 and not finished_now) or (
finished_now and req.finish_reason == FinishReason.LENGTH finished_now and req.finish_reason in (FinishReason.LENGTH, FinishReason.STOP_STRING)
): ):
token_text = "" token_text = ""
else: else:
...@@ -265,6 +277,14 @@ class LLMEngine: ...@@ -265,6 +277,14 @@ class LLMEngine:
# For non-streaming, finish checks happen here. # For non-streaming, finish checks happen here.
if req._output_queue is None and self._check_request_finished(req, token_id): if req._output_queue is None and self._check_request_finished(req, token_id):
req.mark_finished(req.finish_reason) req.mark_finished(req.finish_reason)
# Remove stop string from generated_text if STOP_STRING finish reason
if req.finish_reason == FinishReason.STOP_STRING:
stop_strings = req.sampling_params.stop or []
for stop_str in stop_strings:
if req.generated_text.endswith(stop_str):
# Remove the stop string from the end
req.generated_text = req.generated_text[:-len(stop_str)]
break
# Put output in queue if it exists (for async streaming) # Put output in queue if it exists (for async streaming)
if req._output_queue is not None: if req._output_queue is not None:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment