"git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "0a2c42f3e2b5dbf793f28efa11f56ca1e510bc7f"
Unverified commit a95d5589, authored by Gleb Drozdov, committed by GitHub

Add matched_stop (EOS token id or stop str) to distinguish whether a finish_reason of "stop" came from the EOS token or a stop string (#1684)
parent d17d19e5
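
For client code, the practical effect is that `choices[*].matched_stop` now reports *what* ended generation: the matched EOS token id (an `int`), the matched stop string (a `str`), or `null` when the request finished for another reason such as hitting `max_tokens`. A minimal sketch of consuming the field (the URL and model name are placeholders, not part of this commit):

```python
import requests

# Assumes a locally running sglang server; URL and model name are illustrative.
resp = requests.post(
    "http://localhost:30000/v1/completions",
    json={
        "model": "default",
        "prompt": "Count to three:",
        "max_tokens": 100,
        "stop": ["\n"],
    },
).json()

choice = resp["choices"][0]
if choice["finish_reason"] == "stop":
    matched = choice["matched_stop"]
    if isinstance(matched, int):
        print(f"stopped on EOS token id {matched}")
    else:
        print(f"stopped on stop string {matched!r}")
else:
    # e.g. finish_reason == "length": matched_stop is None
    print("finished with", choice["finish_reason"])
```

The diffs below show the server-side changes that populate the field.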
```diff
@@ -621,16 +621,19 @@ def v1_generate_response(request, ret, tokenizer_manager, to_file=False):
         else:
             logprobs = None
 
+        finish_reason = ret_item["meta_info"]["finish_reason"]
+
         if to_file:
             # to make the choise data json serializable
             choice_data = {
                 "index": 0,
                 "text": text,
                 "logprobs": logprobs,
-                "finish_reason": (
-                    ret_item["meta_info"]["finish_reason"]["type"]
-                    if ret_item["meta_info"]["finish_reason"]
-                    else ""
-                ),
+                "finish_reason": (finish_reason["type"] if finish_reason else ""),
+                "matched_stop": (
+                    finish_reason["matched"]
+                    if finish_reason and "matched" in finish_reason
+                    else None
+                ),
             }
         else:
@@ -638,10 +641,11 @@ def v1_generate_response(request, ret, tokenizer_manager, to_file=False):
             choice_data = CompletionResponseChoice(
                 index=idx,
                 text=text,
                 logprobs=logprobs,
-                finish_reason=(
-                    ret_item["meta_info"]["finish_reason"]["type"]
-                    if ret_item["meta_info"]["finish_reason"]
-                    else ""
-                ),
+                finish_reason=(finish_reason["type"] if finish_reason else ""),
+                matched_stop=(
+                    finish_reason["matched"]
+                    if finish_reason and "matched" in finish_reason
+                    else None
+                ),
             )
@@ -771,14 +775,16 @@ async def v1_completions(tokenizer_manager, raw_request: Request):
                     delta = text[len(stream_buffer) :]
                     stream_buffer = stream_buffer + delta
 
+                    finish_reason = content["meta_info"]["finish_reason"]
                     choice_data = CompletionResponseStreamChoice(
                         index=index,
                         text=delta,
                         logprobs=logprobs,
-                        finish_reason=(
-                            content["meta_info"]["finish_reason"]["type"]
-                            if content["meta_info"]["finish_reason"]
-                            else ""
-                        ),
+                        finish_reason=(finish_reason["type"] if finish_reason else ""),
+                        matched_stop=(
+                            finish_reason["matched"]
+                            if finish_reason and "matched" in finish_reason
+                            else None
+                        ),
                     )
                     chunk = CompletionStreamResponse(
@@ -1016,16 +1022,19 @@ def v1_chat_generate_response(request, ret, to_file=False, cache_report=False):
         else:
             choice_logprobs = None
 
+        finish_reason = ret_item["meta_info"]["finish_reason"]
+
         if to_file:
             # to make the choice data json serializable
             choice_data = {
                 "index": 0,
                 "message": {"role": "assistant", "content": ret_item["text"]},
                 "logprobs": choice_logprobs,
-                "finish_reason": (
-                    ret_item["meta_info"]["finish_reason"]["type"]
-                    if ret_item["meta_info"]["finish_reason"]
-                    else ""
-                ),
+                "finish_reason": (finish_reason["type"] if finish_reason else ""),
+                "matched_stop": (
+                    finish_reason["matched"]
+                    if finish_reason and "matched" in finish_reason
+                    else None
+                ),
             }
         else:
@@ -1033,10 +1042,11 @@ def v1_chat_generate_response(request, ret, to_file=False, cache_report=False):
             choice_data = ChatCompletionResponseChoice(
                 index=idx,
                 message=ChatMessage(role="assistant", content=ret_item["text"]),
                 logprobs=choice_logprobs,
-                finish_reason=(
-                    ret_item["meta_info"]["finish_reason"]["type"]
-                    if ret_item["meta_info"]["finish_reason"]
-                    else ""
-                ),
+                finish_reason=(finish_reason["type"] if finish_reason else ""),
+                matched_stop=(
+                    finish_reason["matched"]
+                    if finish_reason and "matched" in finish_reason
+                    else None
+                ),
             )
@@ -1159,6 +1169,8 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
                 else:
                     choice_logprobs = None
 
+                finish_reason = content["meta_info"]["finish_reason"]
+
                 if is_first:
                     # First chunk with role
                     is_first = False
@@ -1166,9 +1178,12 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
                         index=index,
                         delta=DeltaMessage(role="assistant"),
                         finish_reason=(
-                            content["meta_info"]["finish_reason"]["type"]
-                            if content["meta_info"]["finish_reason"]
-                            else ""
+                            finish_reason["type"] if finish_reason else ""
+                        ),
+                        matched_stop=(
+                            finish_reason["matched"]
+                            if finish_reason and "matched" in finish_reason
+                            else None
                         ),
                         logprobs=choice_logprobs,
                     )
@@ -1185,10 +1200,11 @@ async def v1_chat_completions(tokenizer_manager, raw_request: Request):
                     choice_data = ChatCompletionResponseStreamChoice(
                         index=index,
                         delta=DeltaMessage(content=delta),
-                        finish_reason=(
-                            content["meta_info"]["finish_reason"]["type"]
-                            if content["meta_info"]["finish_reason"]
-                            else ""
-                        ),
+                        finish_reason=(finish_reason["type"] if finish_reason else ""),
+                        matched_stop=(
+                            finish_reason["matched"]
+                            if finish_reason and "matched" in finish_reason
+                            else None
+                        ),
                         logprobs=choice_logprobs,
                     )
```
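The same null-safe extraction pattern repeats in every branch above. Reduced to a standalone sketch (the helper name is hypothetical, not part of the commit):

```python
from typing import Any, Dict, Tuple, Union


def extract_finish(meta_info: Dict[str, Any]) -> Tuple[str, Union[None, int, str]]:
    """Hypothetical helper mirroring the inline logic in the diff above.

    Returns (finish_reason_type, matched_stop). matched_stop is the EOS
    token id (int) or stop string (str) that ended generation, or None
    when no stop condition matched (e.g. the request hit max_tokens).
    """
    finish_reason = meta_info["finish_reason"]
    finish_type = finish_reason["type"] if finish_reason else ""
    matched = (
        finish_reason["matched"]
        if finish_reason and "matched" in finish_reason
        else None
    )
    return finish_type, matched


# extract_finish({"finish_reason": {"type": "stop", "matched": 128009}})
# -> ("stop", 128009)
```

The response-schema additions follow: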
```diff
@@ -184,6 +184,7 @@ class CompletionResponseChoice(BaseModel):
     text: str
     logprobs: Optional[LogProbs] = None
     finish_reason: Optional[str] = None
+    matched_stop: Union[None, int, str] = None
 
 
 class CompletionResponse(BaseModel):
@@ -200,6 +201,7 @@ class CompletionResponseStreamChoice(BaseModel):
     text: str
     logprobs: Optional[LogProbs] = None
     finish_reason: Optional[str] = None
+    matched_stop: Union[None, int, str] = None
 
 
 class CompletionStreamResponse(BaseModel):
@@ -291,6 +293,7 @@ class ChatCompletionResponseChoice(BaseModel):
     message: ChatMessage
     logprobs: Optional[Union[LogProbs, ChoiceLogprobs]] = None
     finish_reason: str
+    matched_stop: Union[None, int, str] = None
 
 
 class ChatCompletionResponse(BaseModel):
@@ -312,6 +315,7 @@ class ChatCompletionResponseStreamChoice(BaseModel):
     delta: DeltaMessage
     logprobs: Optional[Union[LogProbs, ChoiceLogprobs]] = None
    finish_reason: Optional[str] = None
+    matched_stop: Union[None, int, str] = None
 
 
 class ChatCompletionStreamResponse(BaseModel):
```
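To illustrate the `Union[None, int, str]` typing, a trimmed stand-in for the models above (not the full protocol module): pydantic serializes `matched_stop` as a JSON number for an EOS token id, a string for a stop string, and `null` otherwise.

```python
from typing import Optional, Union

from pydantic import BaseModel


class Choice(BaseModel):
    # Trimmed stand-in for CompletionResponseChoice; only the fields
    # touched by this change are kept.
    index: int
    text: str
    finish_reason: Optional[str] = None
    matched_stop: Union[None, int, str] = None


c = Choice(index=0, text="4", finish_reason="stop", matched_stop=128009)
print(c.json())  # pydantic v1 API; use c.model_dump_json() on pydantic v2
# {"index": 0, "text": "4", "finish_reason": "stop", "matched_stop": 128009}
```

The commit also adds a test file: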
```python
import json
import unittest

import requests

from sglang.srt.utils import kill_child_process
from sglang.test.test_utils import (
    DEFAULT_MODEL_NAME_FOR_TEST,
    DEFAULT_URL_FOR_TEST,
    popen_launch_server,
)

MANY_NEW_TOKENS_PROMPT = """
Please write an extremely detailed and vivid fantasy story, set in a world full of intricate magic systems, political intrigue, and complex characters.
Ensure that you thoroughly describe every scene, character's motivations, and the environment. Include long, engaging dialogues and elaborate on the inner thoughts of the characters.
Each section should be as comprehensive as possible to create a rich and immersive experience for the reader.
The story should span multiple events, challenges, and character developments over time. Aim to make the story at least 3,000 words long.
"""


class TestMatchedStop(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.model = DEFAULT_MODEL_NAME_FOR_TEST
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=300,
            other_args=["--max-running-requests", "10"],
        )

    @classmethod
    def tearDownClass(cls):
        kill_child_process(cls.process.pid)

    def run_completions_generation(
        self,
        prompt=MANY_NEW_TOKENS_PROMPT,
        max_tokens=1,
        stop=None,
        finish_reason=None,
        matched_stop=None,
    ):
        payload = {
            "prompt": prompt,
            "model": self.model,
            "temperature": 0,
            "top_p": 1,
            "max_tokens": max_tokens,
        }
        if stop is not None:
            payload["stop"] = stop

        response_completions = requests.post(
            self.base_url + "/v1/completions",
            json=payload,
        )
        print(json.dumps(response_completions.json()))
        print("=" * 100)

        assert (
            response_completions.json()["choices"][0]["finish_reason"] == finish_reason
        )
        assert response_completions.json()["choices"][0]["matched_stop"] == matched_stop

    def run_chat_completions_generation(
        self,
        prompt=MANY_NEW_TOKENS_PROMPT,
        max_tokens=1,
        stop=None,
        finish_reason=None,
        matched_stop=None,
    ):
        chat_payload = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": "You are a helpful AI assistant"},
                {"role": "user", "content": prompt},
            ],
            "temperature": 0,
            "top_p": 1,
            "max_tokens": max_tokens,
        }
        if stop is not None:
            chat_payload["stop"] = stop

        response_chat = requests.post(
            self.base_url + "/v1/chat/completions",
            json=chat_payload,
        )
        print(json.dumps(response_chat.json()))
        print("=" * 100)

        assert response_chat.json()["choices"][0]["finish_reason"] == finish_reason
        assert response_chat.json()["choices"][0]["matched_stop"] == matched_stop

    def test_finish_stop_str(self):
        self.run_completions_generation(
            max_tokens=1000, stop="\n", finish_reason="stop", matched_stop="\n"
        )
        self.run_chat_completions_generation(
            max_tokens=1000, stop="\n", finish_reason="stop", matched_stop="\n"
        )

    def test_finish_stop_eos(self):
        llama_format_prompt = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
What is 2 + 2?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
        eos_token_id = 128009
        self.run_completions_generation(
            prompt=llama_format_prompt,
            max_tokens=1000,
            finish_reason="stop",
            matched_stop=eos_token_id,
        )
        self.run_chat_completions_generation(
            prompt="What is 2 + 2?",
            max_tokens=1000,
            finish_reason="stop",
            matched_stop=eos_token_id,
        )

    def test_finish_length(self):
        self.run_completions_generation(
            max_tokens=5, finish_reason="length", matched_stop=None
        )
        self.run_chat_completions_generation(
            max_tokens=5, finish_reason="length", matched_stop=None
        )


if __name__ == "__main__":
    unittest.main()
```
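
For reference, the tests above exercise the `meta_info["finish_reason"]` dict that the adapter reads; inferred from the diff (not an exhaustive schema), it takes roughly these shapes:

```python
# Stopped on a stop string -> matched_stop is that string:
{"type": "stop", "matched": "\n"}

# Stopped on the EOS token (128009 is Llama 3's <|eot_id|>):
{"type": "stop", "matched": 128009}

# Hit max_tokens -> no "matched" key, so matched_stop is None:
{"type": "length"}
```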