"docs/vscode:/vscode.git/clone" did not exist on "9eaace9216e10790c76e7675741daefa92ae1b59"
Unverified Commit 51c554d8 authored by Christopher Chou's avatar Christopher Chou Committed by GitHub
Browse files

Allow more flexible assistant and system response (#1256)

parent 79ece2c5
...@@ -386,7 +386,16 @@ def generate_chat_conv( ...@@ -386,7 +386,16 @@ def generate_chat_conv(
for message in request.messages: for message in request.messages:
msg_role = message.role msg_role = message.role
if msg_role == "system": if msg_role == "system":
conv.system_message = message.content if isinstance(message.content, str):
conv.system_message = message.content
elif isinstance(message.content, list):
if (
len(message.content) != 1
or getattr(message.content[0], "type", None) != "text"
):
raise ValueError("The system message should be a single text.")
else:
conv.system_message = getattr(message.content[0], "text", "")
elif msg_role == "user": elif msg_role == "user":
# Handle the various types of Chat Request content types here. # Handle the various types of Chat Request content types here.
role = conv.roles[0] role = conv.roles[0]
...@@ -414,7 +423,20 @@ def generate_chat_conv( ...@@ -414,7 +423,20 @@ def generate_chat_conv(
conv.append_image(content.image_url.url) conv.append_image(content.image_url.url)
conv.append_message(conv.roles[0], real_content) conv.append_message(conv.roles[0], real_content)
elif msg_role == "assistant": elif msg_role == "assistant":
conv.append_message(conv.roles[1], message.content) parsed_content = ""
if isinstance(message.content, str):
parsed_content = message.content
elif isinstance(message.content, list):
if (
len(message.content) != 1
or getattr(message.content[0], "type", None) != "text"
):
raise ValueError(
"The assistant's response should be a single text."
)
else:
parsed_content = getattr(message.content[0], "text", "")
conv.append_message(conv.roles[1], parsed_content)
else: else:
raise ValueError(f"Unknown role: {msg_role}") raise ValueError(f"Unknown role: {msg_role}")
......
...@@ -844,8 +844,23 @@ def v1_chat_generate_request( ...@@ -844,8 +844,23 @@ def v1_chat_generate_request(
if not isinstance(request.messages, str): if not isinstance(request.messages, str):
# Apply chat template and its stop strings. # Apply chat template and its stop strings.
if chat_template_name is None: if chat_template_name is None:
openai_compatible_messages = []
for message in request.messages:
if isinstance(message.content, str):
openai_compatible_messages.append(
{"role": message.role, "content": message.content}
)
else:
content_list = message.dict()["content"]
for content in content_list:
if content["type"] == "text":
openai_compatible_messages.append(
{"role": message.role, "content": content["text"]}
)
prompt_ids = tokenizer_manager.tokenizer.apply_chat_template( prompt_ids = tokenizer_manager.tokenizer.apply_chat_template(
request.messages, tokenize=True, add_generation_prompt=True openai_compatible_messages,
tokenize=True,
add_generation_prompt=True,
) )
stop = request.stop stop = request.stop
image_data = None image_data = None
......
...@@ -200,11 +200,6 @@ class CompletionStreamResponse(BaseModel): ...@@ -200,11 +200,6 @@ class CompletionStreamResponse(BaseModel):
usage: Optional[UsageInfo] = None usage: Optional[UsageInfo] = None
class ChatCompletionMessageGenericParam(BaseModel):
role: Literal["system", "assistant"]
content: str
class ChatCompletionMessageContentTextPart(BaseModel): class ChatCompletionMessageContentTextPart(BaseModel):
type: Literal["text"] type: Literal["text"]
text: str text: str
...@@ -225,6 +220,11 @@ ChatCompletionMessageContentPart = Union[ ...@@ -225,6 +220,11 @@ ChatCompletionMessageContentPart = Union[
] ]
class ChatCompletionMessageGenericParam(BaseModel):
    """A non-user chat message (a ``system`` or ``assistant`` turn).

    ``content`` accepts either a plain string or a list of text content
    parts.  Per the parsing logic introduced alongside this model, the
    list form must contain exactly one part of ``type == "text"``;
    anything else raises ``ValueError`` downstream.
    """

    role: Literal["system", "assistant"]
    content: Union[str, List[ChatCompletionMessageContentTextPart]]
class ChatCompletionMessageUserParam(BaseModel): class ChatCompletionMessageUserParam(BaseModel):
role: Literal["user"] role: Literal["user"]
content: Union[str, List[ChatCompletionMessageContentPart]] content: Union[str, List[ChatCompletionMessageContentPart]]
......
...@@ -76,6 +76,56 @@ class TestOpenAIVisionServer(unittest.TestCase): ...@@ -76,6 +76,56 @@ class TestOpenAIVisionServer(unittest.TestCase):
assert response.usage.completion_tokens > 0 assert response.usage.completion_tokens > 0
assert response.usage.total_tokens > 0 assert response.usage.total_tokens > 0
def test_multi_turn_chat_completion(self):
    """Multi-turn vision chat against the running server.

    Exercises the newly-allowed flexible message shapes: the assistant
    turn is supplied as a list containing a single ``text`` content part
    (rather than a plain string), and the follow-up user turn is also a
    content-part list.  Verifies the server accepts the history and
    produces a coherent string reply.
    """
    client = openai.Client(api_key=self.api_key, base_url=self.base_url)
    response = client.chat.completions.create(
        model="default",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://github.com/sgl-project/sglang/blob/main/test/lang/example_image.png?raw=true"
                        },
                    },
                    {
                        "type": "text",
                        "text": "Describe this image in a very short sentence.",
                    },
                ],
            },
            {
                # Assistant history given as a single-element text-part
                # list — the shape this change newly permits.
                "role": "assistant",
                "content": [
                    {
                        "type": "text",
                        "text": "There is a man at the back of a yellow cab ironing his clothes.",
                    }
                ],
            },
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Repeat your previous answer."}
                ],
            },
        ],
        temperature=0,
    )
    assert response.choices[0].message.role == "assistant"
    text = response.choices[0].message.content
    assert isinstance(text, str)
    # Loose semantic check: the repeated answer should mention the
    # earlier description's subject.
    assert "man" in text or "cab" in text, text
    assert response.id
    assert response.created
    assert response.usage.prompt_tokens > 0
    assert response.usage.completion_tokens > 0
    assert response.usage.total_tokens > 0
def test_mult_images_chat_completion(self): def test_mult_images_chat_completion(self):
client = openai.Client(api_key=self.api_key, base_url=self.base_url) client = openai.Client(api_key=self.api_key, base_url=self.base_url)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment