Fix Llama3.3 tool call support (#4320)

5fe79605 · Chang Su · GitHub · c6d7f8d3 · 5fe79605 · 5fe79605
Unverified Commit 5fe79605 authored Mar 13, 2025 by Chang Su Committed by GitHub Mar 13, 2025
Showing with 55 additions and 22 deletions

python/sglang/srt/function_call_parser.py python/sglang/srt/function_call_parser.py +33 -2

python/sglang/srt/openai_api/adapter.py python/sglang/srt/openai_api/adapter.py +22 -20

No files found.
--- a/python/sglang/srt/function_call_parser.py
+++ b/python/sglang/srt/function_call_parser.py
@@ -318,6 +318,10 @@ class Qwen25Detector(BaseFormatDetector):
        self.bot_token = "<tool_call>"
        self.eot_token = "</tool_call>"

+    def has_tool_call(self, text: str) -> bool:
+        """Check if the text contains a Qwen 2.5 format tool call."""
+        return self.bot_token in text
+
    def detect_and_parse(self, text: str, tools: List[Function]) -> List[ToolCallItem]:
        """
        One-time parsing: Detects and parses tool calls in the provided text.
@@ -352,6 +356,10 @@ class MistralDetector(BaseFormatDetector):
        self.bot_token = "[TOOL_CALLS] ["
        self.tool_call_regex = re.compile(r"\[{.*}\]", re.DOTALL)

+    def has_tool_call(self, text: str) -> bool:
+        """Check if the text contains a Mistral format tool call."""
+        return self.bot_token in text
+
    def _clean_text(self, text: str) -> str:
        """
        clean text to only leave ''[TOOL_CALLS] [{"name": xxx, "arguments": {xxx}}]'
@@ -397,12 +405,21 @@ class Llama32Detector(BaseFormatDetector):
        super().__init__()
        self.bot_token = "<|python_tag|>"

+    def has_tool_call(self, text: str) -> bool:
+        """Check if the text contains a Llama 3.2 format tool call."""
+        # depending on the prompt format the Llama model may or may not
+        # prefix the output with the <|python_tag|> token
+        return "<|python_tag|>" in text or text.startswith("{")
+
    def detect_and_parse(self, text: str, tools: List[Function]) -> List[ToolCallItem]:
        """Parse function calls from text, handling multiple JSON objects."""
-        if "<|python_tag|>" not in text:
+        if "<|python_tag|>" not in text and not text.startswith("{"):
            return []

-        _, action_text = text.split("<|python_tag|>")
+        if "<|python_tag|>" in text:
+            _, action_text = text.split("<|python_tag|>")
+        else:
+            action_text = text

        # Split by semicolon and process each part
        json_parts = [part.strip() for part in action_text.split(";") if part.strip()]
@@ -501,6 +518,20 @@ class FunctionCallParser:
        self.multi_format_parser = MultiFormatParser(detectors)
        self.tools = tools

+    def has_tool_call(self, text: str) -> bool:
+        """
+        Check if the given text contains a tool call in the format supported by this parser.
+        This delegates to the detector's implementation.
+
+        :param text: The text to check for tool calls
+        :return: True if the text contains a tool call, False otherwise
+        """
+        # Check all detectors in the multi_format_parser
+        for detector in self.multi_format_parser.detectors:
+            if detector.has_tool_call(text):
+                return True
+        return False
+
    def parse_non_stream(self, full_text: str):
        """
        Non-streaming call: one-time parsing

--- a/python/sglang/srt/openai_api/adapter.py
+++ b/python/sglang/srt/openai_api/adapter.py
@@ -1115,27 +1115,29 @@ def v1_chat_generate_response(
        else:
            reasoning_text = None

-        if tool_choice != "none" and any([i in text for i in TOOLS_TAG_LIST]):
-            if finish_reason == "stop":
-                finish_reason = "tool_calls"
-            try:
-                parser = FunctionCallParser(tools, tool_call_parser)
-                full_normal_text, call_info_list = parser.parse_non_stream(text)
-                tool_calls = [
-                    ToolCall(
-                        id=str(call_info.tool_index),
-                        function=FunctionResponse(
-                            name=call_info.name, arguments=call_info.parameters
-                        ),
+        if tool_choice != "none" and tools:
+            parser = FunctionCallParser(tools, tool_call_parser)
+            if parser.has_tool_call(text):
+                if finish_reason["type"] == "stop":
+                    finish_reason["type"] = "tool_calls"
+                    finish_reason["matched"] = None
+                try:
+                    full_normal_text, call_info_list = parser.parse_non_stream(text)
+                    tool_calls = [
+                        ToolCall(
+                            id=str(call_info.tool_index),
+                            function=FunctionResponse(
+                                name=call_info.name, arguments=call_info.parameters
+                            ),
+                        )
+                        for call_info in call_info_list
+                    ]
+                except Exception as e:
+                    logger.error(f"Exception: {e}")
+                    return create_error_response(
+                        HTTPStatus.BAD_REQUEST,
+                        "Failed to parse fc related info to json format!",
                    )
-                    for call_info in call_info_list
-                ]
-            except Exception as e:
-                logger.error(f"Exception: {e}")
-                return create_error_response(
-                    HTTPStatus.BAD_REQUEST,
-                    "Failed to parse fc related info to json format!",
-                )

        if to_file:
            # to make the choice data json serializable