Add xLAM tool parser support (#17148)

1d0ae26c · Zuxin · GitHub · 60219995 · 1d0ae26c · 1d0ae26c
Unverified Commit 1d0ae26c authored Jun 18, 2025 by Zuxin Committed by GitHub Jun 19, 2025
8 changed files
--- a/docs/features/tool_calling.md
+++ b/docs/features/tool_calling.md
@@ -226,6 +226,25 @@ AI21's Jamba-1.5 models are supported.
 Flags: `--tool-call-parser jamba`
+### xLAM Models (`xlam`)
+The xLAM tool parser is designed to support models that generate tool calls in various JSON formats. It detects function calls in several different output styles:
+1. Direct JSON arrays: Output strings that are JSON arrays starting with `[` and ending with `]`
+2. Thinking tags: A `<think>...</think>` reasoning section followed by a JSON array (the reasoning is returned as text content)
+3. Code blocks: JSON in code blocks (```json ...```)
+4. Tool calls tags: Using `[TOOL_CALLS]` or `<tool_call>...</tool_call>` tags
+Parallel function calls are supported, and the parser can effectively separate text content from tool calls.
+Supported models:
+* Salesforce Llama-xLAM models: `Salesforce/Llama-xLAM-2-8B-fc-r`, `Salesforce/Llama-xLAM-2-70B-fc-r`
+* Qwen-xLAM models: `Salesforce/xLAM-1B-fc-r`, `Salesforce/xLAM-3B-fc-r`, `Salesforce/Qwen-xLAM-32B-fc-r`
+Flags:
+* For Llama-based xLAM models: `--tool-call-parser xlam --chat-template examples/tool_chat_template_xlam_llama.jinja`
+* For Qwen-based xLAM models: `--tool-call-parser xlam --chat-template examples/tool_chat_template_xlam_qwen.jinja`
 ### Qwen Models
 For Qwen2.5, the chat template in tokenizer_config.json has already included support for the Hermes-style tool use. Therefore, you can use the `hermes` parser to enable tool calls for Qwen models. For more detailed information, please refer to the official [Qwen documentation](https://qwen.readthedocs.io/en/latest/framework/function_call.html#vllm)

--- a/examples/online_serving/openai_chat_completion_client_with_tools_xlam.py
+++ b/examples/online_serving/openai_chat_completion_client_with_tools_xlam.py
+# SPDX-License-Identifier: Apache-2.0
+# ruff: noqa: E501
+"""
+Set up this example by starting a vLLM OpenAI-compatible server with tool call
+options enabled for xLAM-2 models:
+vllm serve --model Salesforce/Llama-xLAM-2-8b-fc-r --enable-auto-tool-choice --tool-call-parser xlam
+OR
+vllm serve --model Salesforce/xLAM-2-3b-fc-r --enable-auto-tool-choice --tool-call-parser xlam
+"""
+import json
+import time
+from openai import OpenAI
+# Modify OpenAI's API key and API base to use vLLM's API server.
+openai_api_key = "empty"
+openai_api_base = "http://localhost:8000/v1"
+# Define tool functions
+def get_weather(location: str, unit: str):  # Stub tool: formats a fixed 22-degree reading for the given location and unit; no real lookup is done.
+    return f"Weather in {location} is 22 degrees {unit}."
+def calculate_expression(expression: str):  # Tool: evaluates a Python expression string and returns the outcome (or the error) as text.
+    try:
+        result = eval(expression)  # NOTE(review): eval() executes arbitrary code, and the expression comes from model-generated tool arguments; tolerable only in a local demo, never with untrusted input.
+        return f"The result of {expression} is {result}"
+    except Exception as e:  # Any evaluation failure is reported in the returned string instead of being raised.
+        return f"Could not calculate {expression}: {e}"
+def translate_text(text: str, target_language: str):  # Stub tool: returns a placeholder string; no translation is performed.
+    return f"Translation of '{text}' to {target_language}: [translated content]"
+# Define tools
+tools = [
+    {
+        "type": "function",
+        "function": {
+            "name": "get_weather",
+            "description": "Get the current weather in a given location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "City and state, e.g., 'San Francisco, CA'",
+                    },
+                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+                },
+                "required": ["location", "unit"],
+            },
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "calculate_expression",
+            "description": "Calculate a mathematical expression",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "expression": {
+                        "type": "string",
+                        "description": "Mathematical expression to evaluate, needs to be a valid python expression",
+                    }
+                },
+                "required": ["expression"],
+            },
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "translate_text",
+            "description": "Translate text to another language",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "text": {"type": "string", "description": "Text to translate"},
+                    "target_language": {
+                        "type": "string",
+                        "description": "Target language for translation",
+                    },
+                },
+                "required": ["text", "target_language"],
+            },
+        },
+    },
+]
+# Map of function names to implementations
+tool_functions = {
+    "get_weather": get_weather,
+    "calculate_expression": calculate_expression,
+    "translate_text": translate_text,
+}
+def process_response(response, tool_functions, original_query):  # Prints the reply, runs any requested tools from tool_functions, then sends one follow-up request through the module-level `client`.
+    """Process a non-streaming response with possible tool calls"""
+    print("\n--- Response Output ---")
+    # Check if the response has content
+    if response.choices[0].message.content:
+        print(f"Content: {response.choices[0].message.content}")
+    # Check if the response has tool calls
+    if response.choices[0].message.tool_calls:
+        print("--------------------------------")
+        print(f"Tool calls: {response.choices[0].message.tool_calls}")
+        print("--------------------------------")
+        # Collect all tool calls and results before making follow-up request
+        tool_results = []
+        assistant_message = {"role": "assistant"}
+        if response.choices[0].message.content:
+            assistant_message["content"] = response.choices[0].message.content
+        assistant_tool_calls = []
+        # Process each tool call
+        for tool_call in response.choices[0].message.tool_calls:
+            function_name = tool_call.function.name
+            function_args = tool_call.function.arguments  # Kept as the raw JSON string; it is echoed back unchanged in the follow-up.
+            function_id = tool_call.id
+            print(f"Function called: {function_name}")
+            print(f"Arguments: {function_args}")
+            print(f"Function ID: {function_id}")
+            # Execute the function
+            try:
+                # Parse the JSON arguments
+                args = json.loads(function_args)
+                # Call the function with the arguments
+                function_result = tool_functions[function_name](**args)  # An unknown tool name or mismatched arguments raises here and is caught below.
+                print(f"\n--- Function Result ---\n{function_result}\n")
+                # Add tool call to assistant message
+                assistant_tool_calls.append(
+                    {
+                        "id": function_id,
+                        "type": "function",
+                        "function": {"name": function_name, "arguments": function_args},
+                    }
+                )
+                # Add tool result to tool_results
+                tool_results.append(
+                    {
+                        "role": "tool",
+                        "tool_call_id": function_id,
+                        "content": function_result,
+                    }
+                )
+            except Exception as e:  # A failed call is only printed; it is left out of both the assistant tool_calls and the tool results.
+                print(f"Error executing function: {e}")
+        # Add tool_calls to assistant message
+        assistant_message["tool_calls"] = assistant_tool_calls
+        # Create a follow-up message with all function results
+        follow_up_messages = [
+            {"role": "user", "content": original_query},
+            assistant_message,
+        ]
+        # Add all tool results to the messages
+        follow_up_messages.extend(tool_results)
+        # Get completion with all tool results in a single follow-up
+        follow_up_response = client.chat.completions.create(
+            model=client.models.list().data[0].id,  # First model reported by the server.
+            messages=follow_up_messages,
+            stream=False,
+        )
+        print("\n--- Follow-up Response ---")
+        print(follow_up_response.choices[0].message.content)
+        print("--- End Follow-up ---\n")
+    print("--- End Response ---\n")
+def run_test_case(query, test_name):  # Relies on the module-level `client`, `tools` and `tool_functions`.
+    """Run a single test case with the given query"""
+    print(f"\n{'=' * 50}\nTEST CASE: {test_name}\n{'=' * 50}")
+    print(f"Query: '{query}'")
+    start_time = time.time()
+    # Create non-streaming chat completion request
+    response = client.chat.completions.create(
+        model=client.models.list().data[0].id,  # First model reported by the server.
+        messages=[{"role": "user", "content": query}],
+        tools=tools,
+        tool_choice="auto",
+        stream=False,
+    )
+    # Process the non-streaming response, passing the original query
+    process_response(response, tool_functions, query)
+    end_time = time.time()
+    print(f"Test completed in {end_time - start_time:.2f} seconds")  # Elapsed time covers the request plus tool execution and the follow-up request.
+def main():  # Creates the shared OpenAI client, then runs each sample query in sequence.
+    # Initialize OpenAI client
+    global client  # run_test_case and process_response read this module-level name.
+    client = OpenAI(
+        api_key=openai_api_key,
+        base_url=openai_api_base,
+    )
+    # Run test cases
+    test_cases = [  # (query, display name) pairs
+        ("I want to know the weather in San Francisco", "Weather Information"),
+        ("Calculate 25 * 17 + 31", "Math Calculation"),
+        ("Translate 'Hello world' to Spanish", "Text Translation"),
+        ("What is the weather in Tokyo and New York in celsius", "Multiple Tool Usage"),
+    ]
+    # Execute all test cases
+    for query, test_name in test_cases:
+        run_test_case(query, test_name)
+        time.sleep(1)  # Small delay between tests
+    print("\nAll tests completed.")
+if __name__ == "__main__":
+    main()
--- a/examples/online_serving/openai_chat_completion_client_with_tools_xlam_streaming.py
+++ b/examples/online_serving/openai_chat_completion_client_with_tools_xlam_streaming.py
+# SPDX-License-Identifier: Apache-2.0
+# ruff: noqa: E501
+"""
+Set up this example by starting a vLLM OpenAI-compatible server with tool call
+options enabled for xLAM-2 models:
+vllm serve --model Salesforce/Llama-xLAM-2-8b-fc-r --enable-auto-tool-choice --tool-call-parser xlam
+OR
+vllm serve --model Salesforce/xLAM-2-3b-fc-r --enable-auto-tool-choice --tool-call-parser xlam
+This example demonstrates streaming tool calls with xLAM models.
+"""
+import json
+import time
+from openai import OpenAI
+# Modify OpenAI's API key and API base to use vLLM's API server.
+openai_api_key = "empty"
+openai_api_base = "http://localhost:8000/v1"
+# Define tool functions
+def get_weather(location: str, unit: str):  # Stub tool: formats a fixed 22-degree reading for the given location and unit; no real lookup is done.
+    return f"Weather in {location} is 22 degrees {unit}."
+def calculate_expression(expression: str):  # Tool: evaluates a Python expression string and returns the outcome (or the error) as text.
+    try:
+        result = eval(expression)  # NOTE(review): eval() executes arbitrary code, and the expression comes from model-generated tool arguments; tolerable only in a local demo, never with untrusted input.
+        return f"The result of {expression} is {result}"
+    except Exception as e:  # Any evaluation failure is reported in the returned string instead of being raised.
+        return f"Could not calculate {expression}: {e}"
+def translate_text(text: str, target_language: str):  # Stub tool: returns a placeholder string; no translation is performed.
+    return f"Translation of '{text}' to {target_language}: [translated content]"
+# Define tools
+tools = [
+    {
+        "type": "function",
+        "function": {
+            "name": "get_weather",
+            "description": "Get the current weather in a given location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "City and state, e.g., 'San Francisco, CA'",
+                    },
+                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+                },
+                "required": ["location", "unit"],
+            },
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "calculate_expression",
+            "description": "Calculate a mathematical expression",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "expression": {
+                        "type": "string",
+                        "description": "Mathematical expression to evaluate, needs to be a valid Python expression",
+                    }
+                },
+                "required": ["expression"],
+            },
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "translate_text",
+            "description": "Translate text to another language",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "text": {"type": "string", "description": "Text to translate"},
+                    "target_language": {
+                        "type": "string",
+                        "description": "Target language for translation",
+                    },
+                },
+                "required": ["text", "target_language"],
+            },
+        },
+    },
+]
+# Map of function names to implementations
+tool_functions = {
+    "get_weather": get_weather,
+    "calculate_expression": calculate_expression,
+    "translate_text": translate_text,
+}
+def process_stream(response, tool_functions, original_query):  # Accumulates streamed tool-call fragments, runs the tools, then streams one follow-up request through the module-level `client`.
+    """Process a streaming response with possible tool calls"""
+    # Track multiple tool calls
+    tool_calls = {}  # Dictionary to store tool calls by ID
+    current_id = None  # ID of the most recent tool call seen; chunks without an ID are attributed to it.
+    print("\n--- Stream Output ---")
+    for chunk in response:
+        # Handle tool calls in the stream
+        if chunk.choices[0].delta.tool_calls:
+            for tool_call_chunk in chunk.choices[0].delta.tool_calls:
+                # Get the tool call ID
+                if hasattr(tool_call_chunk, "id") and tool_call_chunk.id:
+                    current_id = tool_call_chunk.id
+                    if current_id not in tool_calls:
+                        tool_calls[current_id] = {
+                            "function_name": None,
+                            "function_args": "",
+                            "function_id": current_id,
+                        }
+                # Extract function information as it comes in chunks
+                if (
+                    hasattr(tool_call_chunk, "function")
+                    and current_id
+                    and current_id in tool_calls
+                ):
+                    if (
+                        hasattr(tool_call_chunk.function, "name")
+                        and tool_call_chunk.function.name
+                    ):
+                        tool_calls[current_id]["function_name"] = (
+                            tool_call_chunk.function.name
+                        )
+                        print(f"Function called: {tool_call_chunk.function.name}")
+                    if (
+                        hasattr(tool_call_chunk.function, "arguments")
+                        and tool_call_chunk.function.arguments
+                    ):
+                        tool_calls[current_id]["function_args"] += (  # Argument text arrives in pieces and is concatenated in arrival order.
+                            tool_call_chunk.function.arguments
+                        )
+                        print(f"Arguments chunk: {tool_call_chunk.function.arguments}")
+        # Handle regular content in the stream
+        elif chunk.choices[0].delta.content:  # Content is printed only for chunks that carry no tool-call delta.
+            print(chunk.choices[0].delta.content, end="")
+    print("\n--- End Stream ---\n")
+    # Execute each function call and build messages for follow-up
+    follow_up_messages = [{"role": "user", "content": original_query}]
+    for tool_id, tool_data in tool_calls.items():
+        function_name = tool_data["function_name"]
+        function_args = tool_data["function_args"]
+        function_id = tool_data["function_id"]
+        if function_name and function_args:  # Calls with a missing name or an empty argument string are skipped.
+            try:
+                # Parse the JSON arguments
+                args = json.loads(function_args)
+                # Call the function with the arguments
+                function_result = tool_functions[function_name](**args)  # An unknown tool name or mismatched arguments raises here and is caught below.
+                print(
+                    f"\n--- Function Result ({function_name}) ---\n{function_result}\n"
+                )
+                # Add the assistant message with tool call
+                follow_up_messages.append(  # One assistant message per executed call, immediately followed by its tool result.
+                    {
+                        "role": "assistant",
+                        "tool_calls": [
+                            {
+                                "id": function_id,
+                                "type": "function",
+                                "function": {
+                                    "name": function_name,
+                                    "arguments": function_args,
+                                },
+                            }
+                        ],
+                    }
+                )
+                # Add the tool message with function result
+                follow_up_messages.append(
+                    {
+                        "role": "tool",
+                        "tool_call_id": function_id,
+                        "content": function_result,
+                    }
+                )
+            except Exception as e:  # A failed call is only printed and contributes no follow-up messages.
+                print(f"Error executing function: {e}")
+    # Only send follow-up if we have results to process
+    if len(follow_up_messages) > 1:  # More than the initial user message means at least one tool call succeeded.
+        # Create a follow-up message with all the function results
+        follow_up_response = client.chat.completions.create(
+            model=client.models.list().data[0].id,  # First model reported by the server.
+            messages=follow_up_messages,
+            stream=True,
+        )
+        print("\n--- Follow-up Response ---")
+        for chunk in follow_up_response:
+            if chunk.choices[0].delta.content:
+                print(chunk.choices[0].delta.content, end="")
+        print("\n--- End Follow-up ---\n")
+def run_test_case(query, test_name):  # Relies on the module-level `client`, `tools` and `tool_functions`.
+    """Run a single test case with the given query"""
+    print(f"\n{'=' * 50}\nTEST CASE: {test_name}\n{'=' * 50}")
+    print(f"Query: '{query}'")
+    start_time = time.time()
+    # Create streaming chat completion request
+    response = client.chat.completions.create(
+        model=client.models.list().data[0].id,  # First model reported by the server.
+        messages=[{"role": "user", "content": query}],
+        tools=tools,
+        tool_choice="auto",
+        stream=True,
+    )
+    # Process the streaming response
+    process_stream(response, tool_functions, query)
+    end_time = time.time()
+    print(f"Test completed in {end_time - start_time:.2f} seconds")  # Elapsed time covers the stream plus tool execution and the follow-up stream.
+def main():  # Creates the shared OpenAI client, then runs each sample query in sequence.
+    # Initialize OpenAI client
+    global client  # run_test_case and process_stream read this module-level name.
+    client = OpenAI(
+        api_key=openai_api_key,
+        base_url=openai_api_base,
+    )
+    # Run test cases
+    test_cases = [  # (query, display name) pairs
+        ("I want to know the weather in San Francisco", "Weather Information"),
+        ("Calculate 25 * 17 + 31", "Math Calculation"),
+        ("Translate 'Hello world' to Spanish", "Text Translation"),
+        ("What is the weather in Tokyo and New York in celsius", "Multiple Tool Usage"),
+    ]
+    # Execute all test cases
+    for query, test_name in test_cases:
+        run_test_case(query, test_name)
+        time.sleep(1)  # Small delay between tests
+    print("\nAll tests completed.")
+if __name__ == "__main__":
+    main()
--- a/examples/tool_chat_template_xlam_llama.jinja
+++ b/examples/tool_chat_template_xlam_llama.jinja
+{{- bos_token }}
+{%- if custom_tools is defined %}
+    {%- set tools = custom_tools %}
+{%- endif %}
+{%- if not tools_in_user_message is defined %}
+    {%- set tools_in_user_message = true %}
+{%- endif %}
+{%- if not tools is defined %}
+    {%- set tools = none %}
+{%- endif %}
+{#- Extract system message #}
+{{- "<|start_header_id|>system<|end_header_id|>\n\n" }}
+{%- if messages[0]['role'] == 'system' %}
+    {%- set system_message = messages[0]['content'] | trim %}
+    {%- set messages = messages[1:] %}
+    {{- system_message + "\n" }}
+{%- else %}
+    {%- set system_message = "You are a helpful assistant. You are developed by Salesforce xLAM team." %}
+    {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: 
+[{"name": "tool_call_name", "arguments": {"arg1": "value1", "arg2": "value2"}}, ... (additional parallel tool calls as needed)]
+If no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}
+    {{- system_message + "\n" }}
+    {%- if tools is not none %}
+        {{- format_instruction + "\n\n" }}
+    {%- endif %}
+{%- endif %}
+{%- if tools is not none %}
+    {%- for t in tools %}
+        {{- t | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+{%- endif %}
+{{- "<|eot_id|>" }}
+{%- for message in messages %}
+    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
+        {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }}
+    {%- elif 'tool_calls' in message %}
+        {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}}
+        {%- if message['tool_calls'] %}
+            {{- "[" }}
+            {%- for tool_call_function in message.tool_calls %}
+                {%- set tool_call = tool_call_function.function %}
+                {{- '{"name": "' + tool_call.name + '", ' }}
+                {{- '"arguments": ' }}
+                {{- tool_call.arguments | tojson }}
+                {{- "}" }}
+                {%- if not loop.last %}
+                    {{- ", " }}
+                {%- endif %}
+            {%- endfor %}
+            {{- "]" }}
+            {{- "<|eot_id|>" }}
+        {%- elif message['content'] %}
+            {{- message['content'] | trim + '<|eot_id|>' }}
+        {%- else %}
+            {{- "[]\n" + '<|eot_id|>' }}
+        {%- endif %}
+    {%- elif message.role == "tool" or message.role == "ipython" %}
+        {{- "<|start_header_id|>" + "ipython" + "<|end_header_id|>\n\n" }}
+        {%- set content = message["content"] %}
+        {%- if content is mapping or (content is iterable and content is not string) %}
+            {{- content | tojson }}
+        {%- else %}
+            {{- content }}
+        {%- endif %}
+        {{- "<|eot_id|>" }}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}
+{%- endif %}
\ No newline at end of file
--- a/examples/tool_chat_template_xlam_qwen.jinja
+++ b/examples/tool_chat_template_xlam_qwen.jinja
+{# System message #}
+{{- "<|im_start|>system\n" }}
+{%- if messages[0]['role'] == 'system' %}
+    {%- set system_message = messages[0]['content'] | trim %}
+    {%- set messages = messages[1:] %}
+    {{- system_message + "\n" }}
+{%- else %}
+    {%- set system_message = "You are a helpful assistant. You are developed by Salesforce xLAM team." %}
+    {% set format_instruction %}You have access to a set of tools. When using tools, make calls in a single JSON array: 
+[{"name": "tool_call_name", "arguments": {"arg1": "value1", "arg2": "value2"}}, ... (additional parallel tool calls as needed)]
+If no tool is suitable, state that explicitly. If the user's input lacks required parameters, ask for clarification. Do not interpret or respond until tool results are returned. Once they are available, process them or make additional calls if needed. For tasks that don't require tools, such as casual conversation or general advice, respond directly in plain text. The available tools are:{% endset %}
+    {{- system_message + "\n" }}
+    {%- if tools is not none %}
+        {{- format_instruction + "\n\n" }}
+    {%- endif %}
+{%- endif %}
+{%- if tools is not none %}
+    {%- for func in tools %}
+        {{- func | tojson(indent=4) }}
+        {{- "\n\n" }}
+    {%- endfor %}
+{%- endif %}
+{{- "<|im_end|>\n" }}
+{%- for message in messages %}
+    {%- if message['role'] == 'tool' %}
+        {{- "<|im_start|>tool\n" }}
+        {%- if message.content is defined and message.content.content is defined %}
+            {%- set content = message.content.content %}
+        {%- else %}
+            {%- set content = message.content %}
+        {%- endif %}
+        {%- if content is mapping or content is iterable and content is not string %}
+            {{- content | tojson }}
+        {%- else %}
+            {{- content }}
+        {%- endif %}
+        {{- "<|im_end|>\n" }}
+    {%- elif 'tool_calls' in message %}
+        {{- "<|im_start|>assistant\n" }}
+        {%- if message['tool_calls'] %}
+            {{- "[" }}
+            {%- for tool_call in message.tool_calls %}
+                {%- set out = tool_call.function | tojson %}
+                {{- out }}
+                {%- if not loop.last %}
+                    {{- ", " }}
+                {%- endif %}
+            {%- endfor %}
+            {{- "]"}}
+        {%- elif message['content'] %}
+            {{- message['content'] | trim }}
+        {%- else %}
+            {{- "[]\n" }}
+        {%- endif %}
+        {{- "<|im_end|>\n" }}
+    {%- else %}
+        {{- "<|im_start|>" + message['role'] + "\n" + message['content'] | trim + "<|im_end|>\n" }}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- "<|im_start|>assistant\n" }}
+{%- endif %}
--- a/tests/tool_use/test_xlam_tool_parser.py
+++ b/tests/tool_use/test_xlam_tool_parser.py
+# SPDX-License-Identifier: Apache-2.0
+import json
+import pytest
+from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
+from vllm.entrypoints.openai.tool_parsers import xLAMToolParser
+from vllm.transformers_utils.tokenizer import get_tokenizer
+# Use a common model that is likely to be available
+MODEL = "Salesforce/Llama-xLAM-2-8B-fc-r"
+@pytest.fixture(scope="module")  # Module scope: the tokenizer is fetched once and shared by every test in this file.
+def xlam_tokenizer():
+    return get_tokenizer(tokenizer_name=MODEL)
+@pytest.fixture  # Default (function) scope: each test receives a newly constructed parser.
+def xlam_tool_parser(xlam_tokenizer):
+    return xLAMToolParser(xlam_tokenizer)
+def assert_tool_calls(actual_tool_calls: list[ToolCall],  # Helper: pairwise comparison of parsed tool calls against expectations.
+                      expected_tool_calls: list[ToolCall]):
+    assert len(actual_tool_calls) == len(expected_tool_calls)
+    for actual_tool_call, expected_tool_call in zip(actual_tool_calls,
+                                                    expected_tool_calls):
+        assert isinstance(actual_tool_call.id, str)  # IDs are checked for type and length only; the expected ID is never compared.
+        assert len(actual_tool_call.id) > 16
+        assert actual_tool_call.type == "function"
+        assert actual_tool_call.function == expected_tool_call.function
+def test_extract_tool_calls_no_tools(xlam_tool_parser):  # Plain text with no tool call must come back unchanged as content.
+    model_output = "This is a test"
+    extracted_tool_calls = xlam_tool_parser.extract_tool_calls(
+        model_output, request=None)  # type: ignore[arg-type]
+    assert not extracted_tool_calls.tools_called
+    assert extracted_tool_calls.tool_calls == []
+    assert extracted_tool_calls.content == model_output
+@pytest.mark.parametrize(
+    ids=[
+        "parallel_tool_calls",
+        "single_tool_with_think_tag",
+        "single_tool_with_json_code_block",
+        "single_tool_with_tool_calls_tag",
+    ],
+    argnames=["model_output", "expected_tool_calls", "expected_content"],
+    argvalues=[
+        (
+            """[{"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}, {"name": "get_current_weather", "arguments": {"city": "Orlando", "state": "FL", "unit": "fahrenheit"}}]""",  # noqa: E501
+            [
+                ToolCall(function=FunctionCall(
+                    name="get_current_weather",
+                    arguments=json.dumps({
+                        "city": "Dallas",
+                        "state": "TX",
+                        "unit": "fahrenheit",
+                    }),
+                )),
+                ToolCall(function=FunctionCall(
+                    name="get_current_weather",
+                    arguments=json.dumps({
+                        "city": "Orlando",
+                        "state": "FL",
+                        "unit": "fahrenheit",
+                    }),
+                )),
+            ],
+            None,
+        ),
+        (
+            """<think>I'll help you with that.</think>[{"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}]""",  # noqa: E501
+            [
+                ToolCall(function=FunctionCall(
+                    name="get_current_weather",
+                    arguments=json.dumps({
+                        "city": "Dallas",
+                        "state": "TX",
+                        "unit": "fahrenheit",
+                    }),
+                ))
+            ],
+            "<think>I'll help you with that.</think>",
+        ),
+        (
+            """I'll help you with that.\n```json\n[{"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}]\n```""",  # noqa: E501
+            [
+                ToolCall(function=FunctionCall(
+                    name="get_current_weather",
+                    arguments=json.dumps({
+                        "city": "Dallas",
+                        "state": "TX",
+                        "unit": "fahrenheit",
+                    }),
+                ))
+            ],
+            "I'll help you with that.",
+        ),
+        (
+            """I'll check the weather for you.[TOOL_CALLS][{"name": "get_current_weather", "arguments": {"city": "Dallas", "state": "TX", "unit": "fahrenheit"}}]""",  # noqa: E501
+            [
+                ToolCall(function=FunctionCall(
+                    name="get_current_weather",
+                    arguments=json.dumps({
+                        "city": "Dallas",
+                        "state": "TX",
+                        "unit": "fahrenheit",
+                    }),
+                ))
+            ],
+            "I'll check the weather for you.",
+        ),
+    ],
+)
+def test_extract_tool_calls(xlam_tool_parser, model_output,  # Each case checks tools_called, the parsed calls, and the remaining text content.
+                            expected_tool_calls, expected_content):
+    extracted_tool_calls = xlam_tool_parser.extract_tool_calls(
+        model_output, request=None)  # type: ignore[arg-type]
+    assert extracted_tool_calls.tools_called
+    assert_tool_calls(extracted_tool_calls.tool_calls, expected_tool_calls)
+    assert extracted_tool_calls.content == expected_content
+@pytest.mark.parametrize(
+    ids=["list_structured_tool_call"],
+    argnames=["model_output", "expected_tool_calls", "expected_content"],
+    argvalues=[
+        (
+            """[{"name": "get_current_weather", "arguments": {"city": "Seattle", "state": "WA", "unit": "celsius"}}]""",  # noqa: E501
+            [
+                ToolCall(function=FunctionCall(
+                    name="get_current_weather",
+                    arguments=json.dumps({
+                        "city": "Seattle",
+                        "state": "WA",
+                        "unit": "celsius",
+                    }),
+                ))
+            ],
+            None,  # A bare JSON array is expected to leave no text content.
+        ),
+    ],
+)
+def test_extract_tool_calls_list_structure(xlam_tool_parser, model_output,
+                                           expected_tool_calls,
+                                           expected_content):
+    """Test extraction of tool calls when the model outputs a list-structured tool call."""  # noqa: E501
+    extracted_tool_calls = xlam_tool_parser.extract_tool_calls(
+        model_output, request=None)  # type: ignore[arg-type]
+    assert extracted_tool_calls.tools_called
+    assert_tool_calls(extracted_tool_calls.tool_calls, expected_tool_calls)
+    assert extracted_tool_calls.content == expected_content
+# Test for preprocess_model_output method
+def test_preprocess_model_output(xlam_tool_parser):  # Checks the (content, potential_tool_calls) pair returned for four output shapes.
+    # Test with list structure
+    model_output = """[{"name": "get_current_weather", "arguments": {"city": "Seattle"}}]"""  # noqa: E501
+    content, potential_tool_calls = xlam_tool_parser.preprocess_model_output(
+        model_output)
+    assert content is None  # A bare JSON array leaves no text content.
+    assert potential_tool_calls == model_output
+    # Test with thinking tag
+    model_output = """<think>I'll help you with that.</think>[{"name": "get_current_weather", "arguments": {"city": "Seattle"}}]"""  # noqa: E501
+    content, potential_tool_calls = xlam_tool_parser.preprocess_model_output(
+        model_output)
+    assert content == "<think>I'll help you with that.</think>"  # The think section, tags included, is expected back as content.
+    assert (
+        potential_tool_calls ==
+        '[{"name": "get_current_weather", "arguments": {"city": "Seattle"}}]')
+    # Test with JSON code block
+    model_output = """I'll help you with that.
+```json
+[{"name": "get_current_weather", "arguments": {"city": "Seattle"}}]
+```"""
+    content, potential_tool_calls = xlam_tool_parser.preprocess_model_output(
+        model_output)
+    assert content == "I'll help you with that."
+    assert "get_current_weather" in potential_tool_calls  # Substring check only; the exact extracted text is not pinned here.
+    # Test with no tool calls
+    model_output = """I'll help you with that."""
+    content, potential_tool_calls = xlam_tool_parser.preprocess_model_output(
+        model_output)
+    assert content == model_output
+    assert potential_tool_calls is None
+# Simulate streaming to test extract_tool_calls_streaming
+def test_streaming_with_list_structure(xlam_tool_parser):  # Drives extract_tool_calls_streaming twice over one complete JSON-array output.
+    # Reset streaming state
+    xlam_tool_parser.prev_tool_calls = []
+    xlam_tool_parser.current_tools_sent = []
+    xlam_tool_parser.streamed_args = []
+    xlam_tool_parser.current_tool_id = -1
+    # Simulate receiving a message with list structure
+    current_text = """[{"name": "get_current_weather", "arguments": {"city": "Seattle"}}]"""  # noqa: E501
+    # First call to set up the tool
+    xlam_tool_parser.extract_tool_calls_streaming(
+        previous_text="",
+        current_text=current_text,
+        delta_text="]",  # The whole output is supplied as current_text in one step; only its last character is given as the delta.
+        previous_token_ids=[],
+        current_token_ids=[],
+        delta_token_ids=[],
+        request=None,
+    )
+    # Make sure the tool is set up correctly
+    assert (xlam_tool_parser.current_tool_id
+            >= 0), "Tool index should be initialized"
+    # Manually set up the state for sending the tool name
+    xlam_tool_parser.current_tools_sent = [False]
+    # Call to send the function name
+    result = xlam_tool_parser.extract_tool_calls_streaming(
+        previous_text=current_text,
+        current_text=current_text,
+        delta_text="",
+        previous_token_ids=[],
+        current_token_ids=[],
+        delta_token_ids=[],
+        request=None,
+    )
+    # Check that we get a result with the proper tool call
+    if result is not None:  # NOTE(review): when result is None the assertions below are skipped and the test passes vacuously.
+        assert hasattr(result, "tool_calls")
+        assert len(result.tool_calls) == 1
+        assert result.tool_calls[0].function.name == "get_current_weather"
--- a/vllm/entrypoints/openai/tool_parsers/__init__.py
+++ b/vllm/entrypoints/openai/tool_parsers/__init__.py
@@ -13,11 +13,12 @@ from .llama_tool_parser import Llama3JsonToolParser
 from .mistral_tool_parser import MistralToolParser
 from .phi4mini_tool_parser import Phi4MiniJsonToolParser
 from .pythonic_tool_parser import PythonicToolParser
+from .xlam_tool_parser import xLAMToolParser
 __all__ = [
    "ToolParser", "ToolParserManager", "Granite20bFCToolParser",
    "GraniteToolParser", "Hermes2ProToolParser", "MistralToolParser",
    "Internlm2ToolParser", "Llama3JsonToolParser", "JambaToolParser",
    "Llama4PythonicToolParser", "PythonicToolParser", "Phi4MiniJsonToolParser",
-    "DeepSeekV3ToolParser"
+    "DeepSeekV3ToolParser", "xLAMToolParser"
 ]
--- a/vllm/entrypoints/openai/tool_parsers/xlam_tool_parser.py
+++ b/vllm/entrypoints/openai/tool_parsers/xlam_tool_parser.py