[feat] Add SGLANG_TOOL_STRICT_LEVEL for tool-call behavior control (#12423)

Signed-off-by: Xinyuan Tong <xinyuantong.cs@gmail.com>

[feat] Add SGLANG_TOOL_STRICT_LEVEL for tool-call behavior control (#12423)
Signed-off-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
d2a8f71c · Xinyuan Tong · GitHub · 2b7bf11b · d2a8f71c · d2a8f71c
Unverified Commit d2a8f71c authored Nov 01, 2025 by Xinyuan Tong Committed by GitHub Nov 01, 2025
4 changed files
--- a/docs/references/environment_variables.md
+++ b/docs/references/environment_variables.md
@@ -108,3 +108,9 @@ SGLang supports various environment variables that can be used to configure its
 | --- | --- | --- |
 | `SGLANG_WAIT_WEIGHTS_READY_TIMEOUT` | Timeout period for waiting on weights | `120` |
 | `SGLANG_DISABLE_OUTLINES_DISK_CACHE` | Disable Outlines disk cache | `true` |
+## Function Calling / Tool Use
+| Environment Variable | Description | Default Value |
+| --- | --- | --- |
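+| `SGLANG_TOOL_STRICT_LEVEL` | Controls the strictness level of tool call parsing and validation. Levels are cumulative: each level also applies everything from the levels below it. When the tool call parser is `kimi_k2`, the server forces this to `2`. <br>**Level 0**: Off - No extra strictness; only tools that set `strict=True` themselves are handled strictly <br>**Level 1**: Function strict - Enables structural tag constraints for all tools (even if none have `strict=True` set) <br>**Level 2**: Parameter strict - In addition to Level 1, enforces strict parameter validation for all tools, treating them as if they all have `strict=True` set | `0` |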
--- a/python/sglang/srt/environ.py
+++ b/python/sglang/srt/environ.py
@@ -2,6 +2,7 @@ import os
 import subprocess
 import warnings
 from contextlib import ExitStack, contextmanager
+from enum import IntEnum
 from typing import Any
@@ -105,6 +106,20 @@ class EnvFloat(EnvField):
            raise ValueError(f'"{value}" is not a valid float value')
+class ToolStrictLevel(IntEnum):
+    """
+    Defines the strictness levels for tool call parsing and validation.
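+    Levels are ordered and cumulative; callers compare them with ``>=``.
+    OFF: No extra strictness; only tools that set strict=True are handled strictly
+    FUNCTION: Enables structural tag constraints for all tools
+    PARAMETER: FUNCTION, plus strict parameter validation for all tools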
+    """
+    OFF = 0
+    FUNCTION = 1
+    PARAMETER = 2
 class Envs:
    # fmt: off
@@ -259,6 +274,9 @@ class Envs:
    # Sparse Embeddings
    SGLANG_EMBEDDINGS_SPARSE_HEAD = EnvStr(None)
+    # Tool-Call behavior
+    SGLANG_TOOL_STRICT_LEVEL = EnvInt(ToolStrictLevel.OFF)
    # fmt: on

--- a/python/sglang/srt/function_call/function_call_parser.py
+++ b/python/sglang/srt/function_call/function_call_parser.py
@@ -8,6 +8,7 @@ from sglang.srt.entrypoints.openai.protocol import (
    ToolCallConstraint,
    ToolChoice,
 )
+from sglang.srt.environ import ToolStrictLevel, envs
 from sglang.srt.function_call.base_format_detector import BaseFormatDetector
 from sglang.srt.function_call.core_types import ToolCallItem
 from sglang.srt.function_call.deepseekv3_detector import DeepSeekV3Detector
@@ -62,6 +63,7 @@ class FunctionCallParser:
        self.detector = detector
        self.tools = tools
+        self.tool_strict_level = envs.SGLANG_TOOL_STRICT_LEVEL.get()
    def has_tool_call(self, text: str) -> bool:
        """
@@ -142,7 +144,10 @@ class FunctionCallParser:
            info = get_structure_info(name)
            # accept all if not strict, otherwise only accept the schema
-            schema = function.parameters if function.strict else {}
+            is_strict = (
+                function.strict or self.tool_strict_level >= ToolStrictLevel.PARAMETER
+            )
+            schema = function.parameters if is_strict else {}
            tool_structures.append(
                StructuresResponseFormat(
@@ -180,7 +185,10 @@ class FunctionCallParser:
        if (
            self.detector.supports_structural_tag()
            and tool_choice == "auto"
-            and any(tool.function.strict for tool in self.tools)
+            and (
+                any(tool.function.strict for tool in self.tools)
+                or self.tool_strict_level >= ToolStrictLevel.FUNCTION
+            )
        ):
            tag = self.get_structure_tag()
            return ("structural_tag", tag)

--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -27,7 +27,7 @@ from typing import Dict, List, Literal, Optional, Union
 import orjson
 from sglang.srt.connector import ConnectorType
-from sglang.srt.environ import envs
+from sglang.srt.environ import ToolStrictLevel, envs
 from sglang.srt.function_call.function_call_parser import FunctionCallParser
 from sglang.srt.lora.lora_registry import LoRARef
 from sglang.srt.parser.reasoning_parser import ReasoningParser
@@ -1686,6 +1686,9 @@ class ServerArgs:
        os.environ["SGLANG_ENABLE_DETERMINISTIC_INFERENCE"] = (
            "1" if self.enable_deterministic_inference else "0"
        )
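+        # Force the highest strict level for Kimi K2 tool calls. This is applied
+        # unconditionally, so it replaces any SGLANG_TOOL_STRICT_LEVEL value the
+        # user may have configured.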
+        if self.tool_call_parser == "kimi_k2":
+            envs.SGLANG_TOOL_STRICT_LEVEL.set(ToolStrictLevel.PARAMETER)
    def _handle_cache_compatibility(self):
        if self.enable_hierarchical_cache and self.disable_radix_cache: