Unverified Commit d2a8f71c authored by Xinyuan Tong's avatar Xinyuan Tong Committed by GitHub
Browse files

[feat] Add SGLANG_TOOL_STRICT_LEVEL for tool-call behavior control (#12423)


Signed-off-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
parent 2b7bf11b
...@@ -108,3 +108,9 @@ SGLang supports various environment variables that can be used to configure its ...@@ -108,3 +108,9 @@ SGLang supports various environment variables that can be used to configure its
| --- | --- | --- | | --- | --- | --- |
| `SGLANG_WAIT_WEIGHTS_READY_TIMEOUT` | Timeout period for waiting on weights | `120` | | `SGLANG_WAIT_WEIGHTS_READY_TIMEOUT` | Timeout period for waiting on weights | `120` |
| `SGLANG_DISABLE_OUTLINES_DISK_CACHE` | Disable Outlines disk cache | `true` | | `SGLANG_DISABLE_OUTLINES_DISK_CACHE` | Disable Outlines disk cache | `true` |
## Function Calling / Tool Use
| Environment Variable | Description | Default Value |
| --- | --- | --- |
| `SGLANG_TOOL_STRICT_LEVEL` | Controls the strictness level of tool call parsing and validation. <br>**Level 0**: Off - No strict validation <br>**Level 1**: Function strict - Enables structural tag constraints for all tools (even if none have `strict=True` set) <br>**Level 2**: Parameter strict - Enforces strict parameter validation for all tools, treating them as if they all have `strict=True` set | `0` |
...@@ -2,6 +2,7 @@ import os ...@@ -2,6 +2,7 @@ import os
import subprocess import subprocess
import warnings import warnings
from contextlib import ExitStack, contextmanager from contextlib import ExitStack, contextmanager
from enum import IntEnum
from typing import Any from typing import Any
...@@ -105,6 +106,20 @@ class EnvFloat(EnvField): ...@@ -105,6 +106,20 @@ class EnvFloat(EnvField):
raise ValueError(f'"{value}" is not a valid float value') raise ValueError(f'"{value}" is not a valid float value')
class ToolStrictLevel(IntEnum):
    """Strictness levels for tool-call parsing and validation.

    The levels are ordered integers and are meant to be compared with
    ``>=``, so a higher level also enables everything a lower level does
    (e.g. ``PARAMETER >= FUNCTION`` is true).

    Members:
        OFF: No strict validation is forced; only tools that set
            ``strict=True`` themselves are treated as strict.
        FUNCTION: Structural tag constraints are enabled for all tools,
            even if none of them has ``strict=True`` set.
        PARAMETER: Strict parameter validation is enforced for all tools,
            as if every tool had ``strict=True`` set.
    """

    # Default level: nothing is forced on.
    OFF = 0
    # Function-level strictness (structural tags for every tool).
    FUNCTION = 1
    # Parameter-level strictness (schema enforced for every tool).
    PARAMETER = 2
class Envs: class Envs:
# fmt: off # fmt: off
...@@ -259,6 +274,9 @@ class Envs: ...@@ -259,6 +274,9 @@ class Envs:
# Sparse Embeddings # Sparse Embeddings
SGLANG_EMBEDDINGS_SPARSE_HEAD = EnvStr(None) SGLANG_EMBEDDINGS_SPARSE_HEAD = EnvStr(None)
# Tool-Call behavior
SGLANG_TOOL_STRICT_LEVEL = EnvInt(ToolStrictLevel.OFF)
# fmt: on # fmt: on
......
...@@ -8,6 +8,7 @@ from sglang.srt.entrypoints.openai.protocol import ( ...@@ -8,6 +8,7 @@ from sglang.srt.entrypoints.openai.protocol import (
ToolCallConstraint, ToolCallConstraint,
ToolChoice, ToolChoice,
) )
from sglang.srt.environ import ToolStrictLevel, envs
from sglang.srt.function_call.base_format_detector import BaseFormatDetector from sglang.srt.function_call.base_format_detector import BaseFormatDetector
from sglang.srt.function_call.core_types import ToolCallItem from sglang.srt.function_call.core_types import ToolCallItem
from sglang.srt.function_call.deepseekv3_detector import DeepSeekV3Detector from sglang.srt.function_call.deepseekv3_detector import DeepSeekV3Detector
...@@ -62,6 +63,7 @@ class FunctionCallParser: ...@@ -62,6 +63,7 @@ class FunctionCallParser:
self.detector = detector self.detector = detector
self.tools = tools self.tools = tools
self.tool_strict_level = envs.SGLANG_TOOL_STRICT_LEVEL.get()
def has_tool_call(self, text: str) -> bool: def has_tool_call(self, text: str) -> bool:
""" """
...@@ -142,7 +144,10 @@ class FunctionCallParser: ...@@ -142,7 +144,10 @@ class FunctionCallParser:
info = get_structure_info(name) info = get_structure_info(name)
# accept all if not strict, otherwise only accept the schema # accept all if not strict, otherwise only accept the schema
schema = function.parameters if function.strict else {} is_strict = (
function.strict or self.tool_strict_level >= ToolStrictLevel.PARAMETER
)
schema = function.parameters if is_strict else {}
tool_structures.append( tool_structures.append(
StructuresResponseFormat( StructuresResponseFormat(
...@@ -180,7 +185,10 @@ class FunctionCallParser: ...@@ -180,7 +185,10 @@ class FunctionCallParser:
if ( if (
self.detector.supports_structural_tag() self.detector.supports_structural_tag()
and tool_choice == "auto" and tool_choice == "auto"
and any(tool.function.strict for tool in self.tools) and (
any(tool.function.strict for tool in self.tools)
or self.tool_strict_level >= ToolStrictLevel.FUNCTION
)
): ):
tag = self.get_structure_tag() tag = self.get_structure_tag()
return ("structural_tag", tag) return ("structural_tag", tag)
......
...@@ -27,7 +27,7 @@ from typing import Dict, List, Literal, Optional, Union ...@@ -27,7 +27,7 @@ from typing import Dict, List, Literal, Optional, Union
import orjson import orjson
from sglang.srt.connector import ConnectorType from sglang.srt.connector import ConnectorType
from sglang.srt.environ import envs from sglang.srt.environ import ToolStrictLevel, envs
from sglang.srt.function_call.function_call_parser import FunctionCallParser from sglang.srt.function_call.function_call_parser import FunctionCallParser
from sglang.srt.lora.lora_registry import LoRARef from sglang.srt.lora.lora_registry import LoRARef
from sglang.srt.parser.reasoning_parser import ReasoningParser from sglang.srt.parser.reasoning_parser import ReasoningParser
...@@ -1686,6 +1686,9 @@ class ServerArgs: ...@@ -1686,6 +1686,9 @@ class ServerArgs:
os.environ["SGLANG_ENABLE_DETERMINISTIC_INFERENCE"] = ( os.environ["SGLANG_ENABLE_DETERMINISTIC_INFERENCE"] = (
"1" if self.enable_deterministic_inference else "0" "1" if self.enable_deterministic_inference else "0"
) )
# Set the highest strict level for Kimi K2 tool calls
if self.tool_call_parser == "kimi_k2":
envs.SGLANG_TOOL_STRICT_LEVEL.set(ToolStrictLevel.PARAMETER)
def _handle_cache_compatibility(self): def _handle_cache_compatibility(self):
if self.enable_hierarchical_cache and self.disable_radix_cache: if self.enable_hierarchical_cache and self.disable_radix_cache:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment