Unverified Commit d2a8f71c authored by Xinyuan Tong's avatar Xinyuan Tong Committed by GitHub
Browse files

[feat] Add SGLANG_TOOL_STRICT_LEVEL for tool-call behavior control (#12423)


Signed-off-by: Xinyuan Tong <xinyuantong.cs@gmail.com>
parent 2b7bf11b
......@@ -108,3 +108,9 @@ SGLang supports various environment variables that can be used to configure its
| --- | --- | --- |
| `SGLANG_WAIT_WEIGHTS_READY_TIMEOUT` | Timeout period for waiting on weights | `120` |
| `SGLANG_DISABLE_OUTLINES_DISK_CACHE` | Disable Outlines disk cache | `true` |
## Function Calling / Tool Use
| Environment Variable | Description | Default Value |
| --- | --- | --- |
| `SGLANG_TOOL_STRICT_LEVEL` | Controls the strictness level of tool call parsing and validation. Levels are cumulative: each level also applies everything the lower levels enable. <br>**Level 0**: Off - No strict validation <br>**Level 1**: Function strict - Enables structural tag constraints for all tools (even if none have `strict=True` set) <br>**Level 2**: Parameter strict - Enforces strict parameter validation for all tools, treating them as if they all have `strict=True` set | `0` |
......@@ -2,6 +2,7 @@ import os
import subprocess
import warnings
from contextlib import ExitStack, contextmanager
from enum import IntEnum
from typing import Any
......@@ -105,6 +106,20 @@ class EnvFloat(EnvField):
raise ValueError(f'"{value}" is not a valid float value')
class ToolStrictLevel(IntEnum):
    """Strictness levels for tool-call parsing and validation.

    The members are ordered integers, so a level can be compared against a
    threshold with ``>=``.

    OFF: No strict validation.
    FUNCTION: Enables structural tag constraints for all tools.
    PARAMETER: Enforces strict parameter validation for all tools.
    """

    OFF = 0
    FUNCTION = 1
    PARAMETER = 2

    def __str__(self) -> str:
        """Return the member's integer value as a decimal string."""
        # Before Python 3.11, str() of an IntEnum member yields the qualified
        # name (e.g. "ToolStrictLevel.PARAMETER") instead of "2". Members are
        # passed to the environment-variable setter, which presumably stores
        # str(value) -- TODO confirm. Returning the bare integer keeps that
        # text parseable as an int on every supported Python version.
        return str(int(self))
class Envs:
# fmt: off
......@@ -259,6 +274,9 @@ class Envs:
# Sparse Embeddings
SGLANG_EMBEDDINGS_SPARSE_HEAD = EnvStr(None)
# Tool-Call behavior
SGLANG_TOOL_STRICT_LEVEL = EnvInt(ToolStrictLevel.OFF)
# fmt: on
......
......@@ -8,6 +8,7 @@ from sglang.srt.entrypoints.openai.protocol import (
ToolCallConstraint,
ToolChoice,
)
from sglang.srt.environ import ToolStrictLevel, envs
from sglang.srt.function_call.base_format_detector import BaseFormatDetector
from sglang.srt.function_call.core_types import ToolCallItem
from sglang.srt.function_call.deepseekv3_detector import DeepSeekV3Detector
......@@ -62,6 +63,7 @@ class FunctionCallParser:
self.detector = detector
self.tools = tools
self.tool_strict_level = envs.SGLANG_TOOL_STRICT_LEVEL.get()
def has_tool_call(self, text: str) -> bool:
"""
......@@ -142,7 +144,10 @@ class FunctionCallParser:
info = get_structure_info(name)
# accept all if not strict, otherwise only accept the schema
schema = function.parameters if function.strict else {}
is_strict = (
function.strict or self.tool_strict_level >= ToolStrictLevel.PARAMETER
)
schema = function.parameters if is_strict else {}
tool_structures.append(
StructuresResponseFormat(
......@@ -180,7 +185,10 @@ class FunctionCallParser:
if (
self.detector.supports_structural_tag()
and tool_choice == "auto"
and any(tool.function.strict for tool in self.tools)
and (
any(tool.function.strict for tool in self.tools)
or self.tool_strict_level >= ToolStrictLevel.FUNCTION
)
):
tag = self.get_structure_tag()
return ("structural_tag", tag)
......
......@@ -27,7 +27,7 @@ from typing import Dict, List, Literal, Optional, Union
import orjson
from sglang.srt.connector import ConnectorType
from sglang.srt.environ import envs
from sglang.srt.environ import ToolStrictLevel, envs
from sglang.srt.function_call.function_call_parser import FunctionCallParser
from sglang.srt.lora.lora_registry import LoRARef
from sglang.srt.parser.reasoning_parser import ReasoningParser
......@@ -1686,6 +1686,9 @@ class ServerArgs:
os.environ["SGLANG_ENABLE_DETERMINISTIC_INFERENCE"] = (
"1" if self.enable_deterministic_inference else "0"
)
# Set the highest strict level for Kimi K2 tool calls
if self.tool_call_parser == "kimi_k2":
envs.SGLANG_TOOL_STRICT_LEVEL.set(ToolStrictLevel.PARAMETER)
def _handle_cache_compatibility(self):
if self.enable_hierarchical_cache and self.disable_radix_cache:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment