Unverified commit 35fa7129, authored by Keiven C and committed by GitHub
Browse files

feat(v4): cherry-pick #8665 onto release/deepseekv4 (#8709)


Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com>
parent 01002df7
...@@ -84,7 +84,7 @@ $global:copyright_results = @{ ...@@ -84,7 +84,7 @@ $global:copyright_results = @{
$ignored_files = @('.clang-format', '.gitattributes', '.gitignore', '.gitkeep', '.patch', 'Cargo.lock', 'LICENSE', 'uv.lock', 'rust-toolchain.toml', 'codespell.txt', 'exclusions.txt') $ignored_files = @('.clang-format', '.gitattributes', '.gitignore', '.gitkeep', '.patch', 'Cargo.lock', 'LICENSE', 'uv.lock', 'rust-toolchain.toml', 'codespell.txt', 'exclusions.txt')
write-debug "<copyright-check> ignored_files = ['$($ignored_files -join "','")']." write-debug "<copyright-check> ignored_files = ['$($ignored_files -join "','")']."
$ignored_paths = @('.github', '.mypy_cache', '.pytest_cache', 'lib/llm/tests/data/sample-models', 'lib/llm/tests/data/deepseek-v3.2') $ignored_paths = @('.github', '.mypy_cache', '.pytest_cache', 'lib/llm/tests/data/sample-models', 'lib/llm/tests/data/deepseek-v3.2', 'lib/llm/tests/data/deepseek-v4')
write-debug "<copyright-check> ignored_paths = ['$($ignored_paths -join "','")']." write-debug "<copyright-check> ignored_paths = ['$($ignored_paths -join "','")']."
$ignored_types = @('.bat', '.gif', '.ico', '.ipynb', '.jpg', '.jpeg', '.patch', '.png', '.pyc', '.pyi', '.rst', '.zip', '.md', '.json') $ignored_types = @('.bat', '.gif', '.ico', '.ipynb', '.jpg', '.jpeg', '.patch', '.png', '.pyc', '.pyi', '.rst', '.zip', '.md', '.json')
write-debug "<copyright-check> ignored_types = ['$($ignored_types -join "', '")']." write-debug "<copyright-check> ignored_types = ['$($ignored_types -join "', '")']."
......
...@@ -3,9 +3,12 @@ ...@@ -3,9 +3,12 @@
from __future__ import annotations from __future__ import annotations
import copy
import inspect
import json import json
import logging import logging
from dataclasses import dataclass from dataclasses import dataclass
from functools import lru_cache
from typing import Any, TypeAlias from typing import Any, TypeAlias
from sglang.srt.entrypoints.openai.protocol import Function as SglangFunction from sglang.srt.entrypoints.openai.protocol import Function as SglangFunction
...@@ -129,6 +132,127 @@ def _is_named_tool_choice(tool_choice: Any) -> bool: ...@@ -129,6 +132,127 @@ def _is_named_tool_choice(tool_choice: Any) -> bool:
) )
def _normalize_deepseek_v4_hint(value: Any) -> str:
return str(value or "").lower().replace("-", "").replace("_", "")
def _should_use_deepseek_v4_encoding(
request: dict[str, Any],
*,
tokenizer,
tool_call_parser_name: str | None,
reasoning_parser_name: str | None,
) -> bool:
if getattr(tokenizer, "chat_template", None) is not None:
return False
return any(
"deepseekv4" in _normalize_deepseek_v4_hint(value)
for value in (
request.get("model"),
tool_call_parser_name,
reasoning_parser_name,
)
)
def _filter_template_tools(
request: dict[str, Any],
*,
exclude_tools_when_tool_choice_none: bool,
) -> list[dict[str, Any]] | None:
raw_tools = request.get("tools") or []
if not raw_tools:
return None
tool_choice = request.get("tool_choice", "auto")
if exclude_tools_when_tool_choice_none and tool_choice == "none":
return None
if _is_named_tool_choice(tool_choice):
chosen_name = tool_choice["function"]["name"]
return [
copy.deepcopy(tool)
for tool in raw_tools
if tool.get("function", {}).get("name") == chosen_name
]
return copy.deepcopy(raw_tools)
def _render_deepseek_v4_prompt_token_ids(
    request: dict[str, Any],
    *,
    messages: list[dict[str, Any]],
    tokenizer,
    template_tools: list[dict[str, Any]] | None,
) -> list[int]:
    """Render a chat request with SGLang's DeepSeek-V4 encoder and tokenize it.

    Args:
        request: The chat request; ``chat_template_kwargs`` and
            ``reasoning_effort`` are read from it.
        messages: Chat messages to render. They are deep-copied first, so the
            caller's objects are never modified.
        tokenizer: Object whose ``encode`` turns the rendered prompt into ids.
        template_tools: Tool definitions to attach to the leading system
            message, or ``None``/empty to attach nothing.

    Returns:
        The result of ``tokenizer.encode`` on the rendered prompt, passed
        through ``_normalize_prompt_token_ids``.

    Raises:
        ValueError: If the installed SGLang does not provide the
            ``encoding_dsv4`` module.
    """
    try:
        from sglang.srt.entrypoints.openai.encoding_dsv4 import encode_messages
    except ImportError as exc:
        raise ValueError(
            "DeepSeek-V4 preprocessing requires SGLang's "
            "sglang.srt.entrypoints.openai.encoding_dsv4 encoder. "
            "Install an SGLang build that includes the DeepSeek-V4 integration."
        ) from exc

    # Private copy: the None -> "" rewrite below must not leak into the request.
    rendered = copy.deepcopy(messages)
    for entry in rendered:
        if entry.get("content") is None:
            entry["content"] = ""

    if template_tools:
        # Tools ride on the first system message; synthesize an empty one if
        # the conversation does not already start with a system turn.
        starts_with_system = bool(rendered) and rendered[0].get("role") == "system"
        if not starts_with_system:
            rendered.insert(0, {"role": "system", "content": ""})
        rendered[0]["tools"] = template_tools

    template_options = request.get("chat_template_kwargs") or {}
    if template_options.get("thinking"):
        mode = "thinking"
    else:
        mode = "chat"

    effort = (
        request.get("reasoning_effort")
        or template_options.get("reasoning_effort")
        or None
    )
    # Only "max" and "high" are forwarded; any other value is dropped.
    if effort not in ("max", "high", None):
        effort = None

    prompt_text = encode_messages(
        rendered,
        thinking_mode=mode,
        reasoning_effort=effort,
    )
    return _normalize_prompt_token_ids(tokenizer.encode(prompt_text))
@lru_cache(maxsize=64)
def _callable_accepts_kwarg(func: Any, kwarg: str) -> bool:
try:
signature = inspect.signature(func)
except (TypeError, ValueError):
return False
for name, param in signature.parameters.items():
if param.kind == inspect.Parameter.VAR_KEYWORD:
return True
if name == kwarg and param.kind in (
inspect.Parameter.POSITIONAL_OR_KEYWORD,
inspect.Parameter.KEYWORD_ONLY,
):
return True
return False
def _call_with_optional_parallel_tool_calls(
func: Any,
*args: Any,
parallel_tool_calls: Any,
) -> Any:
"""Call SGLang helpers across versions with/without parallel_tool_calls."""
if _callable_accepts_kwarg(func, "parallel_tool_calls"):
return func(*args, parallel_tool_calls=parallel_tool_calls)
return func(*args)
def build_tool_call_guided_decoding( def build_tool_call_guided_decoding(
request: dict[str, Any], request: dict[str, Any],
*, *,
...@@ -161,7 +285,8 @@ def build_tool_call_guided_decoding( ...@@ -161,7 +285,8 @@ def build_tool_call_guided_decoding(
) )
constraint = ( constraint = (
"json_schema", "json_schema",
get_json_schema_constraint( _call_with_optional_parallel_tool_calls(
get_json_schema_constraint,
sglang_tools, sglang_tools,
sglang_tool_choice, sglang_tool_choice,
parallel_tool_calls=parallel_tool_calls, parallel_tool_calls=parallel_tool_calls,
...@@ -172,7 +297,8 @@ def build_tool_call_guided_decoding( ...@@ -172,7 +297,8 @@ def build_tool_call_guided_decoding(
tools=sglang_tools, tools=sglang_tools,
tool_call_parser=tool_call_parser_name, tool_call_parser=tool_call_parser_name,
) )
constraint = parser.get_structure_constraint( constraint = _call_with_optional_parallel_tool_calls(
parser.get_structure_constraint,
tool_choice, tool_choice,
parallel_tool_calls=parallel_tool_calls, parallel_tool_calls=parallel_tool_calls,
) )
...@@ -239,30 +365,38 @@ def preprocess_chat_request( ...@@ -239,30 +365,38 @@ def preprocess_chat_request(
f"present in tools (available: {sorted(available_names) or 'none'})" f"present in tools (available: {sorted(available_names) or 'none'})"
) )
template_tools = _filter_template_tools(
request,
exclude_tools_when_tool_choice_none=exclude_tools_when_tool_choice_none,
)
if _should_use_deepseek_v4_encoding(
request,
tokenizer=tokenizer,
tool_call_parser_name=tool_call_parser_name,
reasoning_parser_name=reasoning_parser_name,
):
prompt_token_ids = _render_deepseek_v4_prompt_token_ids(
request,
messages=messages,
tokenizer=tokenizer,
template_tools=template_tools,
)
else:
# Build template kwargs -- single call for rendering + tokenization # Build template kwargs -- single call for rendering + tokenization
template_kwargs: dict[str, Any] = { template_kwargs: dict[str, Any] = {
"add_generation_prompt": True, "add_generation_prompt": True,
"tokenize": True, "tokenize": True,
} }
# Strip tools from template when tool_choice=none so the model doesn't if template_tools:
# see them and generate raw XML tool calls in its response. template_kwargs["tools"] = template_tools
# When tool_choice names a specific function, only include that tool
# in the template so the model doesn't see irrelevant definitions.
if sglang_tools and not (
exclude_tools_when_tool_choice_none and tool_choice == "none"
):
if _is_named_tool_choice(tool_choice):
chosen_name = tool_choice["function"]["name"]
template_kwargs["tools"] = [
t.model_dump() for t in sglang_tools if t.function.name == chosen_name
]
else:
template_kwargs["tools"] = [t.model_dump() for t in sglang_tools]
prompt_token_ids = _normalize_prompt_token_ids( prompt_token_ids = _normalize_prompt_token_ids(
tokenizer.apply_chat_template(messages, **template_kwargs) tokenizer.apply_chat_template(messages, **template_kwargs)
) )
# Build parsers after rendering, so DeepSeek-V4 can use its custom encoder
# while still sharing the existing Dynamo parser/guided-decoding behavior.
tool_call_parser, reasoning_parser = create_parsers( tool_call_parser, reasoning_parser = create_parsers(
request, request,
tool_call_parser_name=tool_call_parser_name, tool_call_parser_name=tool_call_parser_name,
......
...@@ -11,12 +11,15 @@ Parallels test_vllm_unit.py for the vLLM backend. ...@@ -11,12 +11,15 @@ Parallels test_vllm_unit.py for the vLLM backend.
import json import json
import sys
import types
import pytest import pytest
from sglang.srt.function_call.function_call_parser import FunctionCallParser from sglang.srt.function_call.function_call_parser import FunctionCallParser
from sglang.srt.function_call.json_array_parser import JsonArrayParser from sglang.srt.function_call.json_array_parser import JsonArrayParser
from sglang.srt.utils.hf_transformers_utils import get_tokenizer from sglang.srt.utils.hf_transformers_utils import get_tokenizer
import dynamo.frontend.sglang_prepost as sglang_prepost_module
import dynamo.frontend.sglang_processor as sglang_processor_module import dynamo.frontend.sglang_processor as sglang_processor_module
from dynamo.frontend.sglang_prepost import ( from dynamo.frontend.sglang_prepost import (
SglangPreprocessResult, SglangPreprocessResult,
...@@ -445,6 +448,85 @@ class TestBuildToolCallGuidedDecoding: ...@@ -445,6 +448,85 @@ class TestBuildToolCallGuidedDecoding:
assert isinstance(guided, dict) assert isinstance(guided, dict)
assert "json" in guided assert "json" in guided
def test_required_tool_choice_supports_older_sglang_constraint_signature(
    self, monkeypatch
):
    """``tool_choice="required"`` works when the schema helper lacks ``parallel_tool_calls``."""
    weather_tool = {
        "type": "function",
        "function": {
            "name": "get_weather",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
            },
        },
    }
    tools = convert_tools([weather_tool])

    def old_get_json_schema_constraint(sglang_tools, tool_choice):
        # Two-argument signature: an unexpected keyword would raise TypeError.
        assert sglang_tools == tools
        assert tool_choice == "required"
        return {"type": "array", "items": {"type": "object"}}

    monkeypatch.setattr(
        sglang_prepost_module,
        "get_json_schema_constraint",
        old_get_json_schema_constraint,
    )

    request = {"tool_choice": "required", "parallel_tool_calls": False}
    guided = build_tool_call_guided_decoding(
        request,
        tool_call_parser_name=None,
        sglang_tools=tools,
    )

    assert guided == {"json": {"type": "array", "items": {"type": "object"}}}
def test_auto_tool_choice_supports_older_structure_constraint_signature(
    self, monkeypatch
):
    """``tool_choice="auto"`` works when the parser method lacks ``parallel_tool_calls``."""
    strict_weather_tool = {
        "type": "function",
        "function": {
            "name": "get_weather",
            "strict": True,
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
            },
        },
    }
    tools = convert_tools([strict_weather_tool])

    class OldFunctionCallParser:
        """Stand-in parser whose constraint method takes only ``tool_choice``."""

        def __init__(self, *, tools, tool_call_parser):
            self.tools = tools
            self.tool_call_parser = tool_call_parser

        def get_structure_constraint(self, tool_choice):
            assert tool_choice == "auto"
            return "structural_tag", {"type": "object"}

    monkeypatch.setattr(
        sglang_prepost_module,
        "FunctionCallParser",
        OldFunctionCallParser,
    )

    request = {"tool_choice": "auto", "parallel_tool_calls": False}
    guided = build_tool_call_guided_decoding(
        request,
        tool_call_parser_name="kimi_k2",
        sglang_tools=tools,
    )

    assert guided == {"structural_tag": {"type": "object"}}
def test_auto_strict_tools_can_build_structural_tag_guidance(self): def test_auto_strict_tools_can_build_structural_tag_guidance(self):
tools = convert_tools( tools = convert_tools(
[ [
...@@ -990,6 +1072,240 @@ class TestPreprocessChatRequest: ...@@ -990,6 +1072,240 @@ class TestPreprocessChatRequest:
) )
assert len(with_system.prompt_token_ids) > len(without_system.prompt_token_ids) assert len(with_system.prompt_token_ids) > len(without_system.prompt_token_ids)
def test_deepseek_v4_uses_sglang_encoder_when_chat_template_missing(
    self, monkeypatch
):
    """DeepSeek-V4 uses SGLang's encoder instead of HF chat_template."""
    seen = {}

    def fake_encode_messages(messages, *, thinking_mode, reasoning_effort=None):
        # Record exactly what the preprocessor hands to the encoder.
        seen["messages"] = messages
        seen["thinking_mode"] = thinking_mode
        seen["reasoning_effort"] = reasoning_effort
        return "<dsv4-prompt>"

    encoder_module = types.ModuleType("sglang.srt.entrypoints.openai.encoding_dsv4")
    encoder_module.encode_messages = fake_encode_messages
    monkeypatch.setitem(
        sys.modules,
        "sglang.srt.entrypoints.openai.encoding_dsv4",
        encoder_module,
    )

    class NoTemplateTokenizer:
        chat_template = None

        def apply_chat_template(self, *args, **kwargs):
            raise AssertionError("apply_chat_template should not be called")

        def encode(self, prompt):
            assert prompt == "<dsv4-prompt>"
            return [1, 2, 3]

    weather_tool = {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
            },
        },
    }
    request = {
        "model": "deepseek-ai/DeepSeek-V4-Pro",
        "messages": [{"role": "user", "content": "Hello"}],
        "chat_template_kwargs": {
            "thinking": True,
            "reasoning_effort": "max",
        },
        "tools": [weather_tool],
    }

    result = preprocess_chat_request(
        request,
        tokenizer=NoTemplateTokenizer(),
        tool_call_parser_name=None,
        reasoning_parser_name="deepseek_v4",
    )

    assert result.prompt_token_ids == [1, 2, 3]
    assert seen["thinking_mode"] == "thinking"
    assert seen["reasoning_effort"] == "max"
    # A system message is synthesized in front of the user turn to carry tools.
    system_message, user_message = seen["messages"][0], seen["messages"][1]
    assert system_message["role"] == "system"
    assert system_message["tools"][0]["function"]["name"] == "get_weather"
    assert user_message["role"] == "user"
def test_deepseek_v4_named_tool_choice_filters_encoder_tools(self, monkeypatch):
    """A named ``tool_choice`` exposes only that tool to the DeepSeek-V4 encoder."""
    seen = {}

    def fake_encode_messages(messages, *, thinking_mode, reasoning_effort=None):
        seen["messages"] = messages
        return "<dsv4-prompt>"

    encoder_module = types.ModuleType("sglang.srt.entrypoints.openai.encoding_dsv4")
    encoder_module.encode_messages = fake_encode_messages
    monkeypatch.setitem(
        sys.modules,
        "sglang.srt.entrypoints.openai.encoding_dsv4",
        encoder_module,
    )

    class NoTemplateTokenizer:
        chat_template = None

        def encode(self, prompt):
            return [1]

    def make_tool(name):
        return {"type": "function", "function": {"name": name, "parameters": {}}}

    request = {
        "model": "deepseek-ai/DeepSeek-V4-Pro",
        "messages": [{"role": "user", "content": "Hello"}],
        "tools": [make_tool("get_weather"), make_tool("get_time")],
        "tool_choice": {
            "type": "function",
            "function": {"name": "get_time"},
        },
    }

    preprocess_chat_request(
        request,
        tokenizer=NoTemplateTokenizer(),
        tool_call_parser_name=None,
        reasoning_parser_name="deepseek_v4",
    )

    forwarded_tools = seen["messages"][0]["tools"]
    forwarded_names = [tool["function"]["name"] for tool in forwarded_tools]
    assert forwarded_names == ["get_time"]
def test_deepseek_v4_respects_existing_chat_template(self, monkeypatch):
    """A tokenizer that ships a chat template bypasses the DeepSeek-V4 encoder."""

    def fake_encode_messages(messages, *, thinking_mode, reasoning_effort=None):
        raise AssertionError("encoding_dsv4 should not be called")

    encoder_module = types.ModuleType("sglang.srt.entrypoints.openai.encoding_dsv4")
    encoder_module.encode_messages = fake_encode_messages
    monkeypatch.setitem(
        sys.modules,
        "sglang.srt.entrypoints.openai.encoding_dsv4",
        encoder_module,
    )

    class TemplateTokenizer:
        chat_template = (
            "{% for message in messages %}{{ message.content }}{% endfor %}"
        )

        def apply_chat_template(self, messages, **kwargs):
            assert kwargs["add_generation_prompt"] is True
            assert kwargs["tokenize"] is True
            return [4, 5, 6]

        def encode(self, prompt):
            raise AssertionError("encode should not be called")

    request = {
        "model": "deepseek-ai/DeepSeek-V4-Pro",
        "messages": [{"role": "user", "content": "Hello"}],
    }
    result = preprocess_chat_request(
        request,
        tokenizer=TemplateTokenizer(),
        tool_call_parser_name=None,
        reasoning_parser_name=None,
    )

    assert result.prompt_token_ids == [4, 5, 6]
def test_deepseek_v4_normalizes_none_content_without_mutating_request(
    self, monkeypatch
):
    """``content: None`` reaches the encoder as ``""`` while the request stays untouched."""
    seen = {}

    def fake_encode_messages(messages, *, thinking_mode, reasoning_effort=None):
        seen["messages"] = messages
        return "<dsv4-prompt>"

    encoder_module = types.ModuleType("sglang.srt.entrypoints.openai.encoding_dsv4")
    encoder_module.encode_messages = fake_encode_messages
    monkeypatch.setitem(
        sys.modules,
        "sglang.srt.entrypoints.openai.encoding_dsv4",
        encoder_module,
    )

    class NoTemplateTokenizer:
        chat_template = None

        def encode(self, prompt):
            return [7]

    request = {
        "model": "deepseek-ai/DeepSeek-V4-Pro",
        "messages": [{"role": "assistant", "content": None}],
    }

    result = preprocess_chat_request(
        request,
        tokenizer=NoTemplateTokenizer(),
        tool_call_parser_name=None,
        reasoning_parser_name=None,
    )

    assert result.prompt_token_ids == [7]
    # The encoder sees the normalized copy ...
    assert seen["messages"] == [{"role": "assistant", "content": ""}]
    # ... and the caller's request still holds the original None.
    assert request["messages"] == [{"role": "assistant", "content": None}]
def test_deepseek_v4_tool_choice_none_strips_encoder_tools(self, monkeypatch):
    """With ``tool_choice="none"`` and exclusion enabled, no tools reach the encoder."""
    seen = {}

    def fake_encode_messages(messages, *, thinking_mode, reasoning_effort=None):
        seen["messages"] = messages
        return "<dsv4-prompt>"

    encoder_module = types.ModuleType("sglang.srt.entrypoints.openai.encoding_dsv4")
    encoder_module.encode_messages = fake_encode_messages
    monkeypatch.setitem(
        sys.modules,
        "sglang.srt.entrypoints.openai.encoding_dsv4",
        encoder_module,
    )

    class NoTemplateTokenizer:
        chat_template = None

        def encode(self, prompt):
            return [8]

    request = {
        "model": "deepseek-ai/DeepSeek-V4-Pro",
        "messages": [{"role": "system", "content": "Stay terse."}],
        "tools": [
            {
                "type": "function",
                "function": {"name": "get_weather", "parameters": {}},
            }
        ],
        "tool_choice": "none",
    }
    preprocess_chat_request(
        request,
        tokenizer=NoTemplateTokenizer(),
        tool_call_parser_name=None,
        reasoning_parser_name=None,
        exclude_tools_when_tool_choice_none=True,
    )

    first_message = seen["messages"][0]
    assert "tools" not in first_message
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# SglangStreamingPostProcessor: incremental detokenization # SglangStreamingPostProcessor: incremental detokenization
......
...@@ -15,13 +15,96 @@ removed. When the old version falls outside the support window, delete the ...@@ -15,13 +15,96 @@ removed. When the old version falls outside the support window, delete the
fallback and any associated polyfills. fallback and any associated polyfills.
""" """
import inspect
import ipaddress import ipaddress
import logging import logging
import socket import socket
from functools import lru_cache
from typing import Any from typing import Any
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Top-level sglang exports: Engine, ServerArgs
#
# Some SGLang dev builds (including 0.5.x snapshots) do not re-export these
# from sglang/__init__.py, while Dynamo historically uses `import sglang as sgl`
# followed by `sgl.Engine(...)` throughout this backend.
# ---------------------------------------------------------------------------
def ensure_sglang_top_level_exports() -> None:
    """Restore top-level SGLang exports omitted by some install flavors.

    Attaches ``Engine`` and ``ServerArgs`` to the ``sglang`` package when the
    package does not already expose them. Attributes that are already present
    are left untouched, so calling this repeatedly is harmless.
    """
    import sglang as sgl

    engine_missing = not hasattr(sgl, "Engine")
    if engine_missing:
        from sglang.srt.entrypoints.engine import Engine as _Engine

        setattr(sgl, "Engine", _Engine)

    server_args_missing = not hasattr(sgl, "ServerArgs")
    if server_args_missing:
        from sglang.srt.server_args import ServerArgs as _ServerArgs

        setattr(sgl, "ServerArgs", _ServerArgs)
ensure_sglang_top_level_exports()
@lru_cache(maxsize=32)
def _get_async_generate_supported_kwarg_names(
async_generate: Any,
) -> frozenset[str] | None:
"""Return supported async_generate keyword names, or None for **kwargs."""
try:
signature = inspect.signature(async_generate)
except (TypeError, ValueError):
logger.debug(
"Could not inspect SGLang Engine.async_generate signature; "
"dropping optional compatibility kwargs"
)
return frozenset()
names: set[str] = set()
for name, param in signature.parameters.items():
if param.kind == inspect.Parameter.VAR_KEYWORD:
return None
if param.kind in (
inspect.Parameter.POSITIONAL_OR_KEYWORD,
inspect.Parameter.KEYWORD_ONLY,
):
names.add(name)
return frozenset(names)
def filter_supported_async_generate_kwargs(
    engine: Any, kwargs: dict[str, Any]
) -> dict[str, Any]:
    """Return only async_generate kwargs accepted by this SGLang engine.

    SGLang occasionally adds optional Engine.async_generate kwargs before every
    supported install flavor has them. Keep the compatibility boundary narrow:
    callers decide which kwargs are optional, and this helper only drops those
    optional kwargs when the installed engine cannot accept them.

    When the engine's ``async_generate`` declares ``**kwargs`` the input dict
    is returned as-is; otherwise a new dict holding the accepted keys is built.
    """
    method = engine.async_generate
    # Inspect the underlying function of a bound method, so the cached lookup
    # is keyed on the function rather than on one particular engine instance.
    inspect_target = getattr(method, "__func__", method)
    try:
        allowed = _get_async_generate_supported_kwarg_names(inspect_target)
    except TypeError:
        # Presumably an unhashable callable that lru_cache cannot key on;
        # run the lookup without the cache.
        allowed = _get_async_generate_supported_kwarg_names.__wrapped__(
            inspect_target
        )

    if allowed is None:
        return kwargs

    accepted: dict[str, Any] = {}
    for key, value in kwargs.items():
        if key in allowed:
            accepted[key] = value
    return accepted
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Network utilities: NetworkAddress, get_local_ip_auto, get_zmq_socket # Network utilities: NetworkAddress, get_local_ip_auto, get_zmq_socket
# #
...@@ -201,6 +284,8 @@ def enable_disjoint_streaming_output(server_args: Any) -> None: ...@@ -201,6 +284,8 @@ def enable_disjoint_streaming_output(server_args: Any) -> None:
__all__ = [ __all__ = [
"NetworkAddress", "NetworkAddress",
"enable_disjoint_streaming_output", "enable_disjoint_streaming_output",
"ensure_sglang_top_level_exports",
"filter_supported_async_generate_kwargs",
"get_local_ip_auto", "get_local_ip_auto",
"get_scheduler_info", "get_scheduler_info",
"get_zmq_socket", "get_zmq_socket",
......
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import asyncio import asyncio
import json import json
import logging import logging
......
...@@ -13,6 +13,7 @@ from dynamo._core import Context ...@@ -13,6 +13,7 @@ from dynamo._core import Context
from dynamo.common.constants import DisaggregationMode from dynamo.common.constants import DisaggregationMode
from dynamo.common.utils.engine_response import normalize_finish_reason from dynamo.common.utils.engine_response import normalize_finish_reason
from dynamo.common.utils.otel_tracing import build_trace_headers from dynamo.common.utils.otel_tracing import build_trace_headers
from dynamo.sglang._compat import filter_supported_async_generate_kwargs
from dynamo.sglang.args import Config from dynamo.sglang.args import Config
from dynamo.sglang.publisher import DynamoSglangPublisher from dynamo.sglang.publisher import DynamoSglangPublisher
from dynamo.sglang.request_handlers.handler_base import BaseWorkerHandler from dynamo.sglang.request_handlers.handler_base import BaseWorkerHandler
...@@ -275,6 +276,9 @@ class DecodeWorkerHandler(BaseWorkerHandler): ...@@ -275,6 +276,9 @@ class DecodeWorkerHandler(BaseWorkerHandler):
return_routed_experts = getattr( return_routed_experts = getattr(
self.config.server_args, "enable_return_routed_experts", False self.config.server_args, "enable_return_routed_experts", False
) )
routed_experts_kwargs = filter_supported_async_generate_kwargs(
self.engine, {"return_routed_experts": return_routed_experts}
)
priority = (request.get("routing") or {}).get("priority") priority = (request.get("routing") or {}).get("priority")
logprob_kwargs = self._build_logprob_kwargs(request) logprob_kwargs = self._build_logprob_kwargs(request)
...@@ -308,7 +312,7 @@ class DecodeWorkerHandler(BaseWorkerHandler): ...@@ -308,7 +312,7 @@ class DecodeWorkerHandler(BaseWorkerHandler):
**input_param, **input_param,
sampling_params=sampling_params, sampling_params=sampling_params,
stream=True, stream=True,
return_routed_experts=return_routed_experts, **routed_experts_kwargs,
bootstrap_host=bootstrap_info["bootstrap_host"], bootstrap_host=bootstrap_info["bootstrap_host"],
bootstrap_port=bootstrap_info["bootstrap_port"], bootstrap_port=bootstrap_info["bootstrap_port"],
bootstrap_room=bootstrap_info["bootstrap_room"], bootstrap_room=bootstrap_info["bootstrap_room"],
...@@ -346,7 +350,7 @@ class DecodeWorkerHandler(BaseWorkerHandler): ...@@ -346,7 +350,7 @@ class DecodeWorkerHandler(BaseWorkerHandler):
video_data=video_data, video_data=video_data,
sampling_params=sampling_params, sampling_params=sampling_params,
stream=True, stream=True,
return_routed_experts=return_routed_experts, **routed_experts_kwargs,
external_trace_header=trace_header, external_trace_header=trace_header,
rid=trace_id, rid=trace_id,
data_parallel_rank=dp_rank, data_parallel_rank=dp_rank,
......
...@@ -11,6 +11,11 @@ import pytest ...@@ -11,6 +11,11 @@ import pytest
import yaml import yaml
from sglang.srt.disaggregation.utils import FAKE_BOOTSTRAP_HOST from sglang.srt.disaggregation.utils import FAKE_BOOTSTRAP_HOST
import dynamo.sglang._compat as sglang_compat
from dynamo.sglang._compat import (
ensure_sglang_top_level_exports,
filter_supported_async_generate_kwargs,
)
from dynamo.sglang.args import parse_args from dynamo.sglang.args import parse_args
from dynamo.sglang.health_check import ( from dynamo.sglang.health_check import (
SglangDisaggHealthCheckPayload, SglangDisaggHealthCheckPayload,
...@@ -38,6 +43,99 @@ pytestmark = [ ...@@ -38,6 +43,99 @@ pytestmark = [
mock_sglang_cli = make_cli_args_fixture("dynamo.sglang") mock_sglang_cli = make_cli_args_fixture("dynamo.sglang")
def test_compat_restores_sglang_top_level_exports():
    """Dynamo supports SGLang builds that omit top-level Engine/ServerArgs."""
    import sglang as sgl
    from sglang.srt.entrypoints.engine import Engine
    from sglang.srt.server_args import ServerArgs

    missing = object()
    export_names = ("Engine", "ServerArgs")
    # Remember what the package exposed so it can be put back afterwards.
    originals = {name: getattr(sgl, name, missing) for name in export_names}

    try:
        for name in export_names:
            if hasattr(sgl, name):
                delattr(sgl, name)

        ensure_sglang_top_level_exports()

        assert sgl.Engine is Engine
        assert sgl.ServerArgs is ServerArgs
    finally:
        for name in export_names:
            original = originals[name]
            if original is not missing:
                setattr(sgl, name, original)
            elif hasattr(sgl, name):
                delattr(sgl, name)
def test_compat_filters_async_generate_kwargs_for_older_engines():
    """Kwargs the engine's ``async_generate`` does not declare are dropped."""

    class OldEngine:
        async def async_generate(self, input_ids=None, sampling_params=None):
            return None

    requested = {
        "input_ids": [1, 2, 3],
        "return_routed_experts": True,
    }

    filtered = filter_supported_async_generate_kwargs(OldEngine(), requested)

    assert filtered == {"input_ids": [1, 2, 3]}
def test_compat_keeps_async_generate_kwargs_for_newer_engines():
    """Kwargs that the engine declares by name pass through unchanged."""

    class NewEngine:
        async def async_generate(self, return_routed_experts=False):
            return None

    requested = {"return_routed_experts": True}
    filtered = filter_supported_async_generate_kwargs(NewEngine(), requested)
    assert filtered == requested
def test_compat_keeps_async_generate_kwargs_for_variadic_engines():
    """An engine whose ``async_generate`` takes ``**kwargs`` keeps every kwarg."""

    class VariadicEngine:
        async def async_generate(self, **kwargs):
            return None

    requested = {"return_routed_experts": True}
    filtered = filter_supported_async_generate_kwargs(VariadicEngine(), requested)
    assert filtered == requested
def test_compat_caches_async_generate_signature_inspection(monkeypatch):
    """Two engine instances of one class trigger a single signature inspection."""

    class CachedEngine:
        async def async_generate(self, return_routed_experts=False):
            return None

    cached_lookup = sglang_compat._get_async_generate_supported_kwarg_names
    # Start from an empty cache so earlier tests cannot hide a miss.
    cached_lookup.cache_clear()

    calls = 0
    real_signature = sglang_compat.inspect.signature

    def counting_signature(obj):
        nonlocal calls
        calls += 1
        return real_signature(obj)

    monkeypatch.setattr(sglang_compat.inspect, "signature", counting_signature)

    requested = {"return_routed_experts": True}
    for _ in range(2):
        # A fresh engine each time: the cache must not depend on the instance.
        filtered = filter_supported_async_generate_kwargs(CachedEngine(), requested)
        assert filtered == requested

    assert calls == 1
    cached_lookup.cache_clear()
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_custom_jinja_template_invalid_path(mock_sglang_cli): async def test_custom_jinja_template_invalid_path(mock_sglang_cli):
"""Test that invalid file path raises FileNotFoundError.""" """Test that invalid file path raises FileNotFoundError."""
......
...@@ -1227,8 +1227,9 @@ impl OpenAIPreprocessor { ...@@ -1227,8 +1227,9 @@ impl OpenAIPreprocessor {
/// For kimi_k25: disabled when chat_template_args contains "thinking": false. /// For kimi_k25: disabled when chat_template_args contains "thinking": false.
/// For nemotron_nano: disabled when chat_template_args contains "enable_thinking": false /// For nemotron_nano: disabled when chat_template_args contains "enable_thinking": false
/// or "force_nonempty_content": true. /// or "force_nonempty_content": true.
/// For deepseek_r1: disabled when chat_template_args contains "thinking": false /// For deepseek_r1 / deepseek_v4: disabled when chat_template_args contains
/// or "thinking_mode": "chat". /// "thinking": false or "thinking_mode": "chat" — matches the V4 formatter's
/// `resolve_thinking_mode` convention, so the parser and the prompt stay in sync.
fn is_reasoning_disabled_by_request( fn is_reasoning_disabled_by_request(
reasoning_parser: Option<&str>, reasoning_parser: Option<&str>,
chat_template_args: Option<&std::collections::HashMap<String, serde_json::Value>>, chat_template_args: Option<&std::collections::HashMap<String, serde_json::Value>>,
...@@ -1257,7 +1258,8 @@ impl OpenAIPreprocessor { ...@@ -1257,7 +1258,8 @@ impl OpenAIPreprocessor {
} }
false false
} }
Some("deepseek_r1") => { Some("deepseek_r1") | Some("deepseek_v4") | Some("deepseek-v4")
| Some("deepseekv4") => {
if let Some(args) = chat_template_args { if let Some(args) = chat_template_args {
if let Some(thinking) = args.get("thinking") { if let Some(thinking) = args.get("thinking") {
return thinking == &serde_json::Value::Bool(false); return thinking == &serde_json::Value::Bool(false);
...@@ -1829,6 +1831,50 @@ mod tests { ...@@ -1829,6 +1831,50 @@ mod tests {
false, false,
"nemotron_nano + empty args → enabled", "nemotron_nano + empty args → enabled",
), ),
// deepseek_v4 — same convention as deepseek_r1; verify all three aliases
// (deepseek_v4 / deepseek-v4 / deepseekv4) plus both signal keys.
(
Some("deepseek_v4"),
Some(&thinking_false),
true,
"deepseek_v4 + thinking=false → disabled",
),
(
Some("deepseek_v4"),
Some(&thinking_true),
false,
"deepseek_v4 + thinking=true → enabled",
),
(
Some("deepseek_v4"),
Some(&thinking_mode_chat),
true,
"deepseek_v4 + thinking_mode=chat → disabled",
),
(
Some("deepseek_v4"),
Some(&thinking_mode_thinking),
false,
"deepseek_v4 + thinking_mode=thinking → enabled",
),
(
Some("deepseek_v4"),
None,
false,
"deepseek_v4 + no args → enabled",
),
(
Some("deepseek-v4"),
Some(&thinking_false),
true,
"deepseek-v4 (hyphen alias) + thinking=false → disabled",
),
(
Some("deepseekv4"),
Some(&thinking_mode_chat),
true,
"deepseekv4 (joined alias) + thinking_mode=chat → disabled",
),
]; ];
for (parser, args, expected, desc) in cases { for (parser, args, expected, desc) in cases {
......
...@@ -26,6 +26,7 @@ use std::sync::Arc; ...@@ -26,6 +26,7 @@ use std::sync::Arc;
use crate::preprocessor::media::MediaDecoder; use crate::preprocessor::media::MediaDecoder;
pub mod deepseek_v32; pub mod deepseek_v32;
pub mod deepseek_v4;
mod template; mod template;
pub use template::{ChatTemplate, ContextMixins}; pub use template::{ChatTemplate, ContextMixins};
......
This diff is collapsed.
...@@ -19,12 +19,35 @@ use tokcfg::ChatTemplateValue; ...@@ -19,12 +19,35 @@ use tokcfg::ChatTemplateValue;
impl PromptFormatter { impl PromptFormatter {
pub fn from_mdc(mdc: &ModelDeploymentCard) -> Result<PromptFormatter> { pub fn from_mdc(mdc: &ModelDeploymentCard) -> Result<PromptFormatter> {
// Special handling for DeepSeek-V3.2(-Speciale) which doesn't provide Jinja chat_template // Special handling for DeepSeek models whose HF repos don't ship a Jinja chat_template.
let name_lower = mdc.display_name.to_lowercase(); //
if name_lower.contains("deepseek") // Prefer the authoritative `model_type` from config.json — it's set by
&& name_lower.contains("v3.2") // the model author and survives any `--served-model-name` rename. Fall
&& !name_lower.contains("exp") // back to a tight substring match on `display_name` only when config.json
{ // is absent (e.g., tokenizer-only MDCs) or unreadable.
//
// An empty `model_type` string (rare but legal in the JSON) carries
// no signal — normalize it to `None` so the display-name fallback
// still runs instead of being silently suppressed.
let model_type_lower = mdc
.model_info
.as_ref()
.and_then(|info| info.get_model_info().ok())
.map(|info| info.model_type().to_lowercase())
.filter(|s| !s.is_empty());
let display_name_lower = mdc.display_name.to_lowercase();
if is_deepseek_v4(&model_type_lower, &display_name_lower) {
tracing::info!(
model_type = ?model_type_lower,
display_name = %mdc.display_name,
"Detected DeepSeek V4 model, using native Rust formatter",
);
return Ok(Self::OAI(Arc::new(
super::deepseek_v4::DeepSeekV4Formatter::new_thinking(),
)));
}
if is_deepseek_v3_2_non_exp(&model_type_lower, &display_name_lower) {
tracing::info!("Detected DeepSeek V3.2 model (non-Exp), using native Rust formatter"); tracing::info!("Detected DeepSeek V3.2 model (non-Exp), using native Rust formatter");
return Ok(Self::OAI(Arc::new( return Ok(Self::OAI(Arc::new(
super::deepseek_v32::DeepSeekV32Formatter::new_thinking(), super::deepseek_v32::DeepSeekV32Formatter::new_thinking(),
...@@ -187,3 +210,158 @@ struct HfTokenizerConfigJsonFormatter { ...@@ -187,3 +210,158 @@ struct HfTokenizerConfigJsonFormatter {
pub struct ContextMixins { pub struct ContextMixins {
context_mixins: HashSet<PromptContextMixin>, context_mixins: HashSet<PromptContextMixin>,
} }
/// Reports whether the DeepSeek-V4 native formatter should handle this model.
///
/// Decision order:
/// 1. config.json `model_type` (already lowercased by the caller). The value
///    `"deepseek_v4"` selects V4; it is set by the model author and therefore
///    survives any `--served-model-name` rename.
/// 2. Any other concrete `model_type` is treated as authoritative and rejects
///    V4 without consulting the display name.
/// 3. Only when no `model_type` is available (`None`, e.g. tokenizer-only
///    MDCs or an unreadable config.json) is `display_name_lower` checked, via
///    the anchored match `^deepseek(?:[-_.])?v4(?:[-_.]|$)` implemented by
///    `is_deepseek_v4_name`.
fn is_deepseek_v4(model_type_lower: &Option<String>, display_name_lower: &str) -> bool {
    if let Some(model_type) = model_type_lower.as_deref() {
        // config.json spoke: trust it whether or not it names V4.
        return model_type == "deepseek_v4";
    }
    // No config signal at all: fall back to the display name.
    is_deepseek_v4_name(display_name_lower)
}
/// Decides whether to activate the DeepSeek-V3.2 (non-Exp) native formatter.
///
/// Same config-primary / name-fallback rule as V4:
/// - A V3.2 `model_type` selects the formatter unless the display name
///   contains `exp` (V3.2-Exp is handled separately).
/// - Any other concrete `model_type` is authoritative and rejects V3.2.
/// - With no `model_type` (`None`), the display name must contain both
///   `deepseek` and `v3.2` and must not contain `exp`.
///
/// Two spellings of the V3.2 `model_type` are accepted: `deepseek_v32` and
/// `deepseek_v3_2`. NOTE(review): upstream DeepSeek-V3.2 config.json files
/// appear to use `deepseek_v32`; confirm against the checkpoints you deploy.
/// `deepseek_v3_2` is kept so existing behavior is unchanged.
///
/// Both arguments are expected to be lowercased by the caller.
fn is_deepseek_v3_2_non_exp(model_type_lower: &Option<String>, display_name_lower: &str) -> bool {
    // The Exp exclusion applies on both the config path and the name path.
    let is_exp = display_name_lower.contains("exp");
    match model_type_lower.as_deref() {
        Some("deepseek_v32" | "deepseek_v3_2") => !is_exp,
        // config.json names a different family — trust it.
        Some(_) => false,
        // No config signal: original display-name heuristic, evaluated only
        // when it is actually needed.
        None => {
            !is_exp
                && display_name_lower.contains("deepseek")
                && display_name_lower.contains("v3.2")
        }
    }
}
/// Anchored display-name test for DeepSeek-V4, operating on an
/// already-lowercased string. Behaves like the regex
/// `^deepseek(?:[-_.])?v4(?:[-_.]|$)` but uses plain string operations so the
/// `regex` crate is not required.
///
/// Accepted shape: the literal `deepseek`, at most one separator (`-`, `_` or
/// `.`), the literal `v4`, and then either end-of-string or another separator.
///
/// Consequently these are rejected:
/// - `deepseek-v3.2-v4-foo` (`v4` does not directly follow the prefix)
/// - `deepseek-v40` / `deepseek-v4pro` (no separator after `v4`)
/// - `my-deepseek-v4` (`deepseek` is not at the start)
fn is_deepseek_v4_name(name_lower: &str) -> bool {
    const SEPARATORS: [char; 3] = ['-', '_', '.'];

    let tail = match name_lower.strip_prefix("deepseek") {
        Some(tail) => tail,
        None => return false,
    };
    // Drop at most one separator sitting between "deepseek" and "v4".
    let tail = tail.strip_prefix(SEPARATORS).unwrap_or(tail);

    match tail.strip_prefix("v4") {
        None => false,
        // After `v4` the name must stop or continue with a separator;
        // `v40`, `v4pro`, etc. belong to some other model family.
        Some(suffix) => suffix
            .chars()
            .next()
            .map_or(true, |next| SEPARATORS.contains(&next)),
    }
}
#[cfg(test)]
mod detection_tests {
    use super::{is_deepseek_v3_2_non_exp, is_deepseek_v4, is_deepseek_v4_name};

    /// Lowercased display names the anchored V4 matcher must accept.
    const V4_NAMES: [&str; 9] = [
        "deepseek-v4",
        "deepseek_v4",
        "deepseek.v4",
        "deepseekv4",
        "deepseek-v4-pro",
        "deepseek-v4-flash",
        "deepseek-v4-flash-2507",
        "deepseek-v4.1",
        "deepseek_v4_thinking",
    ];

    /// Lowercased display names the anchored V4 matcher must reject,
    /// including composite names that a loose substring match would accept.
    const NON_V4_NAMES: [&str; 11] = [
        "deepseek-v3.2-v4-foo",
        "my-deepseek-v4",
        "deepseek-v40",
        "deepseek-v4pro",
        "deepseekv40",
        "deepseek-v3",
        "deepseek-v3.2",
        "deepseek-r1",
        "qwen3-v4", // only deepseek-prefixed names qualify
        "dsflash",
        "",
    ];

    #[test]
    fn v4_name_matches_canonical_variants() {
        for name in V4_NAMES {
            assert!(is_deepseek_v4_name(name), "expected {name} to match V4");
        }
    }

    #[test]
    fn v4_name_rejects_non_v4() {
        for name in NON_V4_NAMES {
            assert!(
                !is_deepseek_v4_name(name),
                "expected {name} to NOT match V4",
            );
        }
    }

    #[test]
    fn v4_detection_prefers_config_model_type() {
        // When config.json reports `deepseek_v4`, the served-model name is
        // irrelevant — even names that look like other models still pick V4.
        let v4_config = Some("deepseek_v4".to_string());
        for display in ["dsflash", "my-pet-model", "llama-3-8b", ""] {
            assert!(
                is_deepseek_v4(&v4_config, display),
                "config says deepseek_v4, display {display:?} — expected V4",
            );
        }

        // A concrete non-V4 `model_type` blocks the display-name fallback:
        // calling a llama model "deepseek-v4" does not make it DeepSeek-V4.
        let llama_config = Some("llama".to_string());
        for display in ["deepseek-v4", "deepseek-v4-flash", "anything"] {
            assert!(
                !is_deepseek_v4(&llama_config, display),
                "config says llama, display {display:?} — expected NOT V4",
            );
        }

        // Without any config.json signal the display name decides.
        assert!(is_deepseek_v4(&None, "deepseek-v4-flash"));
        assert!(!is_deepseek_v4(&None, "dsflash"));

        // An empty `"model_type": ""` is normalized to `None` at the call
        // site before `is_deepseek_v4` runs, so it must behave exactly like
        // the no-config case above — pin that contract.
        let normalized_empty: Option<String> = None;
        assert!(is_deepseek_v4(&normalized_empty, "deepseek-v4-flash"));
        assert!(!is_deepseek_v4(&normalized_empty, "dsflash"));
    }

    #[test]
    fn v3_2_detection_prefers_config_model_type() {
        // With `deepseek_v3_2` in config.json, every display name that does
        // not contain "exp" selects the V3.2 formatter.
        let v3_2_config = Some("deepseek_v3_2".to_string());
        assert!(is_deepseek_v3_2_non_exp(&v3_2_config, "whatever"));
        assert!(is_deepseek_v3_2_non_exp(&v3_2_config, "deepseek-v3.2"));
        // V3.2-Exp is a distinct family and stays excluded even via config.
        assert!(!is_deepseek_v3_2_non_exp(&v3_2_config, "deepseek-v3.2-exp"));

        // A different concrete model_type wins over a V3.2-looking name.
        let v4_config = Some("deepseek_v4".to_string());
        assert!(!is_deepseek_v3_2_non_exp(&v4_config, "deepseek-v3.2"));

        // No config: the original display-name heuristic applies.
        assert!(is_deepseek_v3_2_non_exp(&None, "deepseek-v3.2-pro"));
        assert!(!is_deepseek_v3_2_non_exp(&None, "deepseek-v3.2-exp"));
        assert!(!is_deepseek_v3_2_non_exp(&None, "deepseek-v4"));
    }
}
{"tools":[{"type":"function","function":{"name":"get_weather","description":"Get the weather for a specific location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The city name"},"unit":{"type":"string","enum":["celsius","fahrenheit"],"description":"Temperature unit"}},"required":["location"]}}},{"type":"function","function":{"name":"search","description":"Search the web for information","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search query"},"num_results":{"type":"integer","description":"Number of results to return"}},"required":["query"]}}}],"messages":[{"role":"system","content":"You are a helpful assistant."},{"role":"user","content":"What's the weather in Beijing?"},{"role":"assistant","reasoning_content":"The user wants to know the weather in Beijing. I should use the get_weather tool.","tool_calls":[{"id":"call_001","type":"function","function":{"name":"get_weather","arguments":"{\"location\": \"Beijing\", \"unit\": \"celsius\"}"}}]},{"role":"tool","tool_call_id":"call_001","content":"{\"temperature\": 22, \"condition\": \"sunny\", \"humidity\": 45}"},{"role":"assistant","reasoning_content":"Got the weather data. Let me format a nice response.","content":"The weather in Beijing is currently sunny with a temperature of 22°C and 45% humidity."}]}
[{"role":"system","content":"You are a helpful assistant."},{"role":"user","content":"Hello"},{"role":"assistant","reasoning_content":"The user said hello, I should greet back.","content":"Hi there! How can I help you?"},{"role":"user","content":"What is the capital of France?"},{"role":"assistant","reasoning_content":"The user asks about the capital of France. It is Paris.","content":"The capital of France is Paris."}]
[{"role":"system","content":"该助手为DeepSeek,由深度求索公司创造。"},{"role":"latest_reminder","content":"2026-02-21,星期六,广州,App,中文"},{"role":"developer","content":"小柴胡冲剂和布洛芬能一起吃吗?\n\nCITATION FORMAT: 【{cursor_id}†L{start_line_id}(-L{end_line_id})?】","tools":[{"type":"function","function":{"name":"search","description":"Web search. Split multiple queries with '||'.","parameters":{"type":"object","properties":{"queries":{"type":"string","description":"query1||query2"}},"required":["queries"],"additionalProperties":false,"$schema":"http://json-schema.org/draft-07/schema#"}}},{"type":"function","function":{"name":"open","description":"Batch open IDs (format 【{id}†...】) or URLs.","parameters":{"type":"object","properties":{"open_list":{"type":"array","items":{"type":"object","properties":{"id":{"description":"ID or URL","anyOf":[{"type":"integer"},{"type":"string"}],"default":-1},"cursor":{"type":"integer","description":"","default":-1},"loc":{"type":"integer","description":"Start line","default":-1},"num_lines":{"type":"integer","description":"","default":-1},"view_source":{"type":"boolean","description":"","default":false}},"additionalProperties":false},"description":""}},"required":["open_list"],"additionalProperties":false,"$schema":"http://json-schema.org/draft-07/schema#"}}},{"type":"function","function":{"name":"find","description":"Find exact text pattern in pages.","parameters":{"type":"object","properties":{"find_list":{"type":"array","items":{"type":"object","properties":{"pattern":{"type":"string","description":""},"cursor":{"type":"integer","description":"","default":-1}},"required":["pattern"],"additionalProperties":false},"description":""}},"required":["find_list"],"additionalProperties":false,"$schema":"http://json-schema.org/draft-07/schema#"}}}]},{"role":"assistant","content":"","reasoning_content":"用户想知道小柴胡冲剂和布洛芬能否一起服用。","tool_calls":[{"type":"function","function":{"name":"search","arguments":"{\"queries\": \"小柴胡冲剂 布洛芬 相互作用 
一起吃\"}"}}]},{"role":"tool","content":"[0]"},{"role":"assistant","content":"请及时就医。","reasoning_content":"现在开始组织回答。","tool_calls":[]}]
[{"role":"system","content":"该助手为DeepSeek-V3,由深度求索公司创造。\n今天是2025年10月17日,星期五。"},{"role":"latest_reminder","content":"2024-11-15,上海市,App,中文"},{"role":"user","content":"热海大滚锅是世界著名温泉吗"},{"role":"assistant","content":"关于热海大滚锅是否是世界著名温泉,可以这样总结:它在**中国乃至全球的地热奇观中占有重要地位**,但“世界著名”的称号更侧重于它作为**独特的地质现象和旅游景点**,而非普遍的温泉疗养体验。\n\n为了让你快速了解,我整理了一个简要的表格:\n\n| 维度 | 热海大滚锅的地位与特点 |\n| :--- | :--- |\n| **地质奇观** | **世界罕见**的高温地热系统,泉眼多、水温高、形态多样。 |\n| **国内声誉** | **中国三大地热区之一**,**国家5A级旅游景区**,自古闻名(徐霞客曾游历并记载)。 |\n| **国际知名度** | 在一些旅游平台被提及为“世界六大温泉”之一,但此说法流传不广,其国际声誉更多建立在地质独特性上。 |\n| **核心体验** | **观赏地热奇观**(如97℃沸腾的“大滚锅”)、**体验温泉煮鸡蛋**。 |\n\n### 💡 游玩攻略与温馨提示\n\n如果你计划前往热海大滚锅,这里有一些实用信息供你参考:\n\n- **门票与开放时间**:\n - **门票**:景区门票约为**50元/人**。如果选择包含温泉沐浴的套餐,价格会更高,例如约**288元**。\n - **开放时间**:景区一般**08:00-18:00**开放,但具体时间可能变动,建议提前核实。\n\n- **特色体验**:\n - **温泉煮鸡蛋**:这几乎是必试项目。可以在景区门口购买用草绳串起的生鸡蛋(约5-8元/串),然后到“大滚锅”旁的指定区域蒸煮,几分钟便可熟食,趣味十足。\n - **金汤足浴**:可以直接用从“大滚锅”流出的温泉水泡脚,缓解旅途疲劳。\n\n- **注意事项**:\n - **安全第一**:“大滚锅”水温极高,务必遵守游览规则,在指定区域内观赏,切勿随意触碰泉水。\n - **规划行程**:建议为热海景区预留**3-4小时**的游览时间。景区内步道不走回头路,出入口有观光车接送。\n\n希望这些信息能帮助你更好地了解热海大滚锅。如果你对腾冲的其他景点或者行程规划有更多疑问,我很乐意提供进一步的信息。","mask":1},{"role":"user","content":"世界著名温泉有哪些","task":"action"},{"role":"assistant","content":"Search"}]
<|begin▁of▁sentence|>You are a helpful assistant.
## Tools
You have access to a set of tools to help answer the user's question. You can invoke tools by writing a "<|DSML|tool_calls>" block like the following:
<|DSML|tool_calls>
<|DSML|invoke name="$TOOL_NAME">
<|DSML|parameter name="$PARAMETER_NAME" string="true|false">$PARAMETER_VALUE</|DSML|parameter>
...
</|DSML|invoke>
<|DSML|invoke name="$TOOL_NAME2">
...
</|DSML|invoke>
</|DSML|tool_calls>
String parameters should be specified as is and set `string="true"`. For all other types (numbers, booleans, arrays, objects), pass the value in JSON format and set `string="false"`.
If thinking_mode is enabled (triggered by <think>), you MUST output your complete reasoning inside <think>...</think> BEFORE any tool calls or final response.
Otherwise, output directly after </think> with tool calls or final response.
### Available Tool Schemas
{"name": "get_weather", "description": "Get the weather for a specific location", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city name"}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "Temperature unit"}}, "required": ["location"]}}
{"name": "search", "description": "Search the web for information", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Search query"}, "num_results": {"type": "integer", "description": "Number of results to return"}}, "required": ["query"]}}
You MUST strictly follow the above defined tool name and parameter schemas to invoke tool calls.
<|User|>What's the weather in Beijing?<|Assistant|><think>The user wants to know the weather in Beijing. I should use the get_weather tool.</think>
<|DSML|tool_calls>
<|DSML|invoke name="get_weather">
<|DSML|parameter name="location" string="true">Beijing</|DSML|parameter>
<|DSML|parameter name="unit" string="true">celsius</|DSML|parameter>
</|DSML|invoke>
</|DSML|tool_calls><|end▁of▁sentence|><|User|><tool_result>{"temperature": 22, "condition": "sunny", "humidity": 45}</tool_result><|Assistant|><think>Got the weather data. Let me format a nice response.</think>The weather in Beijing is currently sunny with a temperature of 22°C and 45% humidity.<|end▁of▁sentence|>
\ No newline at end of file
<|begin▁of▁sentence|>You are a helpful assistant.<|User|>Hello<|Assistant|></think>Hi there! How can I help you?<|end▁of▁sentence|><|User|>What is the capital of France?<|Assistant|><think>The user asks about the capital of France. It is Paris.</think>The capital of France is Paris.<|end▁of▁sentence|>
\ No newline at end of file
<|begin▁of▁sentence|>该助手为DeepSeek,由深度求索公司创造。<|latest_reminder|>2026-02-21,星期六,广州,App,中文<|User|>小柴胡冲剂和布洛芬能一起吃吗?
CITATION FORMAT: 【{cursor_id}†L{start_line_id}(-L{end_line_id})?】
## Tools
You have access to a set of tools to help answer the user's question. You can invoke tools by writing a "<|DSML|tool_calls>" block like the following:
<|DSML|tool_calls>
<|DSML|invoke name="$TOOL_NAME">
<|DSML|parameter name="$PARAMETER_NAME" string="true|false">$PARAMETER_VALUE</|DSML|parameter>
...
</|DSML|invoke>
<|DSML|invoke name="$TOOL_NAME2">
...
</|DSML|invoke>
</|DSML|tool_calls>
String parameters should be specified as is and set `string="true"`. For all other types (numbers, booleans, arrays, objects), pass the value in JSON format and set `string="false"`.
If thinking_mode is enabled (triggered by <think>), you MUST output your complete reasoning inside <think>...</think> BEFORE any tool calls or final response.
Otherwise, output directly after </think> with tool calls or final response.
### Available Tool Schemas
{"name": "search", "description": "Web search. Split multiple queries with '||'.", "parameters": {"type": "object", "properties": {"queries": {"type": "string", "description": "query1||query2"}}, "required": ["queries"], "additionalProperties": false, "$schema": "http://json-schema.org/draft-07/schema#"}}
{"name": "open", "description": "Batch open IDs (format 【{id}†...】) or URLs.", "parameters": {"type": "object", "properties": {"open_list": {"type": "array", "items": {"type": "object", "properties": {"id": {"description": "ID or URL", "anyOf": [{"type": "integer"}, {"type": "string"}], "default": -1}, "cursor": {"type": "integer", "description": "", "default": -1}, "loc": {"type": "integer", "description": "Start line", "default": -1}, "num_lines": {"type": "integer", "description": "", "default": -1}, "view_source": {"type": "boolean", "description": "", "default": false}}, "additionalProperties": false}, "description": ""}}, "required": ["open_list"], "additionalProperties": false, "$schema": "http://json-schema.org/draft-07/schema#"}}
{"name": "find", "description": "Find exact text pattern in pages.", "parameters": {"type": "object", "properties": {"find_list": {"type": "array", "items": {"type": "object", "properties": {"pattern": {"type": "string", "description": ""}, "cursor": {"type": "integer", "description": "", "default": -1}}, "required": ["pattern"], "additionalProperties": false}, "description": ""}}, "required": ["find_list"], "additionalProperties": false, "$schema": "http://json-schema.org/draft-07/schema#"}}
You MUST strictly follow the above defined tool name and parameter schemas to invoke tool calls.
<|Assistant|><think>用户想知道小柴胡冲剂和布洛芬能否一起服用。</think>
<|DSML|tool_calls>
<|DSML|invoke name="search">
<|DSML|parameter name="queries" string="true">小柴胡冲剂 布洛芬 相互作用 一起吃</|DSML|parameter>
</|DSML|invoke>
</|DSML|tool_calls><|end▁of▁sentence|><|User|><tool_result>[0]</tool_result><|Assistant|><think>现在开始组织回答。</think>请及时就医。<|end▁of▁sentence|>
\ No newline at end of file
<|begin▁of▁sentence|>该助手为DeepSeek-V3,由深度求索公司创造。
今天是2025年10月17日,星期五。<|latest_reminder|>2024-11-15,上海市,App,中文<|User|>热海大滚锅是世界著名温泉吗<|Assistant|></think>关于热海大滚锅是否是世界著名温泉,可以这样总结:它在**中国乃至全球的地热奇观中占有重要地位**,但“世界著名”的称号更侧重于它作为**独特的地质现象和旅游景点**,而非普遍的温泉疗养体验。
为了让你快速了解,我整理了一个简要的表格:
| 维度 | 热海大滚锅的地位与特点 |
| :--- | :--- |
| **地质奇观** | **世界罕见**的高温地热系统,泉眼多、水温高、形态多样。 |
| **国内声誉** | **中国三大地热区之一**,**国家5A级旅游景区**,自古闻名(徐霞客曾游历并记载)。 |
| **国际知名度** | 在一些旅游平台被提及为“世界六大温泉”之一,但此说法流传不广,其国际声誉更多建立在地质独特性上。 |
| **核心体验** | **观赏地热奇观**(如97℃沸腾的“大滚锅”)、**体验温泉煮鸡蛋**。 |
### 💡 游玩攻略与温馨提示
如果你计划前往热海大滚锅,这里有一些实用信息供你参考:
- **门票与开放时间**:
- **门票**:景区门票约为**50元/人**。如果选择包含温泉沐浴的套餐,价格会更高,例如约**288元**。
- **开放时间**:景区一般**08:00-18:00**开放,但具体时间可能变动,建议提前核实。
- **特色体验**:
- **温泉煮鸡蛋**:这几乎是必试项目。可以在景区门口购买用草绳串起的生鸡蛋(约5-8元/串),然后到“大滚锅”旁的指定区域蒸煮,几分钟便可熟食,趣味十足。
- **金汤足浴**:可以直接用从“大滚锅”流出的温泉水泡脚,缓解旅途疲劳。
- **注意事项**:
- **安全第一**:“大滚锅”水温极高,务必遵守游览规则,在指定区域内观赏,切勿随意触碰泉水。
- **规划行程**:建议为热海景区预留**3-4小时**的游览时间。景区内步道不走回头路,出入口有观光车接送。
希望这些信息能帮助你更好地了解热海大滚锅。如果你对腾冲的其他景点或者行程规划有更多疑问,我很乐意提供进一步的信息。<|end▁of▁sentence|><|User|>世界著名温泉有哪些<|Assistant|></think><|action|>Search<|end▁of▁sentence|>
\ No newline at end of file
{
"request_id": "deepseek-v4-content-before-tool-test",
"expected_output": {"normal_content": "Let me check the forecast for Tokyo right now.", "reasoning_content": "The user wants today's weather in Tokyo. I'll acknowledge the request, then call get_weather.", "tool_calls": [{"id": "call_1", "type": "function", "function": {"name": "get_weather", "arguments": "{\"location\": \"Tokyo\", \"unit\": \"celsius\"}"}}]},
"input_stream": [
{"data":{"id":"chatcmpl-deepseek-v4-content-before-tool","choices":[{"index":0,"delta":{"content":"<think>The user wants today's weather in Tokyo. I'll acknowledge the request, then call get_weather.</think>","role":"assistant","reasoning_content":"The user wants today's weather in Tokyo. I'll acknowledge the request, then call get_weather."}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-content-before-tool","choices":[{"index":0,"delta":{"content":"Let me check the forecast for Tokyo right now.","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-content-before-tool","choices":[{"index":0,"delta":{"content":"<|DSML|tool_calls>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-content-before-tool","choices":[{"index":0,"delta":{"content":"<|DSML|invoke name=\"get_weather\">\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-content-before-tool","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"location\" string=\"true\">Tokyo</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-content-before-tool","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"unit\" string=\"true\">celsius</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-content-before-tool","choices":[{"index":0,"delta":{"content":"</|DSML|invoke>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-content-before-tool","choices":[{"index":0,"delta":{"content":"</|DSML|tool_calls>","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-content-before-tool","choices":[{"index":0,"delta":{"content":null,"role":"assistant"},"finish_reason":"tool_calls"}]}}
]
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment