Unverified Commit 35fa7129 authored by Keiven C's avatar Keiven C Committed by GitHub
Browse files

feat(v4): cherry-pick #8665 onto release/deepseekv4 (#8709)


Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com>
parent 01002df7
......@@ -84,7 +84,7 @@ $global:copyright_results = @{
$ignored_files = @('.clang-format', '.gitattributes', '.gitignore', '.gitkeep', '.patch', 'Cargo.lock', 'LICENSE', 'uv.lock', 'rust-toolchain.toml', 'codespell.txt', 'exclusions.txt')
write-debug "<copyright-check> ignored_files = ['$($ignored_files -join "','")']."
$ignored_paths = @('.github', '.mypy_cache', '.pytest_cache', 'lib/llm/tests/data/sample-models', 'lib/llm/tests/data/deepseek-v3.2')
$ignored_paths = @('.github', '.mypy_cache', '.pytest_cache', 'lib/llm/tests/data/sample-models', 'lib/llm/tests/data/deepseek-v3.2', 'lib/llm/tests/data/deepseek-v4')
write-debug "<copyright-check> ignored_paths = ['$($ignored_paths -join "','")']."
$ignored_types = @('.bat', '.gif', '.ico', '.ipynb', '.jpg', '.jpeg', '.patch', '.png', '.pyc', '.pyi', '.rst', '.zip', '.md', '.json')
write-debug "<copyright-check> ignored_types = ['$($ignored_types -join "', '")']."
......
......@@ -3,9 +3,12 @@
from __future__ import annotations
import copy
import inspect
import json
import logging
from dataclasses import dataclass
from functools import lru_cache
from typing import Any, TypeAlias
from sglang.srt.entrypoints.openai.protocol import Function as SglangFunction
......@@ -129,6 +132,127 @@ def _is_named_tool_choice(tool_choice: Any) -> bool:
)
def _normalize_deepseek_v4_hint(value: Any) -> str:
return str(value or "").lower().replace("-", "").replace("_", "")
def _should_use_deepseek_v4_encoding(
request: dict[str, Any],
*,
tokenizer,
tool_call_parser_name: str | None,
reasoning_parser_name: str | None,
) -> bool:
if getattr(tokenizer, "chat_template", None) is not None:
return False
return any(
"deepseekv4" in _normalize_deepseek_v4_hint(value)
for value in (
request.get("model"),
tool_call_parser_name,
reasoning_parser_name,
)
)
def _filter_template_tools(
request: dict[str, Any],
*,
exclude_tools_when_tool_choice_none: bool,
) -> list[dict[str, Any]] | None:
raw_tools = request.get("tools") or []
if not raw_tools:
return None
tool_choice = request.get("tool_choice", "auto")
if exclude_tools_when_tool_choice_none and tool_choice == "none":
return None
if _is_named_tool_choice(tool_choice):
chosen_name = tool_choice["function"]["name"]
return [
copy.deepcopy(tool)
for tool in raw_tools
if tool.get("function", {}).get("name") == chosen_name
]
return copy.deepcopy(raw_tools)
def _render_deepseek_v4_prompt_token_ids(
    request: dict[str, Any],
    *,
    messages: list[dict[str, Any]],
    tokenizer,
    template_tools: list[dict[str, Any]] | None,
) -> list[int]:
    """Render a chat request with SGLang's DeepSeek-V4 encoder and tokenize it.

    The messages are deep-copied first, so the caller's list is never mutated.
    ``None`` message contents become empty strings. When ``template_tools`` is
    non-empty it is attached to the leading system message, and an empty
    system message is inserted first if the conversation does not start with
    one.

    Thinking mode is ``"thinking"`` only when ``chat_template_kwargs`` has a
    truthy ``thinking`` entry. ``reasoning_effort`` is read from the request
    first, then from ``chat_template_kwargs``; any value other than ``"max"``
    or ``"high"`` is passed to the encoder as ``None``.

    Raises:
        ValueError: if the installed SGLang does not provide
            ``sglang.srt.entrypoints.openai.encoding_dsv4``.
    """
    try:
        from sglang.srt.entrypoints.openai.encoding_dsv4 import encode_messages
    except ImportError as exc:
        raise ValueError(
            "DeepSeek-V4 preprocessing requires SGLang's "
            "sglang.srt.entrypoints.openai.encoding_dsv4 encoder. "
            "Install an SGLang build that includes the DeepSeek-V4 integration."
        ) from exc

    # Work on a private copy; the request's own messages stay untouched.
    rendered = copy.deepcopy(messages)
    for entry in rendered:
        if entry.get("content") is None:
            entry["content"] = ""

    if template_tools:
        starts_with_system = bool(rendered) and rendered[0].get("role") == "system"
        if not starts_with_system:
            rendered.insert(0, {"role": "system", "content": ""})
        rendered[0]["tools"] = template_tools

    template_options = request.get("chat_template_kwargs") or {}
    if template_options.get("thinking"):
        mode = "thinking"
    else:
        mode = "chat"

    requested_effort = request.get("reasoning_effort") or template_options.get(
        "reasoning_effort"
    )
    effort = requested_effort if requested_effort in ("max", "high") else None

    prompt = encode_messages(rendered, thinking_mode=mode, reasoning_effort=effort)
    return _normalize_prompt_token_ids(tokenizer.encode(prompt))
@lru_cache(maxsize=64)
def _callable_accepts_kwarg(func: Any, kwarg: str) -> bool:
try:
signature = inspect.signature(func)
except (TypeError, ValueError):
return False
for name, param in signature.parameters.items():
if param.kind == inspect.Parameter.VAR_KEYWORD:
return True
if name == kwarg and param.kind in (
inspect.Parameter.POSITIONAL_OR_KEYWORD,
inspect.Parameter.KEYWORD_ONLY,
):
return True
return False
def _call_with_optional_parallel_tool_calls(
func: Any,
*args: Any,
parallel_tool_calls: Any,
) -> Any:
"""Call SGLang helpers across versions with/without parallel_tool_calls."""
if _callable_accepts_kwarg(func, "parallel_tool_calls"):
return func(*args, parallel_tool_calls=parallel_tool_calls)
return func(*args)
def build_tool_call_guided_decoding(
request: dict[str, Any],
*,
......@@ -161,7 +285,8 @@ def build_tool_call_guided_decoding(
)
constraint = (
"json_schema",
get_json_schema_constraint(
_call_with_optional_parallel_tool_calls(
get_json_schema_constraint,
sglang_tools,
sglang_tool_choice,
parallel_tool_calls=parallel_tool_calls,
......@@ -172,7 +297,8 @@ def build_tool_call_guided_decoding(
tools=sglang_tools,
tool_call_parser=tool_call_parser_name,
)
constraint = parser.get_structure_constraint(
constraint = _call_with_optional_parallel_tool_calls(
parser.get_structure_constraint,
tool_choice,
parallel_tool_calls=parallel_tool_calls,
)
......@@ -239,30 +365,38 @@ def preprocess_chat_request(
f"present in tools (available: {sorted(available_names) or 'none'})"
)
# Build template kwargs -- single call for rendering + tokenization
template_kwargs: dict[str, Any] = {
"add_generation_prompt": True,
"tokenize": True,
}
# Strip tools from template when tool_choice=none so the model doesn't
# see them and generate raw XML tool calls in its response.
# When tool_choice names a specific function, only include that tool
# in the template so the model doesn't see irrelevant definitions.
if sglang_tools and not (
exclude_tools_when_tool_choice_none and tool_choice == "none"
):
if _is_named_tool_choice(tool_choice):
chosen_name = tool_choice["function"]["name"]
template_kwargs["tools"] = [
t.model_dump() for t in sglang_tools if t.function.name == chosen_name
]
else:
template_kwargs["tools"] = [t.model_dump() for t in sglang_tools]
prompt_token_ids = _normalize_prompt_token_ids(
tokenizer.apply_chat_template(messages, **template_kwargs)
template_tools = _filter_template_tools(
request,
exclude_tools_when_tool_choice_none=exclude_tools_when_tool_choice_none,
)
if _should_use_deepseek_v4_encoding(
request,
tokenizer=tokenizer,
tool_call_parser_name=tool_call_parser_name,
reasoning_parser_name=reasoning_parser_name,
):
prompt_token_ids = _render_deepseek_v4_prompt_token_ids(
request,
messages=messages,
tokenizer=tokenizer,
template_tools=template_tools,
)
else:
# Build template kwargs -- single call for rendering + tokenization
template_kwargs: dict[str, Any] = {
"add_generation_prompt": True,
"tokenize": True,
}
if template_tools:
template_kwargs["tools"] = template_tools
prompt_token_ids = _normalize_prompt_token_ids(
tokenizer.apply_chat_template(messages, **template_kwargs)
)
# Build parsers after rendering, so DeepSeek-V4 can use its custom encoder
# while still sharing the existing Dynamo parser/guided-decoding behavior.
tool_call_parser, reasoning_parser = create_parsers(
request,
tool_call_parser_name=tool_call_parser_name,
......
......@@ -11,12 +11,15 @@ Parallels test_vllm_unit.py for the vLLM backend.
import json
import sys
import types
import pytest
from sglang.srt.function_call.function_call_parser import FunctionCallParser
from sglang.srt.function_call.json_array_parser import JsonArrayParser
from sglang.srt.utils.hf_transformers_utils import get_tokenizer
import dynamo.frontend.sglang_prepost as sglang_prepost_module
import dynamo.frontend.sglang_processor as sglang_processor_module
from dynamo.frontend.sglang_prepost import (
SglangPreprocessResult,
......@@ -445,6 +448,85 @@ class TestBuildToolCallGuidedDecoding:
assert isinstance(guided, dict)
assert "json" in guided
def test_required_tool_choice_supports_older_sglang_constraint_signature(
    self, monkeypatch
):
    """``tool_choice="required"`` works with a two-argument constraint helper."""
    weather_tool = {
        "type": "function",
        "function": {
            "name": "get_weather",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
            },
        },
    }
    tools = convert_tools([weather_tool])

    def old_get_json_schema_constraint(sglang_tools, tool_choice):
        # No parallel_tool_calls parameter: passing it would raise TypeError.
        assert sglang_tools == tools
        assert tool_choice == "required"
        return {"type": "array", "items": {"type": "object"}}

    monkeypatch.setattr(
        sglang_prepost_module,
        "get_json_schema_constraint",
        old_get_json_schema_constraint,
    )

    request = {"tool_choice": "required", "parallel_tool_calls": False}
    guided = build_tool_call_guided_decoding(
        request,
        tool_call_parser_name=None,
        sglang_tools=tools,
    )

    assert guided == {"json": {"type": "array", "items": {"type": "object"}}}
def test_auto_tool_choice_supports_older_structure_constraint_signature(
    self, monkeypatch
):
    """``tool_choice="auto"`` works with a parser lacking ``parallel_tool_calls``."""
    strict_weather_tool = {
        "type": "function",
        "function": {
            "name": "get_weather",
            "strict": True,
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
            },
        },
    }
    tools = convert_tools([strict_weather_tool])

    class OldFunctionCallParser:
        """Stand-in parser exposing the one-argument constraint method."""

        def __init__(self, *, tools, tool_call_parser):
            self.tools = tools
            self.tool_call_parser = tool_call_parser

        def get_structure_constraint(self, tool_choice):
            assert tool_choice == "auto"
            return "structural_tag", {"type": "object"}

    monkeypatch.setattr(
        sglang_prepost_module,
        "FunctionCallParser",
        OldFunctionCallParser,
    )

    request = {"tool_choice": "auto", "parallel_tool_calls": False}
    guided = build_tool_call_guided_decoding(
        request,
        tool_call_parser_name="kimi_k2",
        sglang_tools=tools,
    )

    assert guided == {"structural_tag": {"type": "object"}}
def test_auto_strict_tools_can_build_structural_tag_guidance(self):
tools = convert_tools(
[
......@@ -990,6 +1072,240 @@ class TestPreprocessChatRequest:
)
assert len(with_system.prompt_token_ids) > len(without_system.prompt_token_ids)
def test_deepseek_v4_uses_sglang_encoder_when_chat_template_missing(
    self, monkeypatch
):
    """DeepSeek-V4 uses SGLang's encoder instead of HF chat_template."""
    encoder_module_name = "sglang.srt.entrypoints.openai.encoding_dsv4"
    seen = {}

    def fake_encode_messages(messages, *, thinking_mode, reasoning_effort=None):
        seen.update(
            messages=messages,
            thinking_mode=thinking_mode,
            reasoning_effort=reasoning_effort,
        )
        return "<dsv4-prompt>"

    # Install a stub encoder module so the lazy import inside the code under
    # test resolves to the fake.
    stub_module = types.ModuleType(encoder_module_name)
    stub_module.encode_messages = fake_encode_messages
    monkeypatch.setitem(sys.modules, encoder_module_name, stub_module)

    class NoTemplateTokenizer:
        chat_template = None

        def apply_chat_template(self, *args, **kwargs):
            raise AssertionError("apply_chat_template should not be called")

        def encode(self, prompt):
            assert prompt == "<dsv4-prompt>"
            return [1, 2, 3]

    weather_tool = {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
            },
        },
    }
    request = {
        "model": "deepseek-ai/DeepSeek-V4-Pro",
        "messages": [{"role": "user", "content": "Hello"}],
        "chat_template_kwargs": {
            "thinking": True,
            "reasoning_effort": "max",
        },
        "tools": [weather_tool],
    }

    result = preprocess_chat_request(
        request,
        tokenizer=NoTemplateTokenizer(),
        tool_call_parser_name=None,
        reasoning_parser_name="deepseek_v4",
    )

    assert result.prompt_token_ids == [1, 2, 3]
    assert seen["thinking_mode"] == "thinking"
    assert seen["reasoning_effort"] == "max"
    system_message, user_message = seen["messages"][0], seen["messages"][1]
    assert system_message["role"] == "system"
    assert system_message["tools"][0]["function"]["name"] == "get_weather"
    assert user_message["role"] == "user"
def test_deepseek_v4_named_tool_choice_filters_encoder_tools(self, monkeypatch):
    """A named tool choice passes only the chosen tool to the V4 encoder."""
    encoder_module_name = "sglang.srt.entrypoints.openai.encoding_dsv4"
    seen = {}

    def fake_encode_messages(messages, *, thinking_mode, reasoning_effort=None):
        seen["messages"] = messages
        return "<dsv4-prompt>"

    stub_module = types.ModuleType(encoder_module_name)
    stub_module.encode_messages = fake_encode_messages
    monkeypatch.setitem(sys.modules, encoder_module_name, stub_module)

    class NoTemplateTokenizer:
        chat_template = None

        def encode(self, prompt):
            return [1]

    def make_tool(name):
        return {"type": "function", "function": {"name": name, "parameters": {}}}

    request = {
        "model": "deepseek-ai/DeepSeek-V4-Pro",
        "messages": [{"role": "user", "content": "Hello"}],
        "tools": [make_tool("get_weather"), make_tool("get_time")],
        "tool_choice": {
            "type": "function",
            "function": {"name": "get_time"},
        },
    }

    preprocess_chat_request(
        request,
        tokenizer=NoTemplateTokenizer(),
        tool_call_parser_name=None,
        reasoning_parser_name="deepseek_v4",
    )

    encoder_tools = seen["messages"][0]["tools"]
    tool_names = [tool["function"]["name"] for tool in encoder_tools]
    assert tool_names == ["get_time"]
def test_deepseek_v4_respects_existing_chat_template(self, monkeypatch):
    """A tokenizer that ships a chat template bypasses the V4 encoder."""
    encoder_module_name = "sglang.srt.entrypoints.openai.encoding_dsv4"

    def fake_encode_messages(messages, *, thinking_mode, reasoning_effort=None):
        raise AssertionError("encoding_dsv4 should not be called")

    stub_module = types.ModuleType(encoder_module_name)
    stub_module.encode_messages = fake_encode_messages
    monkeypatch.setitem(sys.modules, encoder_module_name, stub_module)

    class TemplateTokenizer:
        chat_template = (
            "{% for message in messages %}{{ message.content }}{% endfor %}"
        )

        def apply_chat_template(self, messages, **kwargs):
            assert kwargs["add_generation_prompt"] is True
            assert kwargs["tokenize"] is True
            return [4, 5, 6]

        def encode(self, prompt):
            raise AssertionError("encode should not be called")

    request = {
        "model": "deepseek-ai/DeepSeek-V4-Pro",
        "messages": [{"role": "user", "content": "Hello"}],
    }
    result = preprocess_chat_request(
        request,
        tokenizer=TemplateTokenizer(),
        tool_call_parser_name=None,
        reasoning_parser_name=None,
    )

    assert result.prompt_token_ids == [4, 5, 6]
def test_deepseek_v4_normalizes_none_content_without_mutating_request(
    self, monkeypatch
):
    """``content: None`` reaches the encoder as ``""``; the request is untouched."""
    encoder_module_name = "sglang.srt.entrypoints.openai.encoding_dsv4"
    seen = {}

    def fake_encode_messages(messages, *, thinking_mode, reasoning_effort=None):
        seen["messages"] = messages
        return "<dsv4-prompt>"

    stub_module = types.ModuleType(encoder_module_name)
    stub_module.encode_messages = fake_encode_messages
    monkeypatch.setitem(sys.modules, encoder_module_name, stub_module)

    class NoTemplateTokenizer:
        chat_template = None

        def encode(self, prompt):
            return [7]

    request = {
        "model": "deepseek-ai/DeepSeek-V4-Pro",
        "messages": [{"role": "assistant", "content": None}],
    }

    result = preprocess_chat_request(
        request,
        tokenizer=NoTemplateTokenizer(),
        tool_call_parser_name=None,
        reasoning_parser_name=None,
    )

    assert result.prompt_token_ids == [7]
    # The encoder sees the normalized copy ...
    assert seen["messages"] == [{"role": "assistant", "content": ""}]
    # ... while the caller's request keeps its original None.
    assert request["messages"] == [{"role": "assistant", "content": None}]
def test_deepseek_v4_tool_choice_none_strips_encoder_tools(self, monkeypatch):
    """``tool_choice="none"`` with exclusion enabled hides tools from the encoder."""
    encoder_module_name = "sglang.srt.entrypoints.openai.encoding_dsv4"
    seen = {}

    def fake_encode_messages(messages, *, thinking_mode, reasoning_effort=None):
        seen["messages"] = messages
        return "<dsv4-prompt>"

    stub_module = types.ModuleType(encoder_module_name)
    stub_module.encode_messages = fake_encode_messages
    monkeypatch.setitem(sys.modules, encoder_module_name, stub_module)

    class NoTemplateTokenizer:
        chat_template = None

        def encode(self, prompt):
            return [8]

    request = {
        "model": "deepseek-ai/DeepSeek-V4-Pro",
        "messages": [{"role": "system", "content": "Stay terse."}],
        "tools": [
            {
                "type": "function",
                "function": {"name": "get_weather", "parameters": {}},
            }
        ],
        "tool_choice": "none",
    }

    preprocess_chat_request(
        request,
        tokenizer=NoTemplateTokenizer(),
        tool_call_parser_name=None,
        reasoning_parser_name=None,
        exclude_tools_when_tool_choice_none=True,
    )

    leading_message = seen["messages"][0]
    assert "tools" not in leading_message
# ---------------------------------------------------------------------------
# SglangStreamingPostProcessor: incremental detokenization
......
......@@ -15,13 +15,96 @@ removed. When the old version falls outside the support window, delete the
fallback and any associated polyfills.
"""
import inspect
import ipaddress
import logging
import socket
from functools import lru_cache
from typing import Any
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Top-level sglang exports: Engine, ServerArgs
#
# Some SGLang dev builds (including 0.5.x snapshots) do not re-export these
# from sglang/__init__.py, while Dynamo historically uses `import sglang as sgl`
# followed by `sgl.Engine(...)` throughout this backend.
# ---------------------------------------------------------------------------
def ensure_sglang_top_level_exports() -> None:
    """Restore top-level SGLang exports omitted by some install flavors.

    Sets ``sglang.Engine`` and ``sglang.ServerArgs`` from their defining
    submodules when the package does not already expose them. Attributes that
    are already present are left untouched.
    """
    import sglang

    if not hasattr(sglang, "Engine"):
        from sglang.srt.entrypoints import engine as engine_module

        sglang.Engine = engine_module.Engine

    if not hasattr(sglang, "ServerArgs"):
        from sglang.srt import server_args as server_args_module

        sglang.ServerArgs = server_args_module.ServerArgs
ensure_sglang_top_level_exports()
@lru_cache(maxsize=32)
def _get_async_generate_supported_kwarg_names(
async_generate: Any,
) -> frozenset[str] | None:
"""Return supported async_generate keyword names, or None for **kwargs."""
try:
signature = inspect.signature(async_generate)
except (TypeError, ValueError):
logger.debug(
"Could not inspect SGLang Engine.async_generate signature; "
"dropping optional compatibility kwargs"
)
return frozenset()
names: set[str] = set()
for name, param in signature.parameters.items():
if param.kind == inspect.Parameter.VAR_KEYWORD:
return None
if param.kind in (
inspect.Parameter.POSITIONAL_OR_KEYWORD,
inspect.Parameter.KEYWORD_ONLY,
):
names.add(name)
return frozenset(names)
def filter_supported_async_generate_kwargs(
    engine: Any, kwargs: dict[str, Any]
) -> dict[str, Any]:
    """Return only async_generate kwargs accepted by this SGLang engine.

    SGLang occasionally adds optional Engine.async_generate kwargs before every
    supported install flavor has them. Keep the compatibility boundary narrow:
    callers decide which kwargs are optional, and this helper only drops those
    optional kwargs when the installed engine cannot accept them.

    When the engine takes ``**kwargs`` the input dict itself is returned;
    otherwise a new, filtered dict is built.
    """
    bound_generate = engine.async_generate
    # Inspect the function behind a bound method so engines of the same class
    # share one cached signature lookup.
    inspect_target = getattr(bound_generate, "__func__", bound_generate)
    try:
        allowed = _get_async_generate_supported_kwarg_names(inspect_target)
    except TypeError:
        # Unhashable callable: it cannot be a cache key, so inspect it directly.
        allowed = _get_async_generate_supported_kwarg_names.__wrapped__(
            inspect_target
        )

    if allowed is not None:
        return {key: value for key, value in kwargs.items() if key in allowed}
    return kwargs
# ---------------------------------------------------------------------------
# Network utilities: NetworkAddress, get_local_ip_auto, get_zmq_socket
#
......@@ -201,6 +284,8 @@ def enable_disjoint_streaming_output(server_args: Any) -> None:
__all__ = [
"NetworkAddress",
"enable_disjoint_streaming_output",
"ensure_sglang_top_level_exports",
"filter_supported_async_generate_kwargs",
"get_local_ip_auto",
"get_scheduler_info",
"get_zmq_socket",
......
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import asyncio
import json
import logging
......
......@@ -13,6 +13,7 @@ from dynamo._core import Context
from dynamo.common.constants import DisaggregationMode
from dynamo.common.utils.engine_response import normalize_finish_reason
from dynamo.common.utils.otel_tracing import build_trace_headers
from dynamo.sglang._compat import filter_supported_async_generate_kwargs
from dynamo.sglang.args import Config
from dynamo.sglang.publisher import DynamoSglangPublisher
from dynamo.sglang.request_handlers.handler_base import BaseWorkerHandler
......@@ -275,6 +276,9 @@ class DecodeWorkerHandler(BaseWorkerHandler):
return_routed_experts = getattr(
self.config.server_args, "enable_return_routed_experts", False
)
routed_experts_kwargs = filter_supported_async_generate_kwargs(
self.engine, {"return_routed_experts": return_routed_experts}
)
priority = (request.get("routing") or {}).get("priority")
logprob_kwargs = self._build_logprob_kwargs(request)
......@@ -308,7 +312,7 @@ class DecodeWorkerHandler(BaseWorkerHandler):
**input_param,
sampling_params=sampling_params,
stream=True,
return_routed_experts=return_routed_experts,
**routed_experts_kwargs,
bootstrap_host=bootstrap_info["bootstrap_host"],
bootstrap_port=bootstrap_info["bootstrap_port"],
bootstrap_room=bootstrap_info["bootstrap_room"],
......@@ -346,7 +350,7 @@ class DecodeWorkerHandler(BaseWorkerHandler):
video_data=video_data,
sampling_params=sampling_params,
stream=True,
return_routed_experts=return_routed_experts,
**routed_experts_kwargs,
external_trace_header=trace_header,
rid=trace_id,
data_parallel_rank=dp_rank,
......
......@@ -11,6 +11,11 @@ import pytest
import yaml
from sglang.srt.disaggregation.utils import FAKE_BOOTSTRAP_HOST
import dynamo.sglang._compat as sglang_compat
from dynamo.sglang._compat import (
ensure_sglang_top_level_exports,
filter_supported_async_generate_kwargs,
)
from dynamo.sglang.args import parse_args
from dynamo.sglang.health_check import (
SglangDisaggHealthCheckPayload,
......@@ -38,6 +43,99 @@ pytestmark = [
mock_sglang_cli = make_cli_args_fixture("dynamo.sglang")
def test_compat_restores_sglang_top_level_exports():
    """Dynamo supports SGLang builds that omit top-level Engine/ServerArgs."""
    import sglang as sgl
    from sglang.srt.entrypoints.engine import Engine
    from sglang.srt.server_args import ServerArgs

    absent = object()
    export_names = ("Engine", "ServerArgs")
    saved = {name: getattr(sgl, name, absent) for name in export_names}

    try:
        # Simulate an install flavor without the top-level re-exports.
        for name in export_names:
            if hasattr(sgl, name):
                delattr(sgl, name)

        ensure_sglang_top_level_exports()

        assert sgl.Engine is Engine
        assert sgl.ServerArgs is ServerArgs
    finally:
        # Put the package back exactly as it was found.
        for name, original in saved.items():
            if original is not absent:
                setattr(sgl, name, original)
            elif hasattr(sgl, name):
                delattr(sgl, name)
def test_compat_filters_async_generate_kwargs_for_older_engines():
    """Kwargs missing from the engine's signature are dropped."""

    class OldEngine:
        async def async_generate(self, input_ids=None, sampling_params=None):
            return None

    requested = {
        "input_ids": [1, 2, 3],
        "return_routed_experts": True,
    }

    filtered = filter_supported_async_generate_kwargs(OldEngine(), requested)

    assert filtered == {"input_ids": [1, 2, 3]}
def test_compat_keeps_async_generate_kwargs_for_newer_engines():
    """Kwargs named in the engine's signature pass through unchanged."""

    class NewEngine:
        async def async_generate(self, return_routed_experts=False):
            return None

    requested = {"return_routed_experts": True}

    filtered = filter_supported_async_generate_kwargs(NewEngine(), requested)

    assert filtered == requested
def test_compat_keeps_async_generate_kwargs_for_variadic_engines():
    """An engine taking ``**kwargs`` keeps every keyword."""

    class VariadicEngine:
        async def async_generate(self, **kwargs):
            return None

    requested = {"return_routed_experts": True}

    filtered = filter_supported_async_generate_kwargs(VariadicEngine(), requested)

    assert filtered == requested
def test_compat_caches_async_generate_signature_inspection(monkeypatch):
    """Two engines of one class trigger a single signature inspection."""

    class CachedEngine:
        async def async_generate(self, return_routed_experts=False):
            return None

    cached_lookup = sglang_compat._get_async_generate_supported_kwarg_names
    cached_lookup.cache_clear()

    real_signature = sglang_compat.inspect.signature
    inspected = []

    def counting_signature(obj):
        inspected.append(obj)
        return real_signature(obj)

    monkeypatch.setattr(sglang_compat.inspect, "signature", counting_signature)

    requested = {"return_routed_experts": True}
    for _ in range(2):
        assert (
            filter_supported_async_generate_kwargs(CachedEngine(), requested)
            == requested
        )

    assert len(inspected) == 1
    cached_lookup.cache_clear()
@pytest.mark.asyncio
async def test_custom_jinja_template_invalid_path(mock_sglang_cli):
"""Test that invalid file path raises FileNotFoundError."""
......
......@@ -1227,8 +1227,9 @@ impl OpenAIPreprocessor {
/// For kimi_k25: disabled when chat_template_args contains "thinking": false.
/// For nemotron_nano: disabled when chat_template_args contains "enable_thinking": false
/// or "force_nonempty_content": true.
/// For deepseek_r1: disabled when chat_template_args contains "thinking": false
/// or "thinking_mode": "chat".
/// For deepseek_r1 / deepseek_v4: disabled when chat_template_args contains
/// "thinking": false or "thinking_mode": "chat" — matches the V4 formatter's
/// `resolve_thinking_mode` convention, so the parser and the prompt stay in sync.
fn is_reasoning_disabled_by_request(
reasoning_parser: Option<&str>,
chat_template_args: Option<&std::collections::HashMap<String, serde_json::Value>>,
......@@ -1257,7 +1258,8 @@ impl OpenAIPreprocessor {
}
false
}
Some("deepseek_r1") => {
Some("deepseek_r1") | Some("deepseek_v4") | Some("deepseek-v4")
| Some("deepseekv4") => {
if let Some(args) = chat_template_args {
if let Some(thinking) = args.get("thinking") {
return thinking == &serde_json::Value::Bool(false);
......@@ -1829,6 +1831,50 @@ mod tests {
false,
"nemotron_nano + empty args → enabled",
),
// deepseek_v4 — same convention as deepseek_r1; verify all three aliases
// (deepseek_v4 / deepseek-v4 / deepseekv4) plus both signal keys.
(
Some("deepseek_v4"),
Some(&thinking_false),
true,
"deepseek_v4 + thinking=false → disabled",
),
(
Some("deepseek_v4"),
Some(&thinking_true),
false,
"deepseek_v4 + thinking=true → enabled",
),
(
Some("deepseek_v4"),
Some(&thinking_mode_chat),
true,
"deepseek_v4 + thinking_mode=chat → disabled",
),
(
Some("deepseek_v4"),
Some(&thinking_mode_thinking),
false,
"deepseek_v4 + thinking_mode=thinking → enabled",
),
(
Some("deepseek_v4"),
None,
false,
"deepseek_v4 + no args → enabled",
),
(
Some("deepseek-v4"),
Some(&thinking_false),
true,
"deepseek-v4 (hyphen alias) + thinking=false → disabled",
),
(
Some("deepseekv4"),
Some(&thinking_mode_chat),
true,
"deepseekv4 (joined alias) + thinking_mode=chat → disabled",
),
];
for (parser, args, expected, desc) in cases {
......
......@@ -26,6 +26,7 @@ use std::sync::Arc;
use crate::preprocessor::media::MediaDecoder;
pub mod deepseek_v32;
pub mod deepseek_v4;
mod template;
pub use template::{ChatTemplate, ContextMixins};
......
This diff is collapsed.
......@@ -19,12 +19,35 @@ use tokcfg::ChatTemplateValue;
impl PromptFormatter {
pub fn from_mdc(mdc: &ModelDeploymentCard) -> Result<PromptFormatter> {
// Special handling for DeepSeek-V3.2(-Speciale) which doesn't provide Jinja chat_template
let name_lower = mdc.display_name.to_lowercase();
if name_lower.contains("deepseek")
&& name_lower.contains("v3.2")
&& !name_lower.contains("exp")
{
// Special handling for DeepSeek models whose HF repos don't ship a Jinja chat_template.
//
// Prefer the authoritative `model_type` from config.json — it's set by
// the model author and survives any `--served-model-name` rename. Fall
// back to a tight substring match on `display_name` only when config.json
// is absent (e.g., tokenizer-only MDCs) or unreadable.
//
// An empty `model_type` string (rare but legal in the JSON) carries
// no signal — normalize it to `None` so the display-name fallback
// still runs instead of being silently suppressed.
let model_type_lower = mdc
.model_info
.as_ref()
.and_then(|info| info.get_model_info().ok())
.map(|info| info.model_type().to_lowercase())
.filter(|s| !s.is_empty());
let display_name_lower = mdc.display_name.to_lowercase();
if is_deepseek_v4(&model_type_lower, &display_name_lower) {
tracing::info!(
model_type = ?model_type_lower,
display_name = %mdc.display_name,
"Detected DeepSeek V4 model, using native Rust formatter",
);
return Ok(Self::OAI(Arc::new(
super::deepseek_v4::DeepSeekV4Formatter::new_thinking(),
)));
}
if is_deepseek_v3_2_non_exp(&model_type_lower, &display_name_lower) {
tracing::info!("Detected DeepSeek V3.2 model (non-Exp), using native Rust formatter");
return Ok(Self::OAI(Arc::new(
super::deepseek_v32::DeepSeekV32Formatter::new_thinking(),
......@@ -187,3 +210,158 @@ struct HfTokenizerConfigJsonFormatter {
pub struct ContextMixins {
context_mixins: HashSet<PromptContextMixin>,
}
/// Decides whether to activate the DeepSeek-V4 native formatter.
///
/// A present `model_type` is authoritative: the result is `true` exactly when
/// it equals `"deepseek_v4"`, and the display name is not consulted at all.
/// Only when `model_type` is `None` does the decision fall back to the
/// anchored display-name match in `is_deepseek_v4_name`.
///
/// Both arguments are expected to be lowercased already by the caller.
fn is_deepseek_v4(model_type_lower: &Option<String>, display_name_lower: &str) -> bool {
    if let Some(model_type) = model_type_lower.as_deref() {
        // A concrete model type wins either way; no name fallback.
        return model_type == "deepseek_v4";
    }
    is_deepseek_v4_name(display_name_lower)
}
/// Decides whether to activate the DeepSeek-V3.2 (non-Exp) native formatter.
///
/// A present `model_type` is authoritative. Both `deepseek_v32` and
/// `deepseek_v3_2` are accepted as the V3.2 family; any other value returns
/// `false` without consulting the display name.
/// NOTE(review): published DeepSeek-V3.2 configs appear to use
/// `deepseek_v32` — confirm against the model's config.json.
///
/// The model type does not separate Exp from non-Exp here, so a display name
/// containing `exp` always suppresses the match. With no `model_type`, the
/// display name must contain both `deepseek` and `v3.2`.
///
/// Both arguments are expected to be lowercased already by the caller.
fn is_deepseek_v3_2_non_exp(model_type_lower: &Option<String>, display_name_lower: &str) -> bool {
    let is_exp = display_name_lower.contains("exp");
    match model_type_lower.as_deref() {
        Some("deepseek_v32") | Some("deepseek_v3_2") => !is_exp,
        Some(_) => false,
        None => {
            display_name_lower.contains("deepseek")
                && display_name_lower.contains("v3.2")
                && !is_exp
        }
    }
}
/// Anchored match for DeepSeek-V4 display names over an already-lowercased
/// string. Equivalent to the regex `^deepseek(?:[-_.])?v4(?:[-_.]|$)`, written
/// with plain string operations so the `regex` crate is not needed.
///
/// Accepts `deepseek`, then at most one of `-`, `_`, `.`, then `v4`, then
/// either the end of the name or another of those separators.
///
/// Rejected examples:
/// - `deepseek-v3.2-v4-foo` (`v4` does not directly follow the prefix)
/// - `deepseek-v40` / `deepseek-v4pro` (no separator after `v4`)
/// - `my-deepseek-v4` (`deepseek` must start the name)
fn is_deepseek_v4_name(name_lower: &str) -> bool {
    const SEPARATORS: [char; 3] = ['-', '_', '.'];

    name_lower
        .strip_prefix("deepseek")
        // At most one separator may sit between "deepseek" and "v4".
        .map(|tail| tail.strip_prefix(SEPARATORS).unwrap_or(tail))
        .and_then(|tail| tail.strip_prefix("v4"))
        // `v4` must close the name or be followed by a separator.
        .map_or(false, |suffix| {
            suffix.is_empty() || suffix.starts_with(SEPARATORS)
        })
}
// Unit tests for the DeepSeek formatter-detection helpers in this file:
// `is_deepseek_v4_name`, `is_deepseek_v4` and `is_deepseek_v3_2_non_exp`.
// All inputs are passed already lowercased, as the helpers expect.
#[cfg(test)]
mod detection_tests {
use super::{is_deepseek_v3_2_non_exp, is_deepseek_v4, is_deepseek_v4_name};
// Names that start with `deepseek`, optionally one separator, then `v4`
// followed by end-of-string or a separator must match.
#[test]
fn v4_name_matches_canonical_variants() {
for name in [
"deepseek-v4",
"deepseek_v4",
"deepseek.v4",
"deepseekv4",
"deepseek-v4-pro",
"deepseek-v4-flash",
"deepseek-v4-flash-2507",
"deepseek-v4.1",
"deepseek_v4_thinking",
] {
assert!(is_deepseek_v4_name(name), "expected {name} to match V4");
}
}
#[test]
fn v4_name_rejects_non_v4() {
// Composite names that previously short-circuited to V4 before the
// V3.2 branch — now correctly rejected.
for name in [
"deepseek-v3.2-v4-foo",
"my-deepseek-v4",
"deepseek-v40",
"deepseek-v4pro",
"deepseekv40",
"deepseek-v3",
"deepseek-v3.2",
"deepseek-r1",
"qwen3-v4", // only deepseek-prefixed names qualify
"dsflash",
"",
] {
assert!(
!is_deepseek_v4_name(name),
"expected {name} to NOT match V4",
);
}
}
#[test]
fn v4_detection_prefers_config_model_type() {
// config.json `model_type = "deepseek_v4"` wins regardless of what
// the operator calls the model via --served-model-name.
let v4 = Some("deepseek_v4".to_string());
for display in ["dsflash", "my-pet-model", "llama-3-8b", ""] {
assert!(
is_deepseek_v4(&v4, display),
"config says deepseek_v4, display {display:?} — expected V4",
);
}
// A concrete non-V4 config.json suppresses the display-name fallback.
// Even if the operator names the served model "deepseek-v4", a model
// with `model_type = "llama"` is NOT DeepSeek-V4.
let llama = Some("llama".to_string());
for display in ["deepseek-v4", "deepseek-v4-flash", "anything"] {
assert!(
!is_deepseek_v4(&llama, display),
"config says llama, display {display:?} — expected NOT V4",
);
}
// No config.json — fall back to display-name match.
assert!(is_deepseek_v4(&None, "deepseek-v4-flash"));
assert!(!is_deepseek_v4(&None, "dsflash"));
// A config.json with `"model_type": ""` is treated as "no signal" at
// the call site (normalized to None before is_deepseek_v4 is called),
// so the display-name fallback still runs — pin that contract.
// Note: this exercises only the `None` input; the empty-string
// normalization itself happens in the caller and is not run here.
let empty: Option<String> = None;
assert!(is_deepseek_v4(&empty, "deepseek-v4-flash"));
assert!(!is_deepseek_v4(&empty, "dsflash"));
}
#[test]
fn v3_2_detection_prefers_config_model_type() {
// config says deepseek_v3_2, any non-"exp" display name triggers.
let v3_2 = Some("deepseek_v3_2".to_string());
assert!(is_deepseek_v3_2_non_exp(&v3_2, "whatever"));
assert!(is_deepseek_v3_2_non_exp(&v3_2, "deepseek-v3.2"));
// V3.2-Exp is a separate model family; suppress even via config.
assert!(!is_deepseek_v3_2_non_exp(&v3_2, "deepseek-v3.2-exp"));
// Other config types lose regardless of display name.
let other = Some("deepseek_v4".to_string());
assert!(!is_deepseek_v3_2_non_exp(&other, "deepseek-v3.2"));
// No config — fall back to the original display-name heuristic.
assert!(is_deepseek_v3_2_non_exp(&None, "deepseek-v3.2-pro"));
assert!(!is_deepseek_v3_2_non_exp(&None, "deepseek-v3.2-exp"));
assert!(!is_deepseek_v3_2_non_exp(&None, "deepseek-v4"));
}
}
{"tools":[{"type":"function","function":{"name":"get_weather","description":"Get the weather for a specific location","parameters":{"type":"object","properties":{"location":{"type":"string","description":"The city name"},"unit":{"type":"string","enum":["celsius","fahrenheit"],"description":"Temperature unit"}},"required":["location"]}}},{"type":"function","function":{"name":"search","description":"Search the web for information","parameters":{"type":"object","properties":{"query":{"type":"string","description":"Search query"},"num_results":{"type":"integer","description":"Number of results to return"}},"required":["query"]}}}],"messages":[{"role":"system","content":"You are a helpful assistant."},{"role":"user","content":"What's the weather in Beijing?"},{"role":"assistant","reasoning_content":"The user wants to know the weather in Beijing. I should use the get_weather tool.","tool_calls":[{"id":"call_001","type":"function","function":{"name":"get_weather","arguments":"{\"location\": \"Beijing\", \"unit\": \"celsius\"}"}}]},{"role":"tool","tool_call_id":"call_001","content":"{\"temperature\": 22, \"condition\": \"sunny\", \"humidity\": 45}"},{"role":"assistant","reasoning_content":"Got the weather data. Let me format a nice response.","content":"The weather in Beijing is currently sunny with a temperature of 22°C and 45% humidity."}]}
[{"role":"system","content":"You are a helpful assistant."},{"role":"user","content":"Hello"},{"role":"assistant","reasoning_content":"The user said hello, I should greet back.","content":"Hi there! How can I help you?"},{"role":"user","content":"What is the capital of France?"},{"role":"assistant","reasoning_content":"The user asks about the capital of France. It is Paris.","content":"The capital of France is Paris."}]
[{"role":"system","content":"该助手为DeepSeek,由深度求索公司创造。"},{"role":"latest_reminder","content":"2026-02-21,星期六,广州,App,中文"},{"role":"developer","content":"小柴胡冲剂和布洛芬能一起吃吗?\n\nCITATION FORMAT: 【{cursor_id}†L{start_line_id}(-L{end_line_id})?】","tools":[{"type":"function","function":{"name":"search","description":"Web search. Split multiple queries with '||'.","parameters":{"type":"object","properties":{"queries":{"type":"string","description":"query1||query2"}},"required":["queries"],"additionalProperties":false,"$schema":"http://json-schema.org/draft-07/schema#"}}},{"type":"function","function":{"name":"open","description":"Batch open IDs (format 【{id}†...】) or URLs.","parameters":{"type":"object","properties":{"open_list":{"type":"array","items":{"type":"object","properties":{"id":{"description":"ID or URL","anyOf":[{"type":"integer"},{"type":"string"}],"default":-1},"cursor":{"type":"integer","description":"","default":-1},"loc":{"type":"integer","description":"Start line","default":-1},"num_lines":{"type":"integer","description":"","default":-1},"view_source":{"type":"boolean","description":"","default":false}},"additionalProperties":false},"description":""}},"required":["open_list"],"additionalProperties":false,"$schema":"http://json-schema.org/draft-07/schema#"}}},{"type":"function","function":{"name":"find","description":"Find exact text pattern in pages.","parameters":{"type":"object","properties":{"find_list":{"type":"array","items":{"type":"object","properties":{"pattern":{"type":"string","description":""},"cursor":{"type":"integer","description":"","default":-1}},"required":["pattern"],"additionalProperties":false},"description":""}},"required":["find_list"],"additionalProperties":false,"$schema":"http://json-schema.org/draft-07/schema#"}}}]},{"role":"assistant","content":"","reasoning_content":"用户想知道小柴胡冲剂和布洛芬能否一起服用。","tool_calls":[{"type":"function","function":{"name":"search","arguments":"{\"queries\": \"小柴胡冲剂 布洛芬 相互作用 
一起吃\"}"}}]},{"role":"tool","content":"[0]"},{"role":"assistant","content":"请及时就医。","reasoning_content":"现在开始组织回答。","tool_calls":[]}]
[{"role":"system","content":"该助手为DeepSeek-V3,由深度求索公司创造。\n今天是2025年10月17日,星期五。"},{"role":"latest_reminder","content":"2024-11-15,上海市,App,中文"},{"role":"user","content":"热海大滚锅是世界著名温泉吗"},{"role":"assistant","content":"关于热海大滚锅是否是世界著名温泉,可以这样总结:它在**中国乃至全球的地热奇观中占有重要地位**,但“世界著名”的称号更侧重于它作为**独特的地质现象和旅游景点**,而非普遍的温泉疗养体验。\n\n为了让你快速了解,我整理了一个简要的表格:\n\n| 维度 | 热海大滚锅的地位与特点 |\n| :--- | :--- |\n| **地质奇观** | **世界罕见**的高温地热系统,泉眼多、水温高、形态多样。 |\n| **国内声誉** | **中国三大地热区之一**,**国家5A级旅游景区**,自古闻名(徐霞客曾游历并记载)。 |\n| **国际知名度** | 在一些旅游平台被提及为“世界六大温泉”之一,但此说法流传不广,其国际声誉更多建立在地质独特性上。 |\n| **核心体验** | **观赏地热奇观**(如97℃沸腾的“大滚锅”)、**体验温泉煮鸡蛋**。 |\n\n### 💡 游玩攻略与温馨提示\n\n如果你计划前往热海大滚锅,这里有一些实用信息供你参考:\n\n- **门票与开放时间**:\n - **门票**:景区门票约为**50元/人**。如果选择包含温泉沐浴的套餐,价格会更高,例如约**288元**。\n - **开放时间**:景区一般**08:00-18:00**开放,但具体时间可能变动,建议提前核实。\n\n- **特色体验**:\n - **温泉煮鸡蛋**:这几乎是必试项目。可以在景区门口购买用草绳串起的生鸡蛋(约5-8元/串),然后到“大滚锅”旁的指定区域蒸煮,几分钟便可熟食,趣味十足。\n - **金汤足浴**:可以直接用从“大滚锅”流出的温泉水泡脚,缓解旅途疲劳。\n\n- **注意事项**:\n - **安全第一**:“大滚锅”水温极高,务必遵守游览规则,在指定区域内观赏,切勿随意触碰泉水。\n - **规划行程**:建议为热海景区预留**3-4小时**的游览时间。景区内步道不走回头路,出入口有观光车接送。\n\n希望这些信息能帮助你更好地了解热海大滚锅。如果你对腾冲的其他景点或者行程规划有更多疑问,我很乐意提供进一步的信息。","mask":1},{"role":"user","content":"世界著名温泉有哪些","task":"action"},{"role":"assistant","content":"Search"}]
<|begin▁of▁sentence|>You are a helpful assistant.
## Tools
You have access to a set of tools to help answer the user's question. You can invoke tools by writing a "<|DSML|tool_calls>" block like the following:
<|DSML|tool_calls>
<|DSML|invoke name="$TOOL_NAME">
<|DSML|parameter name="$PARAMETER_NAME" string="true|false">$PARAMETER_VALUE</|DSML|parameter>
...
</|DSML|invoke>
<|DSML|invoke name="$TOOL_NAME2">
...
</|DSML|invoke>
</|DSML|tool_calls>
String parameters should be specified as is and set `string="true"`. For all other types (numbers, booleans, arrays, objects), pass the value in JSON format and set `string="false"`.
If thinking_mode is enabled (triggered by <think>), you MUST output your complete reasoning inside <think>...</think> BEFORE any tool calls or final response.
Otherwise, output directly after </think> with tool calls or final response.
### Available Tool Schemas
{"name": "get_weather", "description": "Get the weather for a specific location", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city name"}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "Temperature unit"}}, "required": ["location"]}}
{"name": "search", "description": "Search the web for information", "parameters": {"type": "object", "properties": {"query": {"type": "string", "description": "Search query"}, "num_results": {"type": "integer", "description": "Number of results to return"}}, "required": ["query"]}}
You MUST strictly follow the above defined tool name and parameter schemas to invoke tool calls.
<|User|>What's the weather in Beijing?<|Assistant|><think>The user wants to know the weather in Beijing. I should use the get_weather tool.</think>
<|DSML|tool_calls>
<|DSML|invoke name="get_weather">
<|DSML|parameter name="location" string="true">Beijing</|DSML|parameter>
<|DSML|parameter name="unit" string="true">celsius</|DSML|parameter>
</|DSML|invoke>
</|DSML|tool_calls><|end▁of▁sentence|><|User|><tool_result>{"temperature": 22, "condition": "sunny", "humidity": 45}</tool_result><|Assistant|><think>Got the weather data. Let me format a nice response.</think>The weather in Beijing is currently sunny with a temperature of 22°C and 45% humidity.<|end▁of▁sentence|>
\ No newline at end of file
<|begin▁of▁sentence|>You are a helpful assistant.<|User|>Hello<|Assistant|></think>Hi there! How can I help you?<|end▁of▁sentence|><|User|>What is the capital of France?<|Assistant|><think>The user asks about the capital of France. It is Paris.</think>The capital of France is Paris.<|end▁of▁sentence|>
\ No newline at end of file
<|begin▁of▁sentence|>该助手为DeepSeek,由深度求索公司创造。<|latest_reminder|>2026-02-21,星期六,广州,App,中文<|User|>小柴胡冲剂和布洛芬能一起吃吗?
CITATION FORMAT: 【{cursor_id}†L{start_line_id}(-L{end_line_id})?】
## Tools
You have access to a set of tools to help answer the user's question. You can invoke tools by writing a "<|DSML|tool_calls>" block like the following:
<|DSML|tool_calls>
<|DSML|invoke name="$TOOL_NAME">
<|DSML|parameter name="$PARAMETER_NAME" string="true|false">$PARAMETER_VALUE</|DSML|parameter>
...
</|DSML|invoke>
<|DSML|invoke name="$TOOL_NAME2">
...
</|DSML|invoke>
</|DSML|tool_calls>
String parameters should be specified as is and set `string="true"`. For all other types (numbers, booleans, arrays, objects), pass the value in JSON format and set `string="false"`.
If thinking_mode is enabled (triggered by <think>), you MUST output your complete reasoning inside <think>...</think> BEFORE any tool calls or final response.
Otherwise, output directly after </think> with tool calls or final response.
### Available Tool Schemas
{"name": "search", "description": "Web search. Split multiple queries with '||'.", "parameters": {"type": "object", "properties": {"queries": {"type": "string", "description": "query1||query2"}}, "required": ["queries"], "additionalProperties": false, "$schema": "http://json-schema.org/draft-07/schema#"}}
{"name": "open", "description": "Batch open IDs (format 【{id}†...】) or URLs.", "parameters": {"type": "object", "properties": {"open_list": {"type": "array", "items": {"type": "object", "properties": {"id": {"description": "ID or URL", "anyOf": [{"type": "integer"}, {"type": "string"}], "default": -1}, "cursor": {"type": "integer", "description": "", "default": -1}, "loc": {"type": "integer", "description": "Start line", "default": -1}, "num_lines": {"type": "integer", "description": "", "default": -1}, "view_source": {"type": "boolean", "description": "", "default": false}}, "additionalProperties": false}, "description": ""}}, "required": ["open_list"], "additionalProperties": false, "$schema": "http://json-schema.org/draft-07/schema#"}}
{"name": "find", "description": "Find exact text pattern in pages.", "parameters": {"type": "object", "properties": {"find_list": {"type": "array", "items": {"type": "object", "properties": {"pattern": {"type": "string", "description": ""}, "cursor": {"type": "integer", "description": "", "default": -1}}, "required": ["pattern"], "additionalProperties": false}, "description": ""}}, "required": ["find_list"], "additionalProperties": false, "$schema": "http://json-schema.org/draft-07/schema#"}}
You MUST strictly follow the above defined tool name and parameter schemas to invoke tool calls.
<|Assistant|><think>用户想知道小柴胡冲剂和布洛芬能否一起服用。</think>
<|DSML|tool_calls>
<|DSML|invoke name="search">
<|DSML|parameter name="queries" string="true">小柴胡冲剂 布洛芬 相互作用 一起吃</|DSML|parameter>
</|DSML|invoke>
</|DSML|tool_calls><|end▁of▁sentence|><|User|><tool_result>[0]</tool_result><|Assistant|><think>现在开始组织回答。</think>请及时就医。<|end▁of▁sentence|>
\ No newline at end of file
<|begin▁of▁sentence|>该助手为DeepSeek-V3,由深度求索公司创造。
今天是2025年10月17日,星期五。<|latest_reminder|>2024-11-15,上海市,App,中文<|User|>热海大滚锅是世界著名温泉吗<|Assistant|></think>关于热海大滚锅是否是世界著名温泉,可以这样总结:它在**中国乃至全球的地热奇观中占有重要地位**,但“世界著名”的称号更侧重于它作为**独特的地质现象和旅游景点**,而非普遍的温泉疗养体验。
为了让你快速了解,我整理了一个简要的表格:
| 维度 | 热海大滚锅的地位与特点 |
| :--- | :--- |
| **地质奇观** | **世界罕见**的高温地热系统,泉眼多、水温高、形态多样。 |
| **国内声誉** | **中国三大地热区之一**,**国家5A级旅游景区**,自古闻名(徐霞客曾游历并记载)。 |
| **国际知名度** | 在一些旅游平台被提及为“世界六大温泉”之一,但此说法流传不广,其国际声誉更多建立在地质独特性上。 |
| **核心体验** | **观赏地热奇观**(如97℃沸腾的“大滚锅”)、**体验温泉煮鸡蛋**。 |
### 💡 游玩攻略与温馨提示
如果你计划前往热海大滚锅,这里有一些实用信息供你参考:
- **门票与开放时间**:
- **门票**:景区门票约为**50元/人**。如果选择包含温泉沐浴的套餐,价格会更高,例如约**288元**。
- **开放时间**:景区一般**08:00-18:00**开放,但具体时间可能变动,建议提前核实。
- **特色体验**:
- **温泉煮鸡蛋**:这几乎是必试项目。可以在景区门口购买用草绳串起的生鸡蛋(约5-8元/串),然后到“大滚锅”旁的指定区域蒸煮,几分钟便可熟食,趣味十足。
- **金汤足浴**:可以直接用从“大滚锅”流出的温泉水泡脚,缓解旅途疲劳。
- **注意事项**:
- **安全第一**:“大滚锅”水温极高,务必遵守游览规则,在指定区域内观赏,切勿随意触碰泉水。
- **规划行程**:建议为热海景区预留**3-4小时**的游览时间。景区内步道不走回头路,出入口有观光车接送。
希望这些信息能帮助你更好地了解热海大滚锅。如果你对腾冲的其他景点或者行程规划有更多疑问,我很乐意提供进一步的信息。<|end▁of▁sentence|><|User|>世界著名温泉有哪些<|Assistant|></think><|action|>Search<|end▁of▁sentence|>
\ No newline at end of file
{
"request_id": "deepseek-v4-content-before-tool-test",
"expected_output": {"normal_content": "Let me check the forecast for Tokyo right now.", "reasoning_content": "The user wants today's weather in Tokyo. I'll acknowledge the request, then call get_weather.", "tool_calls": [{"id": "call_1", "type": "function", "function": {"name": "get_weather", "arguments": "{\"location\": \"Tokyo\", \"unit\": \"celsius\"}"}}]},
"input_stream": [
{"data":{"id":"chatcmpl-deepseek-v4-content-before-tool","choices":[{"index":0,"delta":{"content":"<think>The user wants today's weather in Tokyo. I'll acknowledge the request, then call get_weather.</think>","role":"assistant","reasoning_content":"The user wants today's weather in Tokyo. I'll acknowledge the request, then call get_weather."}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-content-before-tool","choices":[{"index":0,"delta":{"content":"Let me check the forecast for Tokyo right now.","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-content-before-tool","choices":[{"index":0,"delta":{"content":"<|DSML|tool_calls>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-content-before-tool","choices":[{"index":0,"delta":{"content":"<|DSML|invoke name=\"get_weather\">\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-content-before-tool","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"location\" string=\"true\">Tokyo</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-content-before-tool","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"unit\" string=\"true\">celsius</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-content-before-tool","choices":[{"index":0,"delta":{"content":"</|DSML|invoke>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-content-before-tool","choices":[{"index":0,"delta":{"content":"</|DSML|tool_calls>","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-content-before-tool","choices":[{"index":0,"delta":{"content":null,"role":"assistant"},"finish_reason":"tool_calls"}]}}
]
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment