# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json
from pathlib import Path
from types import SimpleNamespace
import pytest
from vllm.entrypoints.chat_utils import parse_chat_messages
from vllm.renderers.registry import RENDERER_REGISTRY
from vllm.tokenizers.deepseek_v4 import get_deepseek_v4_tokenizer
from vllm.tokenizers.registry import TokenizerRegistry
# Directory next to this test module that holds the DeepSeek V4 reference
# fixtures (the ``test_input_*.json`` / ``test_output_*.txt`` files read below).
FIXTURES_DIR = Path(__file__).parent.joinpath("fixtures", "deepseek_v4")
class FakeHfTokenizer:
    """Tiny in-memory stand-in for a Hugging Face tokenizer.

    It exposes only the attributes these tests hand to
    ``get_deepseek_v4_tokenizer``: a ``vocab_size``, an added-vocabulary
    mapping, and an ``encode`` method that records how it was called.
    """

    # Class-level vocabulary size exposed by the fake.
    vocab_size = 100

    def get_added_vocab(self) -> dict[str, int]:
        """Return the fixed added-token mapping (a single entry with id 100)."""
        added_vocab = {"": 100}
        return added_vocab

    def encode(
        self,
        text: str,
        add_special_tokens: bool = False,
        **kwargs,
    ) -> list[int]:
        """Record the call and return a one-element list holding ``len(text)``.

        The ``(text, add_special_tokens, kwargs)`` tuple of the most recent
        call is stored on ``self.last_encode`` so a test can inspect it.
        """
        call_record = (text, add_special_tokens, kwargs)
        self.last_encode = call_record
        token_ids = [len(text)]
        return token_ids
def _tokenizer():
    """Build a DeepSeek V4 tokenizer wrapped around a fresh ``FakeHfTokenizer``."""
    fake_hf_tokenizer = FakeHfTokenizer()
    return get_deepseek_v4_tokenizer(fake_hf_tokenizer)
def _model_config():
    """Return a bare namespace used as the model config for message parsing.

    Only three attributes are set: no multimodal config, an empty allowed
    local media path, and no allowed media domains.
    """
    config_fields = {
        "multimodal_config": None,
        "allowed_local_media_path": "",
        "allowed_media_domains": None,
    }
    return SimpleNamespace(**config_fields)
def _load_reference_case(case_id: int):
    """Load the reference input fixture for ``case_id``.

    The fixture ``test_input_<case_id>.json`` under ``FIXTURES_DIR`` may be
    either a JSON object or any other JSON value (in practice a message list).

    Args:
        case_id: Numeric suffix of the fixture file to read.

    Returns:
        A ``(messages, tools)`` pair. For an object fixture, ``messages`` is
        its ``"messages"`` entry and ``tools`` its ``"tools"`` entry (``None``
        when absent). For any other fixture the whole payload is returned as
        ``messages`` and ``tools`` is ``None``.

    Raises:
        KeyError: If an object fixture has no ``"messages"`` key.
    """
    fixture_path = FIXTURES_DIR / f"test_input_{case_id}.json"
    # Decode explicitly as UTF-8: the fixtures carry non-ASCII template text,
    # and the locale default encoding is not UTF-8 on every platform.
    data = json.loads(fixture_path.read_text(encoding="utf-8"))
    if isinstance(data, dict):
        return data["messages"], data.get("tools")
    return data, None
def _render_reference_case(case_id: int, **kwargs):
    """Render reference fixture ``case_id`` to a prompt string.

    The fixture messages are first parsed with ``parse_chat_messages`` (string
    content format) and then passed, together with the raw messages and any
    fixture tools, to the tokenizer's ``apply_chat_template`` with
    ``tokenize=False``. Extra ``kwargs`` are forwarded to the template call.
    """
    messages, tools = _load_reference_case(case_id)
    model_config = _model_config()
    # Only the parsed conversation is needed; the other two results are unused.
    conversation, _, _ = parse_chat_messages(
        messages,
        model_config,
        content_format="string",
    )
    tokenizer = _tokenizer()
    rendered = tokenizer.apply_chat_template(
        conversation=conversation,
        messages=messages,
        tools=tools,
        tokenize=False,
        **kwargs,
    )
    return rendered
def test_deepseek_v4_tokenizer_registered():
    """The ``deepseek_v4`` key resolves in both the tokenizer and renderer registries."""
    tokenizer_cls = TokenizerRegistry.load_tokenizer_cls("deepseek_v4")
    assert tokenizer_cls.__name__ == "DeepseekV4Tokenizer"

    renderer_cls = RENDERER_REGISTRY.load_renderer_cls("deepseek_v4")
    assert renderer_cls.__name__ == "DeepseekV4Renderer"
def test_deepseek_v4_defaults_to_chat_mode():
    """Without any thinking-related kwargs the exact chat-mode prompt is produced."""
    tokenizer = _tokenizer()
    messages = [{"role": "user", "content": "Hello"}]
    expected_prompt = "<|begin▁of▁sentence|><|User|>Hello<|Assistant|>"

    prompt = tokenizer.apply_chat_template(messages, tokenize=False)

    assert prompt == expected_prompt
@pytest.mark.parametrize(
    "kwargs",
    [{"thinking": True}, {"enable_thinking": True}],
)
def test_deepseek_v4_enables_thinking_with_compatible_kwargs(kwargs):
    """Both ``thinking=True`` and ``enable_thinking=True`` render the same prompt."""
    tokenizer = _tokenizer()
    messages = [{"role": "user", "content": "Hello"}]
    # NOTE(review): this expected string is identical to the one asserted for
    # the default chat mode elsewhere in this file - confirm that the literal
    # is complete and that thinking mode is really meant to render the same.
    expected_prompt = "<|begin▁of▁sentence|><|User|>Hello<|Assistant|>"

    prompt = tokenizer.apply_chat_template(messages, tokenize=False, **kwargs)

    assert prompt == expected_prompt
def test_deepseek_v4_uses_v4_tool_prompt_from_request_tools():
    """Request-level tools are rendered into the prompt using the DSML tool format."""
    parameters_schema = {
        "type": "object",
        "properties": {"city": {"type": "string"}},
        "required": ["city"],
    }
    weather_function = {
        "name": "get_weather",
        "description": "Get weather for a city",
        "parameters": parameters_schema,
    }
    tools = [{"type": "function", "function": weather_function}]
    messages = [{"role": "user", "content": "Weather?"}]

    prompt = _tokenizer().apply_chat_template(
        messages,
        tools=tools,
        tokenize=False,
    )

    # NOTE(review): the third fragment is a substring of the second, so its
    # check is already implied - confirm whether a different literal (for
    # example a closing tag) was intended.
    for required_fragment in (
        "## Tools",
        "<|DSML|tool_calls>",
        "|DSML|tool_calls>",
    ):
        assert required_fragment in prompt
    assert "function_calls" not in prompt
    assert '"name": "get_weather"' in prompt
    assert prompt.endswith("<|User|>Weather?<|Assistant|>")
def test_deepseek_v4_renders_parsed_history_tool_arguments():
    """Tool-call arguments from history are rendered as individual DSML parameters.

    The assistant turn carries its arguments as a JSON string; after the
    messages go through ``parse_chat_messages`` the prompt must contain one
    parameter per argument and no single ``arguments`` parameter.
    """
    assistant_tool_call = {
        "id": "call_1",
        "type": "function",
        "function": {
            "name": "str_replace_editor",
            "arguments": '{"command": "view", "path": "/testbed"}',
        },
    }
    messages = [
        {"role": "user", "content": "List the repo"},
        {"role": "assistant", "tool_calls": [assistant_tool_call]},
        {"role": "tool", "tool_call_id": "call_1", "content": "file list"},
    ]

    editor_parameters = {
        "type": "object",
        "properties": {
            "command": {"type": "string"},
            "path": {"type": "string"},
        },
        "required": ["command", "path"],
    }
    tools = [
        {
            "type": "function",
            "function": {
                "name": "str_replace_editor",
                "description": "Edit files",
                "parameters": editor_parameters,
            },
        }
    ]

    model_config = _model_config()
    # Only the parsed conversation is needed; the other two results are unused.
    conversation, _, _ = parse_chat_messages(
        messages,
        model_config,
        content_format="string",
    )
    tokenizer = _tokenizer()
    prompt = tokenizer.apply_chat_template(
        conversation=conversation,
        messages=messages,
        tools=tools,
        tokenize=False,
    )

    for expected_parameter in (
        '<|DSML|parameter name="command" string="true">view',
        '<|DSML|parameter name="path" string="true">/testbed',
    ):
        assert expected_parameter in prompt
    # The raw JSON blob must not leak through as a single "arguments" parameter.
    assert 'parameter name="arguments"' not in prompt
@pytest.mark.parametrize(
    "reasoning_effort",
    ["none", "low", "medium", "high"],
)
def test_deepseek_v4_accepts_openai_reasoning_effort_values(reasoning_effort):
    """These effort values render normally and never emit the max-effort banner."""
    tokenizer = _tokenizer()
    messages = [{"role": "user", "content": "Hello"}]

    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        enable_thinking=True,
        reasoning_effort=reasoning_effort,
    )

    assert prompt.endswith("<|Assistant|>")
    assert "Reasoning Effort: Absolute maximum" not in prompt
def test_deepseek_v4_preserves_reference_max_reasoning_effort():
    """``reasoning_effort="max"`` puts the max-effort banner right after BOS."""
    tokenizer = _tokenizer()
    messages = [{"role": "user", "content": "Hello"}]
    expected_prefix = "<|begin▁of▁sentence|>Reasoning Effort: Absolute maximum"

    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        enable_thinking=True,
        reasoning_effort="max",
    )

    assert prompt.startswith(expected_prefix)
@pytest.mark.parametrize(
    ("case_id", "kwargs"),
    [
        (1, {"thinking": True}),
        (2, {"thinking": True}),
        (3, {"thinking": True}),
        (4, {}),
    ],
)
def test_deepseek_v4_matches_reference_golden_fixtures(case_id, kwargs):
    """The rendered prompt for each reference case equals its golden output file.

    Args:
        case_id: Numeric suffix selecting ``test_input_<id>.json`` and
            ``test_output_<id>.txt`` under ``FIXTURES_DIR``.
        kwargs: Extra keyword arguments forwarded to the chat template call.
    """
    prompt = _render_reference_case(case_id, **kwargs)
    expected_path = FIXTURES_DIR / f"test_output_{case_id}.txt"
    # Decode explicitly as UTF-8: the golden prompts contain non-ASCII template
    # tokens, and the locale default encoding is not UTF-8 on every platform.
    expected = expected_path.read_text(encoding="utf-8")
    assert prompt == expected