Unverified commit 9efc4db9, authored by Matthew Bonanni, committed by GitHub
Browse files

[Bugfix] Fix DeepSeek-V3.2 tokenizer stripping spaces (#37004)


Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
parent f1816fb1
......@@ -540,6 +540,8 @@ class ModelConfig:
self.tokenizer_mode = "kimi_audio"
elif arch == "QwenVLForConditionalGeneration":
self.tokenizer_mode = "qwen_vl"
+ elif arch == "DeepseekV32ForCausalLM":
+ self.tokenizer_mode = "deepseek_v32"
if self.tokenizer_mode != "auto":
logger.info(
......
......@@ -3,7 +3,7 @@
import copy
from typing import Any
- from transformers import AutoTokenizer
+ from transformers import PreTrainedTokenizerFast
from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
......@@ -85,5 +85,5 @@ def get_deepseek_v32_tokenizer(tokenizer: HfTokenizer) -> HfTokenizer:
class DeepseekV32Tokenizer(TokenizerLike):
@classmethod
def from_pretrained(cls, *args, **kwargs) -> HfTokenizer:
- tokenizer = AutoTokenizer.from_pretrained(*args, **kwargs)
+ tokenizer = PreTrainedTokenizerFast.from_pretrained(*args, **kwargs)
return get_cached_tokenizer(get_deepseek_v32_tokenizer(tokenizer))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment