Unverified commit a9b6f95c, authored by Biswa Panda and committed by GitHub
Browse files

feat: allow deepseek_v3 architecture to use Kimi's bpe pattern (#6653)

parent 002c6823
...@@ -150,10 +150,15 @@ fn detect_bpe_pattern(directory: &Path) -> Result<&'static str> { ...@@ -150,10 +150,15 @@ fn detect_bpe_pattern(directory: &Path) -> Result<&'static str> {
})?; })?;
match model_type.as_str() { match model_type.as_str() {
"kimi" | "kimi_k2" | "kimi_k25" => Ok(KIMI_PATTERN), // baseten-admin/Kimi-2.5-text-nvfp4-v3 model has model_type: "deepseek_v3" in its config.json
// because Kimi K2.5 is built on the DeepSeek V3 architecture.
// It still ships the Kimi tiktoken tokenizer file, so the KIMI_PATTERN BPE regex is the
// correct pattern to use. No pure DeepSeek V3 model uses tiktoken.model files
// (they use tokenizer.json instead) so this match is safe.
"kimi" | "kimi_k2" | "kimi_k25" | "deepseek_v3" => Ok(KIMI_PATTERN),
_ => Err(Error::msg(format!( _ => Err(Error::msg(format!(
"Unsupported tiktoken model_type '{model_type}'. \ "Unsupported tiktoken model_type '{model_type}'. \
Currently supported: kimi, kimi_k2, kimi_k25. \ Currently supported: kimi, kimi_k2, kimi_k25, deepseek_v3. \
To add a new model type, extend detect_bpe_pattern() in tokenizers/tiktoken.rs \ To add a new model type, extend detect_bpe_pattern() in tokenizers/tiktoken.rs \
with the appropriate BPE regex pattern. \ with the appropriate BPE regex pattern. \
Alternatively, provide a tokenizer.json (HuggingFace format) instead." Alternatively, provide a tokenizer.json (HuggingFace format) instead."
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment