Unverified commit 7711ac6e, authored by Mick, committed by GitHub

doc: emphasize and notify the usage of chat_template (#3589)


Co-authored-by: Chayenne <zhaochen20@outlook.com>
parent 7443197a
@@ -24,7 +24,8 @@
 "\n",
 "Launch the server in your terminal and wait for it to initialize.\n",
 "\n",
-"Remember to add `--chat-template llama_3_vision` to specify the vision chat template, otherwise the server only supports text.\n",
+"**Remember to add `--chat-template llama_3_vision` to specify the vision chat template, otherwise the server only supports text, and performance degradation may occur.**\n",
+"\n",
 "We need to specify `--chat-template` for vision language models because the chat template provided in Hugging Face tokenizer only supports text."
 ]
 },
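For reference, the notebook's instruction boils down to one launch command. A minimal sketch, assuming a Llama 3.2 vision checkpoint and a local port (both placeholders, not part of this commit):

```python
# Launch the SGLang server with the vision chat template, as the notebook
# instructs. Model path and port are illustrative assumptions.
import subprocess

server = subprocess.Popen(
    [
        "python", "-m", "sglang.launch_server",
        "--model-path", "meta-llama/Llama-3.2-11B-Vision-Instruct",
        "--chat-template", "llama_3_vision",
        "--port", "30000",
    ]
)
```

Omitting `--chat-template llama_3_vision` leaves the server on the tokenizer's text-only template, which is exactly the degradation case the bolded note warns about.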
@@ -56,6 +56,9 @@ Please consult the documentation below to learn more about the parameters you ma
 * `json_model_override_args`: Override model config with the provided JSON.
 * `delete_ckpt_after_loading`: Delete the model checkpoint after loading the model.
 
+> [!IMPORTANT]
+> **Make sure the correct `chat_template` is passed, or performance degradation may occur.**
+
 ## Serving: HTTP & API
 ### HTTP Server configuration
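To see which template SGLang would consider "correct" for a model before launching, you can query the matcher this commit wires into the server; the model path below is only an example:

```python
# Print the chat template sglang infers from a model path, so it can be
# compared with the --chat-template value passed at launch.
from sglang.lang.chat_template import get_chat_template_by_model_path

template = get_chat_template_by_model_path("Qwen/Qwen2-VL-7B-Instruct")
print(template.name if template is not None else "no template matched")
```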
@@ -353,7 +353,6 @@ register_chat_template(
     )
 )
-
 register_chat_template(
     ChatTemplate(
         name="deepseek-v3",
@@ -428,12 +427,13 @@ def match_chat_ml(model_path: str):
     if "tinyllama" in model_path:
         return get_chat_template("chatml")
     # Now the suffix for qwen2 chat model is "instruct"
-    if (
-        "qwen" in model_path
-        and ("chat" in model_path or "instruct" in model_path)
-        and ("llava" not in model_path)
-    ):
-        return get_chat_template("qwen")
+    if "qwen" in model_path:
+        if "vl" in model_path:
+            return get_chat_template("qwen2-vl")
+        if ("chat" in model_path or "instruct" in model_path) and (
+            "llava" not in model_path
+        ):
+            return get_chat_template("qwen")
     if (
         "llava-v1.6-34b" in model_path
         or "llava-v1.6-yi-34b" in model_path
@@ -459,6 +459,13 @@ def match_gemma_it(model_path: str):
     return get_chat_template("gemma-it")
 
+
+@register_chat_template_matching_function
+def match_openbmb_minicpm(model_path: str):
+    model_path = model_path.lower()
+    if "minicpm" in model_path:
+        return get_chat_template("minicpmv")
+
 
 @register_chat_template_matching_function
 def match_c4ai_command_r(model_path: str):
     model_path = model_path.lower()
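For context, the decorator used here follows a simple registry pattern. A minimal sketch under assumed names (only the decorator concept comes from the real module; everything else is simplified for illustration):

```python
# Simplified registry: decorated matchers are tried in order, and the
# first one that returns a non-None template wins.
from typing import Callable, List, Optional

matching_functions: List[Callable[[str], Optional[str]]] = []

def register_matcher(fn):
    matching_functions.append(fn)
    return fn

@register_matcher
def match_openbmb_minicpm(model_path: str) -> Optional[str]:
    if "minicpm" in model_path.lower():
        return "minicpmv"
    return None

def match_template(model_path: str) -> Optional[str]:
    for matcher in matching_functions:
        result = matcher(model_path)
        if result is not None:
            return result
    return None

print(match_template("openbmb/MiniCPM-V-2_6"))  # -> minicpmv
```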
@@ -438,7 +438,9 @@ def _launch_subprocesses(server_args: ServerArgs) -> Tuple[TokenizerManager, Dic
     # Launch tokenizer process
     tokenizer_manager = TokenizerManager(server_args, port_args)
     if server_args.chat_template:
-        load_chat_template_for_openai_api(tokenizer_manager, server_args.chat_template)
+        load_chat_template_for_openai_api(
+            tokenizer_manager, server_args.chat_template, server_args.model_path
+        )
 
     # Wait for the model to finish loading
     scheduler_infos = []
@@ -449,7 +449,8 @@ class LlavaBaseForCausalLM(nn.Module):
         projector_weights = {
             "model.mm_projector.0": "multi_modal_projector.linear_1",
             "model.mm_projector.2": "multi_modal_projector.linear_2",
-            "model.vision_tower.vision_tower": "vision_tower",  # Update the vision tower weights if we find them in the checkpoint (it may be finetuned).
+            "model.vision_tower.vision_tower": "vision_tower",
+            # Update the vision tower weights if we find them in the checkpoint (it may be finetuned).
             "model.image_newline": "language_model.model.image_newline",
         }
         params_dict = dict(self.named_parameters())
@@ -20,12 +20,14 @@ import os
 import time
 import uuid
 from http import HTTPStatus
-from typing import Dict, List, Optional
+from typing import Dict, List
 
 from fastapi import HTTPException, Request, UploadFile
 from fastapi.responses import ORJSONResponse, StreamingResponse
 from pydantic import ValidationError
+
+from sglang.lang.chat_template import get_chat_template_by_model_path
 
 try:
     from outlines.fsm.json_schema import convert_json_schema_to_str
 except ImportError:
@@ -92,7 +94,6 @@ file_id_response: Dict[str, FileResponse] = {}
 # map file id to file path in SGLang backend
 file_id_storage: Dict[str, str] = {}
 
-
 # backend storage directory
 storage_dir = None
@@ -116,12 +117,13 @@ def create_streaming_error_response(
     return json_str
 
 
-def load_chat_template_for_openai_api(tokenizer_manager, chat_template_arg):
+def load_chat_template_for_openai_api(tokenizer_manager, chat_template_arg, model_path):
     global chat_template_name
 
     logger.info(
         f"Use chat template for the OpenAI-compatible API server: {chat_template_arg}"
     )
+
     if not chat_template_exists(chat_template_arg):
         if not os.path.exists(chat_template_arg):
             raise RuntimeError(
@@ -163,6 +165,18 @@ def load_chat_template_for_openai_api(tokenizer_manager, chat_template_arg):
     else:
         chat_template_name = chat_template_arg
 
+    # Check the requested chat template against the one matched from the model path.
+    chat_template = get_chat_template_by_model_path(model_path)
+    if chat_template is not None:
+        official_chat_template = chat_template.name
+        used_chat_template = chat_template_name
+        if official_chat_template != used_chat_template:
+            logger.warning(
+                f"Using a chat_template: '{used_chat_template}', "
+                f"which is different from the official chat template: '{official_chat_template}'. "
+                f"This discrepancy may lead to performance degradation."
+            )
+
 
 async def v1_files_create(file: UploadFile, purpose: str, file_storage_pth: str = None):
     try:
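The mismatch check added above can be exercised on its own. A hedged sketch: `get_chat_template_by_model_path` is the real helper imported earlier in this diff, while the logging setup and example arguments are assumptions:

```python
# Warn when the requested template differs from the one sglang matches
# from the model path -- the same comparison the diff adds.
import logging

from sglang.lang.chat_template import get_chat_template_by_model_path

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

def warn_on_template_mismatch(used_chat_template: str, model_path: str) -> None:
    chat_template = get_chat_template_by_model_path(model_path)
    if chat_template is not None and chat_template.name != used_chat_template:
        logger.warning(
            f"Using a chat_template: '{used_chat_template}', which is different "
            f"from the official chat template: '{chat_template.name}'. "
            f"This discrepancy may lead to performance degradation."
        )

# Example: a generic template requested for a model with its own matcher.
warn_on_template_mismatch("chatml", "Qwen/Qwen2-VL-7B-Instruct")
```

If the matched template is `qwen2-vl`, the call logs the same style of warning the server now emits at startup.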