[Frontend] Add missing chat templates for various MLLMs (#17758)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>

[Frontend] Add missing chat templates for various MLLMs (#17758)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
8a15c260 · Cyrus Leung · GitHub · 043e4c49 · 8a15c260 · 8a15c260
Unverified commit 8a15c260, authored May 07, 2025 by Cyrus Leung; committed by GitHub on May 07, 2025
11 changed files
--- a/docs/source/serving/multimodal_inputs.md
+++ b/docs/source/serving/multimodal_inputs.md
@@ -216,7 +216,7 @@ A chat template is **required** to use Chat Completions API.

 Although most models come with a chat template, for others you have to define one yourself.
 The chat template can be inferred based on the documentation on the model's HuggingFace repo.
-For example, LLaVA-1.5 (`llava-hf/llava-1.5-7b-hf`) requires a chat template that can be found here: <gh-file:examples/template_llava.jinja>
+For example, DeepSeek-VL2 requires a chat template that can be found here: <gh-file:examples/template_deepseek_vl2.jinja>
 :::

 ### Image Inputs

--- a/examples/online_serving/openai_chat_completion_client_for_multimodal.py
+++ b/examples/online_serving/openai_chat_completion_client_for_multimodal.py
@@ -5,7 +5,7 @@ and run online serving with OpenAI client.
 Launch the vLLM server with the following command:

 (single image inference with Llava)
-vllm serve llava-hf/llava-1.5-7b-hf --chat-template template_llava.jinja
+vllm serve llava-hf/llava-1.5-7b-hf

 (multi-image inference with Phi-3.5-vision-instruct)
 vllm serve microsoft/Phi-3.5-vision-instruct --task generate \

--- a/examples/template_chameleon.jinja
+++ b/examples/template_chameleon.jinja
+{#- Emit the content of every message back-to-back, in order; roles are not rendered and nothing is inserted between messages. -#}
+{%- for message in messages -%}
+    {{- message['content'] -}}
+{%- endfor -%}
--- a/examples/template_florence2.jinja
+++ b/examples/template_florence2.jinja
 {%- for message in messages -%}
-    {%- if message['role'] == 'user' -%}
    {{- message['content'] -}}
-    {%- elif message['role'] == 'assistant' -%}
-        {{- message['content'] -}}
-    {%- endif -%}
 {%- endfor -%}
--- a/examples/template_fuyu.jinja
+++ b/examples/template_fuyu.jinja
+{#- Emit the content of every message in order, each followed by a newline; roles are not rendered. -#}
+{%- for message in messages -%}
+    {{- message['content'] + '\n' -}}
+{%- endfor -%}
--- a/examples/template_llava.jinja
+++ b/examples/template_llava.jinja
-{%- if messages[0]['role'] == 'system' -%}
-    {%- set system_message = messages[0]['content'] -%}
-    {%- set messages = messages[1:] -%}
-{%- else -%}
-    {% set system_message = '' -%}
-{%- endif -%}
-
-{{ bos_token + system_message }}
-{%- for message in messages -%}
-    {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
-        {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
-    {%- endif -%}
-
-    {%- if message['role'] == 'user' -%}
-        {{ 'USER: ' + message['content'] + '\n' }}
-    {%- elif message['role'] == 'assistant' -%}
-        {{ 'ASSISTANT: ' + message['content'] + eos_token + '\n' }}
-    {%- endif -%}
-{%- endfor -%}
-
-{%- if add_generation_prompt -%}
-    {{ 'ASSISTANT:' }}
-{% endif %}
--- a/examples/template_paligemma.jinja
+++ b/examples/template_paligemma.jinja
+{#- Emit the content of every message back-to-back, in order; roles are not rendered and nothing is inserted between messages. -#}
+{%- for message in messages -%}
+    {{- message['content'] -}}
+{%- endfor -%}
--- a/examples/template_qwen_vl.jinja
+++ b/examples/template_qwen_vl.jinja
+{#- Emit the content of every message back-to-back, in order; roles are not rendered and nothing is inserted between messages. -#}
+{%- for message in messages -%}
+    {{- message['content'] -}}
+{%- endfor -%}
--- a/examples/template_qwen_vl_chat.jinja
+++ b/examples/template_qwen_vl_chat.jinja
+{#- Render each message as '<|im_start|>' + role + newline + content. -#}
+{%- for message in messages -%}
+    {{- '<|im_start|>' + message['role'] + '\n' + message['content'] -}}
+    {#- Close the turn with '<|im_end|>' and a newline, except for the final message when no generation prompt is requested (that message is left unterminated). -#}
+    {%- if (loop.last and add_generation_prompt) or not loop.last -%}
+        {{- '<|im_end|>' + '\n' -}}
+    {%- endif -%}
+{%- endfor -%}
+
+{#- Open a new assistant turn only when a generation prompt is requested and the last message is not already from the assistant. -#}
+{%- if add_generation_prompt and messages[-1]['role'] != 'assistant' -%}
+    {{- '<|im_start|>assistant\n' -}}
+{%- endif -%}
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -900,6 +900,7 @@ def test_resolve_content_format_hf_defined(model, expected_format):
    [("template_alpaca.jinja", "string"),
     ("template_baichuan.jinja", "string"),
     ("template_blip2.jinja", "string"),
+     ("template_chameleon.jinja", "string"),
     ("template_chatglm.jinja", "string"),
     ("template_chatglm2.jinja", "string"),
     ("template_chatml.jinja", "string"),
@@ -908,9 +909,12 @@ def test_resolve_content_format_hf_defined(model, expected_format):
     ("template_falcon_180b.jinja", "string"),
     ("template_falcon.jinja", "string"),
     ("template_florence2.jinja", "string"),
+     ("template_fuyu.jinja", "string"),
     ("template_inkbot.jinja", "string"),
-     ("template_llava.jinja", "string"),
+     ("template_paligemma.jinja", "string"),
     ("template_teleflm.jinja", "string"),
+     ("template_qwen_vl.jinja", "string"),
+     ("template_qwen_vl_chat.jinja", "string"),
     ("template_vlm2vec.jinja", "openai"),
     ("tool_chat_template_granite_20b_fc.jinja", "string"),
     ("tool_chat_template_hermes.jinja", "string"),

--- a/tests/v1/tpu/test_multimodal.py
+++ b/tests/v1/tpu/test_multimodal.py
@@ -64,8 +64,6 @@ async def test_basic_vision(model_name: str, base64_encoded_image: dict[str,
        "576",
        # NOTE: max-num-batched-tokens>=mm_item_size
        "--disable_chunked_mm_input",
-        "--chat-template",
-        "examples/template_llava.jinja"
    ]

    # Server will pre-compile on first startup (takes a long time).