Unverified commit 2a41c02e, authored by Alexandre Marques, committed by GitHub

Extend support for chat template in vLLM (#2902)

* Add support for chat templates defined outside of tokenizer_config.json, as supported by vLLM (see the sketch after this list)

* Rename the template variable to avoid a conflict with another variable
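For context, here is a minimal sketch (not part of this commit) of what "a chat template defined outside of tokenizer_config.json" means in practice: a Jinja template shipped as a separate file and passed explicitly to `apply_chat_template`. The file path and model name below are hypothetical placeholders.

```python
# Sketch only: apply a chat template shipped as a standalone Jinja file
# instead of the one embedded in tokenizer_config.json.
# "chat_template.jinja" and the model name are hypothetical placeholders.
from pathlib import Path

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("some-org/some-chat-model")
external_template = Path("chat_template.jinja").read_text()

prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello!"}],
    tokenize=False,
    add_generation_prompt=True,
    chat_template=external_template,  # overrides the tokenizer's built-in template
)
print(prompt)
```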
parent d693dcd2
@@ -28,6 +28,7 @@ try:
     from vllm import LLM, SamplingParams
     from vllm.lora.request import LoRARequest
     from vllm.transformers_utils.tokenizer import get_tokenizer
+    from vllm.entrypoints.chat_utils import resolve_hf_chat_template
 except ModuleNotFoundError:
     pass
@@ -133,6 +134,13 @@ class VLLM(TemplateLM):
                 "Found 'gemma' in model name, a BOS token will be used as Gemma series models underperform without it."
             )
+        self.hf_chat_template = resolve_hf_chat_template(
+            tokenizer=self.tokenizer,
+            chat_template=None,
+            tools=None,
+            trust_remote_code=trust_remote_code,
+        )
+
         self.custom_prefix_token_id = prefix_token_id
         if prefix_token_id is not None:
             eval_logger.info(
@@ -195,6 +203,7 @@ class VLLM(TemplateLM):
             tokenize=False,
             add_generation_prompt=add_generation_prompt,
             continue_final_message=not add_generation_prompt,
+            chat_template=self.hf_chat_template,
         )
         return chat_templated
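Taken together, the three hunks resolve the chat template once at construction time and then pass it explicitly on every `apply_chat_template` call. The standalone sketch below illustrates that pattern outside the harness; the model name and messages are hypothetical, and the `resolve_hf_chat_template` keyword arguments mirror the call in the diff above, so they may differ across vLLM versions.

```python
# Sketch only: standalone illustration of the pattern added in this commit.
# The model name and messages are hypothetical placeholders; the
# resolve_hf_chat_template keywords follow the diff and may vary by vLLM version.
from vllm.entrypoints.chat_utils import resolve_hf_chat_template
from vllm.transformers_utils.tokenizer import get_tokenizer

tokenizer = get_tokenizer("some-org/some-chat-model", trust_remote_code=False)

# Passing chat_template=None lets vLLM resolve a template that is defined
# outside tokenizer_config.json (e.g. a template file shipped with the model).
hf_chat_template = resolve_hf_chat_template(
    tokenizer=tokenizer,
    chat_template=None,
    tools=None,
    trust_remote_code=False,
)

messages = [{"role": "user", "content": "Hello!"}]

# Apply the resolved template explicitly, as the patched VLLM class now does,
# instead of relying on whatever default the tokenizer carries.
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    chat_template=hf_chat_template,
)
print(prompt)
```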