Unverified commit 2a41c02e, authored by Alexandre Marques, committed by GitHub

Extend support for chat template in vLLM (#2902)

* Add support for chat templates defined outside of tokenizer_config.json, as supported by vLLM (see the sketch after this list)

* Rename the template variable to avoid a conflict with another variable
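For context, here is a minimal sketch (not part of this commit) of what "a chat template defined outside of tokenizer_config.json" means in practice: a Jinja template shipped as a separate file and passed explicitly to `apply_chat_template`. The file path and model name below are hypothetical placeholders.

```python
# Sketch only: apply a chat template shipped as a standalone Jinja file
# instead of the one embedded in tokenizer_config.json.
# "chat_template.jinja" and the model name are hypothetical placeholders.
from pathlib import Path

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("some-org/some-chat-model")
external_template = Path("chat_template.jinja").read_text()

prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello!"}],
    tokenize=False,
    add_generation_prompt=True,
    chat_template=external_template,  # overrides the tokenizer's built-in template
)
print(prompt)
```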
parent d693dcd2
@@ -28,6 +28,7 @@ try:
     from vllm import LLM, SamplingParams
     from vllm.lora.request import LoRARequest
     from vllm.transformers_utils.tokenizer import get_tokenizer
+    from vllm.entrypoints.chat_utils import resolve_hf_chat_template
 except ModuleNotFoundError:
     pass
@@ -133,6 +134,13 @@ class VLLM(TemplateLM):
                 "Found 'gemma' in model name, a BOS token will be used as Gemma series models underperform without it."
             )
+        self.hf_chat_template = resolve_hf_chat_template(
+            tokenizer=self.tokenizer,
+            chat_template=None,
+            tools=None,
+            trust_remote_code=trust_remote_code,
+        )
+
         self.custom_prefix_token_id = prefix_token_id
         if prefix_token_id is not None:
             eval_logger.info(
@@ -195,6 +203,7 @@ class VLLM(TemplateLM):
             tokenize=False,
             add_generation_prompt=add_generation_prompt,
             continue_final_message=not add_generation_prompt,
+            chat_template=self.hf_chat_template,
         )
         return chat_templated
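Taken together, the three hunks resolve the chat template once at construction time and then pass it explicitly on every `apply_chat_template` call. The standalone sketch below illustrates that pattern outside the harness; the model name and messages are hypothetical, and the `resolve_hf_chat_template` keyword arguments mirror the call in the diff above, so they may differ across vLLM versions.

```python
# Sketch only: standalone illustration of the pattern added in this commit.
# The model name and messages are hypothetical placeholders; the
# resolve_hf_chat_template keywords follow the diff and may vary by vLLM version.
from vllm.entrypoints.chat_utils import resolve_hf_chat_template
from vllm.transformers_utils.tokenizer import get_tokenizer

tokenizer = get_tokenizer("some-org/some-chat-model", trust_remote_code=False)

# Passing chat_template=None lets vLLM resolve a template that is defined
# outside tokenizer_config.json (e.g. a template file shipped with the model).
hf_chat_template = resolve_hf_chat_template(
    tokenizer=tokenizer,
    chat_template=None,
    tools=None,
    trust_remote_code=False,
)

messages = [{"role": "user", "content": "Hello!"}]

# Apply the resolved template explicitly, as the patched VLLM class now does,
# instead of relying on whatever default the tokenizer carries.
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    chat_template=hf_chat_template,
)
print(prompt)
```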