Unverified Commit 2a41c02e authored by Alexandre Marques, committed by GitHub

Extend support for chat template in vLLM (#2902)

* Add support for chat templates defined outside of tokenizer_config.json, as supported by vLLM

* Update template name to avoid conflict with another variable
parent d693dcd2
@@ -28,6 +28,7 @@ try:
     from vllm import LLM, SamplingParams
     from vllm.lora.request import LoRARequest
     from vllm.transformers_utils.tokenizer import get_tokenizer
+    from vllm.entrypoints.chat_utils import resolve_hf_chat_template
 except ModuleNotFoundError:
     pass
@@ -133,6 +134,13 @@ class VLLM(TemplateLM):
                 "Found 'gemma' in model name, a BOS token will be used as Gemma series models underperform without it."
             )
+        self.hf_chat_template = resolve_hf_chat_template(
+            tokenizer=self.tokenizer,
+            chat_template=None,
+            tools=None,
+            trust_remote_code=trust_remote_code,
+        )
         self.custom_prefix_token_id = prefix_token_id
         if prefix_token_id is not None:
             eval_logger.info(
@@ -195,6 +203,7 @@ class VLLM(TemplateLM):
             tokenize=False,
             add_generation_prompt=add_generation_prompt,
             continue_final_message=not add_generation_prompt,
+            chat_template=self.hf_chat_template,
         )
         return chat_templated
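For context, a minimal sketch of how the resolved template is obtained and applied, mirroring the calls introduced in this diff. The model name is a placeholder, and the keyword signature of resolve_hf_chat_template is assumed to match the vLLM version targeted by this commit.

# Minimal sketch mirroring this diff; the model name is a placeholder and the
# resolve_hf_chat_template signature is assumed to match the targeted vLLM version.
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.entrypoints.chat_utils import resolve_hf_chat_template

tokenizer = get_tokenizer("mistralai/Mistral-7B-Instruct-v0.2")

# Resolve the chat template the way vLLM does, which also covers templates
# shipped outside of tokenizer_config.json.
hf_chat_template = resolve_hf_chat_template(
    tokenizer=tokenizer,
    chat_template=None,  # no user-supplied override
    tools=None,
    trust_remote_code=False,
)

messages = [{"role": "user", "content": "What is the capital of France?"}]
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
    chat_template=hf_chat_template,  # pass the resolved template explicitly
)
print(prompt)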