Unverified Commit 7405c1c7 authored by KonradSzafer's avatar KonradSzafer Committed by GitHub
Browse files

Add method to retrieve used chat template (#32032)

encapsulate chat template logic
parent 605f3245
...@@ -1772,54 +1772,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): ...@@ -1772,54 +1772,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
if tokenizer_kwargs is None: if tokenizer_kwargs is None:
tokenizer_kwargs = {} tokenizer_kwargs = {}
using_default_template = False chat_template = self.get_chat_template(chat_template, tools)
# First, handle the cases when the model has a dict of multiple templates
if isinstance(self.chat_template, dict) or (
self.chat_template is None and isinstance(self.default_chat_template, dict)
):
if self.chat_template is not None:
template_dict = self.chat_template
using_default_dict = False
else:
template_dict = self.default_chat_template
using_default_dict = True
if chat_template is not None and chat_template in template_dict:
# The user can pass the name of a template to the chat template argument instead of an entire template
chat_template = template_dict[chat_template]
if using_default_dict:
using_default_template = True
elif chat_template is None:
if tools is not None and "tool_use" in template_dict:
chat_template = template_dict["tool_use"]
elif "default" in template_dict:
chat_template = template_dict["default"]
else:
raise ValueError(
"This model has multiple chat templates with no default specified! Please either pass a chat "
"template or the name of the template you wish to use to the `chat_template` argument. Available "
f"template names are {sorted(template_dict.keys())}."
)
if using_default_dict:
using_default_template = True
elif chat_template is None:
# These are the cases when the model has a single template
# priority: `chat_template` argument > `tokenizer.chat_template` > `tokenizer.default_chat_template
if self.chat_template is not None:
chat_template = self.chat_template
else:
chat_template = self.default_chat_template
using_default_template = True
if using_default_template:
logger.warning_once(
"No chat template is set for this tokenizer, falling back to a default class-level template. This is "
"very error-prone, because models are often trained with templates different from the class default! "
"Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)
if return_assistant_tokens_mask and not re.search(r"\{\%-?\s*generation\s*-?\%\}", chat_template): if return_assistant_tokens_mask and not re.search(r"\{\%-?\s*generation\s*-?\%\}", chat_template):
logger.warning_once( logger.warning_once(
...@@ -2012,6 +1965,77 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin): ...@@ -2012,6 +1965,77 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
jinja_env.globals["raise_exception"] = raise_exception jinja_env.globals["raise_exception"] = raise_exception
return jinja_env.from_string(chat_template) return jinja_env.from_string(chat_template)
def get_chat_template(self, chat_template: Optional[str] = None, tools: Optional[List[Dict]] = None) -> str:
    """
    Retrieve the chat template string used for tokenizing chat messages. This template is used
    internally by the `apply_chat_template` method and can also be used externally to retrieve the model's chat
    template for better generation tracking.

    The template is resolved with the following priority: the `chat_template` argument, then
    `tokenizer.chat_template`, then `tokenizer.default_chat_template`. When the tokenizer holds a dict of named
    templates, the `chat_template` argument may be the name of one of them; if it is not a known name it is
    treated as a full template and returned unchanged.

    Args:
        chat_template (`str`, *optional*):
            A Jinja template or the name of a template to use for this conversion.
            It is usually not necessary to pass anything to this argument,
            as the model's template will be used by default.
        tools (`List[Dict]`, *optional*):
            A list of tools (callable functions) that will be accessible to the model. If the template does not
            support function calling, this argument will have no effect. Each tool should be passed as a JSON Schema,
            giving the name, description and argument types for the tool. See our
            [chat templating guide](https://huggingface.co/docs/transformers/main/en/chat_templating#automated-function-conversion-for-tool-use)
            for more information.

    Returns:
        `str`: The chat template string.

    Raises:
        ValueError: If the tokenizer has several named templates, no `chat_template` argument was given, and
            neither a `"tool_use"` template (when `tools` is passed) nor a `"default"` template is available.
    """
    # The class-level default is only consulted when the tokenizer has no template of its own.
    falling_back_to_default = self.chat_template is None
    candidates = self.default_chat_template if falling_back_to_default else self.chat_template

    # True when the returned template comes from the tokenizer rather than verbatim from the caller.
    resolved_from_tokenizer = False

    if isinstance(candidates, dict):
        # The model ships multiple named templates.
        if chat_template is None:
            if tools is not None and "tool_use" in candidates:
                chat_template = candidates["tool_use"]
            elif "default" in candidates:
                chat_template = candidates["default"]
            else:
                raise ValueError(
                    "This model has multiple chat templates with no default specified! Please either pass a chat "
                    "template or the name of the template you wish to use to the `chat_template` argument. Available "
                    f"template names are {sorted(candidates)}."
                )
            resolved_from_tokenizer = True
        elif chat_template in candidates:
            # The user can pass the name of a template to the chat template argument instead of an entire template
            chat_template = candidates[chat_template]
            resolved_from_tokenizer = True
    elif chat_template is None:
        # The model has a single template.
        # priority: `chat_template` argument > `tokenizer.chat_template` > `tokenizer.default_chat_template`
        chat_template = candidates
        resolved_from_tokenizer = True

    if resolved_from_tokenizer and falling_back_to_default:
        logger.warning_once(
            "No chat template is set for this tokenizer, falling back to a default class-level template. This is "
            "very error-prone, because models are often trained with templates different from the class default! "
            "Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
            "point any code depending on them will stop working. We recommend setting a valid chat template before "
            "then to ensure that this model continues working without issues."
        )

    return chat_template
@property @property
def default_chat_template(self): def default_chat_template(self):
""" """
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment