Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
7405c1c7
Unverified
Commit
7405c1c7
authored
Jul 23, 2024
by
KonradSzafer
Committed by
GitHub
Jul 23, 2024
Browse files
Add method to retrieve used chat template (#32032)
encapsulate chat template logic
parent
605f3245
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
72 additions
and
48 deletions
+72
-48
src/transformers/tokenization_utils_base.py
src/transformers/tokenization_utils_base.py
+72
-48
No files found.
src/transformers/tokenization_utils_base.py
View file @
7405c1c7
...
@@ -1772,54 +1772,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
...
@@ -1772,54 +1772,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
if
tokenizer_kwargs
is
None
:
if
tokenizer_kwargs
is
None
:
tokenizer_kwargs
=
{}
tokenizer_kwargs
=
{}
using_default_template
=
False
chat_template
=
self
.
get_chat_template
(
chat_template
,
tools
)
# First, handle the cases when the model has a dict of multiple templates
if
isinstance
(
self
.
chat_template
,
dict
)
or
(
self
.
chat_template
is
None
and
isinstance
(
self
.
default_chat_template
,
dict
)
):
if
self
.
chat_template
is
not
None
:
template_dict
=
self
.
chat_template
using_default_dict
=
False
else
:
template_dict
=
self
.
default_chat_template
using_default_dict
=
True
if
chat_template
is
not
None
and
chat_template
in
template_dict
:
# The user can pass the name of a template to the chat template argument instead of an entire template
chat_template
=
template_dict
[
chat_template
]
if
using_default_dict
:
using_default_template
=
True
elif
chat_template
is
None
:
if
tools
is
not
None
and
"tool_use"
in
template_dict
:
chat_template
=
template_dict
[
"tool_use"
]
elif
"default"
in
template_dict
:
chat_template
=
template_dict
[
"default"
]
else
:
raise
ValueError
(
"This model has multiple chat templates with no default specified! Please either pass a chat "
"template or the name of the template you wish to use to the `chat_template` argument. Available "
f
"template names are
{
sorted
(
template_dict
.
keys
())
}
."
)
if
using_default_dict
:
using_default_template
=
True
elif
chat_template
is
None
:
# These are the cases when the model has a single template
# priority: `chat_template` argument > `tokenizer.chat_template` > `tokenizer.default_chat_template
if
self
.
chat_template
is
not
None
:
chat_template
=
self
.
chat_template
else
:
chat_template
=
self
.
default_chat_template
using_default_template
=
True
if
using_default_template
:
logger
.
warning_once
(
"No chat template is set for this tokenizer, falling back to a default class-level template. This is "
"very error-prone, because models are often trained with templates different from the class default! "
"Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)
if
return_assistant_tokens_mask
and
not
re
.
search
(
r
"\{\%-?\s*generation\s*-?\%\}"
,
chat_template
):
if
return_assistant_tokens_mask
and
not
re
.
search
(
r
"\{\%-?\s*generation\s*-?\%\}"
,
chat_template
):
logger
.
warning_once
(
logger
.
warning_once
(
...
@@ -2012,6 +1965,77 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
...
@@ -2012,6 +1965,77 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
jinja_env
.
globals
[
"raise_exception"
]
=
raise_exception
jinja_env
.
globals
[
"raise_exception"
]
=
raise_exception
return
jinja_env
.
from_string
(
chat_template
)
return
jinja_env
.
from_string
(
chat_template
)
def get_chat_template(self, chat_template: Optional[str] = None, tools: Optional[List[Dict]] = None) -> str:
    """
    Resolve the chat template string that should be used to format chat messages.

    The `chat_template` argument takes priority over `self.chat_template`, which in turn takes priority over
    `self.default_chat_template`. When the tokenizer holds a dict of named templates, the argument may be either
    the name of one of those templates or a complete Jinja template string.

    Args:
        chat_template (`str`, *optional*):
            A Jinja template, or the name of one of the tokenizer's named templates. When omitted, the
            template is looked up on the tokenizer.
        tools (`List[Dict]`, *optional*):
            Tools that will be made available to the model. This method only checks whether `tools` is
            `None`: when it is not, no `chat_template` argument is given, and the tokenizer's template dict
            has a `"tool_use"` entry, that entry is selected.

    Returns:
        `str`: The selected chat template.

    Raises:
        ValueError: If the tokenizer has a dict of templates, no `chat_template` argument is given, and
            neither a `"tool_use"` entry (when `tools` is passed) nor a `"default"` entry applies.
    """
    fell_back_to_class_default = False

    # A dict of named templates may live on the instance or, when the instance has none, on the class default.
    has_named_templates = isinstance(self.chat_template, dict) or (
        self.chat_template is None and isinstance(self.default_chat_template, dict)
    )

    if has_named_templates:
        dict_is_class_default = self.chat_template is None
        named_templates = self.default_chat_template if dict_is_class_default else self.chat_template

        if chat_template is None:
            # Nothing requested explicitly: prefer the tool-use template when tools are given, else "default".
            if tools is not None and "tool_use" in named_templates:
                chat_template = named_templates["tool_use"]
            elif "default" in named_templates:
                chat_template = named_templates["default"]
            else:
                raise ValueError(
                    "This model has multiple chat templates with no default specified! Please either pass a chat "
                    "template or the name of the template you wish to use to the `chat_template` argument. Available "
                    f"template names are {sorted(named_templates.keys())}."
                )
            fell_back_to_class_default = dict_is_class_default
        elif chat_template in named_templates:
            # The caller passed the name of a template rather than a full template string.
            chat_template = named_templates[chat_template]
            fell_back_to_class_default = dict_is_class_default
        # Otherwise the argument is taken to be a complete template and is returned unchanged.
    elif chat_template is None:
        # Single-template case.
        # priority: `chat_template` argument > `tokenizer.chat_template` > `tokenizer.default_chat_template`
        if self.chat_template is None:
            chat_template = self.default_chat_template
            fell_back_to_class_default = True
        else:
            chat_template = self.chat_template

    if fell_back_to_class_default:
        logger.warning_once(
            "No chat template is set for this tokenizer, falling back to a default class-level template. This is "
            "very error-prone, because models are often trained with templates different from the class default! "
            "Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
            "point any code depending on them will stop working. We recommend setting a valid chat template before "
            "then to ensure that this model continues working without issues."
        )

    return chat_template
@
property
@
property
def
default_chat_template
(
self
):
def
default_chat_template
(
self
):
"""
"""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment